1"""
2Read and write ZIP files.
3
4XXX references to utf-8 need further investigation.
5"""
6import binascii
7import importlib.util
8import io
9import itertools
10import os
11import posixpath
12import shutil
13import stat
14import struct
15import sys
16import threading
17import time
18import contextlib
19
20try:
21    import zlib # We may need its compression method
22    crc32 = zlib.crc32
23except ImportError:
24    zlib = None
25    crc32 = binascii.crc32
26
27try:
28    import bz2 # We may need its compression method
29except ImportError:
30    bz2 = None
31
32try:
33    import lzma # We may need its compression method
34except ImportError:
35    lzma = None
36
37__all__ = ["BadZipFile", "BadZipfile", "error",
38           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
39           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
40           "Path"]
41
class BadZipFile(Exception):
    """Raised when an archive is malformed or uses an unsupported layout.

    Within this module it is raised for archives spanning multiple disks
    and for corrupt extra fields.
    """
44
45
class LargeZipFile(Exception):
    """Signal that a write needs ZIP64 extensions that are disabled.

    Raised while writing a zipfile when the data would require ZIP64
    extensions and the caller has not allowed them.
    """
51
52error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
53
54
# Size thresholds.  ZIP64_LIMIT is 2**31 - 1; ZipInfo.FileHeader() compares
# file_size and compress_size against it to decide whether ZIP64 is needed.
ZIP64_LIMIT = (1 << 31) - 1
# Both limits below are 2**16 - 1 (65535), the largest unsigned 16-bit value.
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# Version numbers.  ZipInfo.FileHeader() raises extract_version and
# create_version to at least the value matching the features an entry uses.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
72
73# Below are some formats and associated data for reading/writing headers using
74# the struct module.  The names and structures of headers/records are those used
75# in the PKWARE description of the ZIP file format:
76#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
77# (URL valid as of January 2008)
78
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Format: little-endian, 4-byte signature, four 16-bit fields, two 32-bit
# fields and one 16-bit field -- eight values, 22 bytes in total.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the unpacked values within the "endrec" list.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData() appends the comment bytes and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
97
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Format: little-endian, 4-byte signature, four bytes, four 16-bit fields,
# three 32-bit fields, five 16-bit fields and two 32-bit fields -- nineteen
# values, 46 bytes in total.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
124
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Format: little-endian, 4-byte signature, two bytes, four 16-bit fields,
# three 32-bit fields and two 16-bit fields -- twelve values, 30 bytes in
# total.  ZipInfo.FileHeader() packs its header with this format.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the unpacked values of a local file header.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
143
# The "Zip64 end of central directory locator" structure, magic number, and size
# Format: little-endian, 4-byte signature, 32-bit, 64-bit and 32-bit fields
# (20 bytes); _EndRecData64() unpacks them as sig, diskno, reloff, disks.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Format: little-endian, 4-byte signature, one 64-bit field, two 16-bit
# fields, two 32-bit fields and four 64-bit fields -- ten values, 56 bytes.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the unpacked values of the ZIP64 end-of-central-directory record.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

# Stored little-endian this value is the byte string b"PK\x07\x08".
# NOTE(review): presumably the data-descriptor signature -- confirm against
# the code that uses it (not visible in this part of the file).
_DD_SIGNATURE = 0x08074b50

# Header of a single extra-field record: a 16-bit ID followed by a 16-bit
# payload length, both little-endian (see _strip_extra()).
_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
169
def _strip_extra(extra, xids):
    """Return *extra* with every record whose ID is in *xids* removed.

    *extra* is a bytes object holding a sequence of extra-field records,
    each made of a 4-byte header (16-bit ID, 16-bit payload length, both
    little-endian) followed by the payload.  *xids* is any container
    supporting ``in``.

    The original object is returned unchanged when nothing matches.
    Otherwise all non-matching data is preserved in order, including the
    records (and any trailing bytes) that follow the last removed record.
    """
    header_size = 4
    total = len(extra)
    kept = []
    modified = False
    start = pos = 0          # start: beginning of the pending run to keep
    while pos + header_size <= total:
        xid, xlen = struct.unpack_from('<HH', extra, pos)
        next_pos = pos + header_size + xlen
        if xid in xids:
            # Flush whatever we skipped over since the last removed record.
            if pos != start:
                kept.append(extra[start:pos])
            start = next_pos
            modified = True
        pos = next_pos
    if not modified:
        return extra
    # Keep the tail after the last removed record; without this the
    # remaining records would be silently discarded.
    kept.append(extra[start:])
    return b''.join(kept)
188
def _check_zipfile(fp):
    """Return True if *fp* contains an end-of-central-directory record.

    An OSError raised while probing the stream is treated as "not a ZIP
    file" and yields False.
    """
    try:
        found = bool(_EndRecData(fp))   # truthy => correct magic number
    except OSError:
        found = False
    return found
196
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or a file or file-like object
    (anything with a "read" attribute).  Returns False instead of raising
    when the file cannot be opened or read (OSError).
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it gets closed.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
212
def _EndRecData64(fpin, offset, endrec):
    """Overlay ZIP64 end-of-archive data onto *endrec*, if present.

    *offset* is the (negative) position of the regular end-of-central-
    directory record relative to the end of the file.  The ZIP64 locator is
    expected immediately before it, and the ZIP64 record immediately before
    the locator.  *endrec* is updated in place and returned; it is returned
    untouched when no valid ZIP64 structures are found.

    Raises BadZipFile for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, raw)
    if magic != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    record = struct.unpack(structEndArchive64, raw)
    if record[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    replacements = (
        (_ECD_SIGNATURE, _CD64_SIGNATURE),
        (_ECD_DISK_NUMBER, _CD64_DISK_NUMBER),
        (_ECD_DISK_START, _CD64_DISK_NUMBER_START),
        (_ECD_ENTRIES_THIS_DISK, _CD64_NUMBER_ENTRIES_THIS_DISK),
        (_ECD_ENTRIES_TOTAL, _CD64_NUMBER_ENTRIES_TOTAL),
        (_ECD_SIZE, _CD64_DIRECTORY_SIZE),
        (_ECD_OFFSET, _CD64_OFFSET_START_CENTDIR),
    )
    for ecd_index, cd64_index in replacements:
        endrec[ecd_index] = record[cd64_index]
    return endrec
254
255
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the eight fields of the ZIP "End of
    central dir" record, then the archive comment, then the file offset of
    the record.  It is passed through _EndRecData64() so ZIP64 values
    replace the 32-bit ones when available.  None means no usable record
    was found."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: a ZIP file without an archive comment ends exactly with the
    # "end of central directory" structure.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    no_comment_record = (len(tail) == sizeEndCentDir
                         and tail[0:4] == stringEndArchive
                         and tail[-2:] == b"\000\000")
    if no_comment_record:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec += [b"", filesize - sizeEndCentDir]

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    window = fpin.read()
    start = window.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record_end = start + sizeEndCentDir
    recData = window[start:record_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(window[record_end:record_end + commentSize])
    endrec.append(maxCommentStart + start)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + start - filesize, endrec)
315
316
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* stamped with *date_time*.

        filename is kept verbatim as orig_filename; the normalized form
        (cut at the first null character, os.sep replaced by "/") is stored
        as filename.  date_time is a sequence of year, month, day, hour,
        minute, second.

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        """Return a compact description listing only non-default details."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits are rendered as a file mode, the lower 16 in hex.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 controls the ZIP64 extra record: None (the default) enables it
        only when file_size or compress_size exceeds ZIP64_LIMIT; a true
        value always appends it; a false value forbids it.

        As a side effect extract_version and create_version are raised to
        the minimum version required by the features in use.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while zip64 is
        false.
        """
        dt = self.date_time
        # Pack the timestamp into the 16-bit date and time fields; seconds
        # are stored with two-second resolution.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Extra record with ID 1: two 64-bit sizes preceded by the
            # 4-byte record header (hence the "- 4" for the payload length).
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (ID 0x0001) found in self.extra, if any.

        Each of file_size, compress_size and header_offset that currently
        holds its "see ZIP64 record" marker is replaced, in that order, by
        the next 64-bit value of the record.

        Raises BadZipFile if a record claims more bytes than are available
        or if the ZIP64 record is too short for a required value.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    # unpack() fails when fewer than 8 bytes remain.
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        If strict_timestamps is false, modification years before 1980 are
        replaced by 1980-01-01 00:00:00 and years after 2107 by
        2107-12-31 23:59:59; otherwise the timestamp is passed on unchanged
        and the constructor raises ValueError for years before 1980.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory.

        A member is a directory when its name ends with "/".  An empty name
        is not a directory (endswith() is used so that it does not raise
        IndexError the way indexing the last character would).
        """
        return self.filename.endswith('/')
530
531
532# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
533# internal keys. We noticed that a direct implementation is faster than
534# relying on binascii.crc32().
535
_crctable = None
def _gen_crc(crc):
    """Return the value obtained by running eight CRC-32 shift steps on *crc*.

    Each step shifts right by one bit and XORs in 0xEDB88320 when the bit
    shifted out was set.  _ZipDecrypter() maps this over range(256) to
    build its lookup table.
    """
    steps_left = 8
    while steps_left:
        carry = crc & 1
        crc >>= 1
        if carry:
            crc ^= 0xEDB88320
        steps_left -= 1
    return crc
544
545# ZIP supports a password-based form of encryption. Even though known
546# plaintext attacks have been found against it, it is still useful
547# to be able to get data out of such a file.
548#
549# Usage:
550#     zd = _ZipDecrypter(mypwd)
551#     plain_bytes = zd(cypher_bytes)
552
def _ZipDecrypter(pwd):
    """Return a function that decrypts successive chunks of a cipher stream.

    *pwd* is iterated to seed three 32-bit keys.  The returned callable
    takes a bytes-like object and returns the decrypted bytes; it keeps
    updating the keys, so chunks must be fed in stream order.
    """
    global _crctable
    if _crctable is None:
        # Build the shared 256-entry lookup table on first use.
        _crctable = list(map(_gen_crc, range(256)))
    table = _crctable

    # Initial key values (0x12345678, 0x23456789, 0x34567890).
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    def crc_step(byte, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ byte) & 0xFF]

    def absorb(byte):
        # Mix one plaintext byte into the three keys.
        nonlocal key0, key1, key2
        key0 = crc_step(byte, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc_step(key1 >> 24, key2)

    for pwd_byte in pwd:
        absorb(pwd_byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            mask_src = key2 | 2
            byte ^= ((mask_src * (mask_src ^ 1)) >> 8) & 0xFF
            # Keys evolve with the decrypted byte, not the cipher byte.
            absorb(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
589
590
class LZMACompressor:
    """Compressor producing a raw LZMA1 stream prefixed by a small header.

    The header is emitted once, in front of the first output: the bytes 9
    and 4, the 16-bit little-endian length of the filter properties, then
    the properties themselves.
    """

    def __init__(self):
        # The real compressor is created lazily so that the header can be
        # returned together with the first chunk of output.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the stream header."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, returning whatever output is available."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream and return the remaining output."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
612
613
class LZMADecompressor:
    """Decompressor for a raw LZMA1 stream preceded by a properties header.

    Input is buffered until the 4-byte header and the filter properties it
    announces have arrived along with at least one further byte; only then
    is the underlying decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return the decompressed bytes available so far."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 hold the little-endian size of the properties.
            (psize,) = struct.unpack_from('<H', pending, 2)
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            # The header buffer is no longer needed once decoding starts.
            del self._unconsumed

        output = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return output
640
641
# Human-readable names for compression method numbers.  Used when building
# repr() strings and NotImplementedError messages; the table also names
# methods for which this module provides no compressor or decompressor.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
661
def _check_compression(compression):
    """Verify that *compression* is a supported, usable method.

    Returns None on success.  Raises RuntimeError if the method is known
    but the module implementing it could not be imported, and
    NotImplementedError for any other method number.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        return
    if compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
        return
    if compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
        return
    raise NotImplementedError("That compression method is not supported")
679
680
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*, or None.

    compresslevel, when not None, is passed to the deflate and bzip2
    compressors; it is ignored for ZIP_LZMA.  None is returned for any
    method without a compressor here (including ZIP_STORED).
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        # Negative window bits: raw deflate stream without zlib header.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
695
696
def _get_decompressor(compress_type):
    """Return a new decompressor for *compress_type* (None for ZIP_STORED).

    The method is validated with _check_compression() first, so
    RuntimeError and NotImplementedError propagate from there.
    """
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: raw deflate stream without zlib header.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    # Defensive fallback: _check_compression() above already raises for any
    # method other than the four handled here.
    descr = compressor_names.get(compress_type)
    if not descr:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
713
714
class _SharedFile:
    """Read handle with its own position on a file shared by several users.

    file is the underlying file object, pos the initial position of this
    handle, close a callable invoked with the file object when the handle
    is closed, lock the lock guarding access to the file, and writing a
    callable returning true while a writing handle is open on the file.
    """

    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing
        self.seekable = file.seekable

    def tell(self):
        """Return the position of this handle.

        The underlying file's own position is not used because another
        handle sharing the file may have moved it in the meantime.
        """
        return self._pos

    def seek(self, offset, whence=0):
        """Reposition this handle and return the new absolute position.

        Raises ValueError while a writing handle is open on the file.
        """
        with self._lock:
            if self._writing():
                raise ValueError("Can't reposition in the ZIP file while "
                        "there is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            if whence == os.SEEK_CUR:
                # Relative seeks are relative to this handle's position,
                # not to wherever the shared file currently happens to be.
                self._file.seek(self._pos + offset)
            else:
                self._file.seek(offset, whence)
            self._pos = self._file.tell()
            return self._pos

    def read(self, n=-1):
        """Read up to n bytes starting at this handle's position.

        Raises ValueError while a writing handle is open on the file.
        """
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            # Restore our own position first; other handles may have moved it.
            self._file.seek(self._pos)
            data = self._file.read(n)
            self._pos = self._file.tell()
            return data

    def close(self):
        """Release the underlying file through the close callback, once."""
        if self._file is not None:
            fileobj = self._file
            self._file = None
            self._close(fileobj)
751
# Provide the tell method for unseekable stream
class _Tellable:
    """Write-only wrapper that tracks how much has been written to *fp*."""

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0     # running total of the values returned by write()

    def write(self, data):
        """Write *data* to the wrapped stream and return its result."""
        written = self.fp.write(data)
        self.offset = self.offset + written
        return written

    def tell(self):
        """Return the running total accumulated by write()."""
        return self.offset

    def flush(self):
        """Flush the wrapped stream."""
        self.fp.flush()

    def close(self):
        """Close the wrapped stream."""
        self.fp.close()
771
772
773class ZipExtFile(io.BufferedIOBase):
774    """File-like object for reading an archive member.
775       Is returned by ZipFile.open().
776    """
777
778    # Max size supported by decompressor.
779    MAX_N = 1 << 31 - 1
780
781    # Read from compressed files in 4k blocks.
782    MIN_READ_SIZE = 4096
783
784    # Chunk size to read during seek
785    MAX_SEEK_READ = 1 << 24
786
    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        """Set up the reading state for one archive member.

        fileobj is the stream the member's data is read from; when it is
        seekable, its current position is recorded as the start of the
        data.  mode is stored as self.mode.  zipinfo supplies the
        compression method, the sizes, the flag bits and, if present, the
        CRC.  pwd, when true, is used to initialise decryption.
        close_fileobj is stored for later use.

        Raises RuntimeError if pwd is given and the password check byte
        does not match.
        """
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        # Remaining compressed / uncompressed byte counts.
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                # Snapshot taken before any encryption header is consumed
                # below, so it describes the untouched member data.
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            # Covers file objects without seekable().  NOTE(review): it also
            # swallows the AttributeError raised by reading
            # self._running_crc when zipinfo has no CRC, which leaves
            # _seekable False -- confirm this is intended.
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            # Consumes the 12-byte encryption header from fileobj.
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)
836
837
838    def _init_decrypter(self):
839        self._decrypter = _ZipDecrypter(self._pwd)
840        # The first 12 bytes in the cypher stream is an encryption header
841        #  used to strengthen the algorithm. The first 11 bytes are
842        #  completely random, while the 12th contains the MSB of the CRC,
843        #  or the MSB of the file time depending on the header type
844        #  and is used to check the correctness of the password.
845        header = self._fileobj.read(12)
846        self._compress_left -= 12
847        return self._decrypter(header)[11]
848
849    def __repr__(self):
850        result = ['<%s.%s' % (self.__class__.__module__,
851                              self.__class__.__qualname__)]
852        if not self.closed:
853            result.append(' name=%r mode=%r' % (self.name, self.mode))
854            if self._compress_type != ZIP_STORED:
855                result.append(' compress_type=%s' %
856                              compressor_names.get(self._compress_type,
857                                                   self._compress_type))
858        else:
859            result.append(' [closed]')
860        result.append('>')
861        return ''.join(result)
862
863    def readline(self, limit=-1):
864        """Read and return a line from the stream.
865
866        If limit is specified, at most limit bytes will be read.
867        """
868
869        if limit < 0:
870            # Shortcut common case - newline found in buffer.
871            i = self._readbuffer.find(b'\n', self._offset) + 1
872            if i > 0:
873                line = self._readbuffer[self._offset: i]
874                self._offset = i
875                return line
876
877        return io.BufferedIOBase.readline(self, limit)
878
879    def peek(self, n=1):
880        """Returns buffered bytes without advancing the position."""
881        if n > len(self._readbuffer) - self._offset:
882            chunk = self.read(n)
883            if len(chunk) > self._offset:
884                self._readbuffer = chunk + self._readbuffer[self._offset:]
885                self._offset = 0
886            else:
887                self._offset -= len(chunk)
888
889        # Return up to 512 bytes to reduce allocation overhead for tight loops.
890        return self._readbuffer[self._offset: self._offset + 512]
891
892    def readable(self):
893        if self.closed:
894            raise ValueError("I/O operation on closed file.")
895        return True
896
897    def read(self, n=-1):
898        """Read and return up to n bytes.
899        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
900        """
901        if self.closed:
902            raise ValueError("read from closed file.")
903        if n is None or n < 0:
904            buf = self._readbuffer[self._offset:]
905            self._readbuffer = b''
906            self._offset = 0
907            while not self._eof:
908                buf += self._read1(self.MAX_N)
909            return buf
910
911        end = n + self._offset
912        if end < len(self._readbuffer):
913            buf = self._readbuffer[self._offset:end]
914            self._offset = end
915            return buf
916
917        n = end - len(self._readbuffer)
918        buf = self._readbuffer[self._offset:]
919        self._readbuffer = b''
920        self._offset = 0
921        while n > 0 and not self._eof:
922            data = self._read1(n)
923            if n < len(data):
924                self._readbuffer = data
925                self._offset = n
926                buf += data[:n]
927                break
928            buf += data
929            n -= len(data)
930        return buf
931
932    def _update_crc(self, newdata):
933        # Update the CRC using the given data.
934        if self._expected_crc is None:
935            # No need to compute the CRC if we don't have a reference value
936            return
937        self._running_crc = crc32(newdata, self._running_crc)
938        # Check the CRC if we're at the end of the file
939        if self._eof and self._running_crc != self._expected_crc:
940            raise BadZipFile("Bad CRC-32 for file %r" % self.name)
941
942    def read1(self, n):
943        """Read up to n bytes with at most one read() system call."""
944
945        if n is None or n < 0:
946            buf = self._readbuffer[self._offset:]
947            self._readbuffer = b''
948            self._offset = 0
949            while not self._eof:
950                data = self._read1(self.MAX_N)
951                if data:
952                    buf += data
953                    break
954            return buf
955
956        end = n + self._offset
957        if end < len(self._readbuffer):
958            buf = self._readbuffer[self._offset:end]
959            self._offset = end
960            return buf
961
962        n = end - len(self._readbuffer)
963        buf = self._readbuffer[self._offset:]
964        self._readbuffer = b''
965        self._offset = 0
966        if n > 0:
967            while not self._eof:
968                data = self._read1(n)
969                if n < len(data):
970                    self._readbuffer = data
971                    self._offset = n
972                    buf += data[:n]
973                    break
974                if data:
975                    buf += data
976                    break
977        return buf
978
    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        #
        # Returns the resulting uncompressed bytes (possibly empty) and keeps
        # self._eof, self._left and the running CRC up to date.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            # Input the decompressor did not consume on the previous call is
            # fed again first; the file is only read for the remainder.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            # Stored data needs no decompression; EOF is reached when all
            # compressed bytes have been read.
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            # The second argument bounds the amount of output produced.
            data = self._decompressor.decompress(data, n)
            # EOF: either the decompressor reports end of stream, or the input
            # is exhausted and nothing is left unconsumed.
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the number of bytes still expected
        # (self._left).
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        # Must run after self._eof is final: the CRC is verified at EOF.
        self._update_crc(data)
        return data
1014
1015    def _read2(self, n):
1016        if self._compress_left <= 0:
1017            return b''
1018
1019        n = max(n, self.MIN_READ_SIZE)
1020        n = min(n, self._compress_left)
1021
1022        data = self._fileobj.read(n)
1023        self._compress_left -= len(data)
1024        if not data:
1025            raise EOFError
1026
1027        if self._decrypter is not None:
1028            data = self._decrypter(data)
1029        return data
1030
1031    def close(self):
1032        try:
1033            if self._close_fileobj:
1034                self._fileobj.close()
1035        finally:
1036            super().close()
1037
1038    def seekable(self):
1039        if self.closed:
1040            raise ValueError("I/O operation on closed file.")
1041        return self._seekable
1042
    def seek(self, offset, whence=0):
        """Move to a new position in the uncompressed data and return it.

        whence is 0 (offset from the start), 1 (from the current position) or
        2 (from the end).  The target is clamped to the range
        [0, uncompressed size].  A target inside the current read buffer only
        moves the buffer offset; a target before the current position restarts
        reading from the beginning of the member; any remaining distance is
        covered by reading and discarding data.

        Raises ValueError if the file is closed or whence is invalid, and
        io.UnsupportedOperation if the underlying stream is not seekable.
        """
        if self.closed:
            raise ValueError("seek on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        # Clamp the target to the valid range.
        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        # read_offset: distance to travel relative to the current position.
        # buff_offset: where the target would fall inside the read buffer.
        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            # After the reset the whole distance from the start must be read.
            read_offset = new_pos
            if self._decrypter is not None:
                # Re-create the decrypter and consume the encryption header.
                self._init_decrypter()

        # Advance by reading and discarding, in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()
1092
1093    def tell(self):
1094        if self.closed:
1095            raise ValueError("tell on closed file.")
1096        if not self._seekable:
1097            raise io.UnsupportedOperation("underlying stream is not seekable")
1098        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
1099        return filepos
1100
1101
class _ZipWriteFile(io.BufferedIOBase):
    """Writable file object for one member being added to a ZipFile.

    Data passed to write() is checksummed, optionally compressed and written
    to the archive's file object.  close() finalizes the member: it records
    sizes and CRC on the ZipInfo, writes either a trailing data descriptor or
    the rewritten local file header, and registers the member with the
    archive.
    """

    def __init__(self, zf, zinfo, zip64):
        """Prepare to write the member described by zinfo into archive zf.

        zip64 selects the 64-bit size fields for the data descriptor and is
        passed on to ZipInfo.FileHeader() when the header is rewritten.
        """
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        # Always go through the archive so its current fp is used.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write a bytes-like object and return the number of bytes consumed.

        Raises ValueError if the file has already been closed.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any data that supports the buffer protocol.  len() counts
        # elements, not bytes, for objects such as array.array or a cast
        # memoryview, so the byte count is taken from memoryview.nbytes.
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member and update the archive's bookkeeping.

        Calling close() more than once is a no-op.  Raises RuntimeError if
        ZIP64 is not in use and a size exceeds ZIP64_LIMIT.  The archive's
        _writing flag is cleared in every case.
        """
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False
1177
1178
1179
1180class ZipFile:
1181    """ Class with methods to open, read, write, close, list zip files.
1182
1183    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1184                compresslevel=None)
1185
1186    file: Either the path to the file, or a file-like object.
1187          If it is a path, the file will be opened and closed by ZipFile.
1188    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1189          or append 'a'.
1190    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1191                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
1192    allowZip64: if True ZipFile will create files with ZIP64 extensions when
1193                needed, otherwise it will raise an exception when this would
1194                be necessary.
1195    compresslevel: None (default for the given compression type) or an integer
1196                   specifying the level to pass to the compressor.
1197                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1198                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
1199                   When using ZIP_BZIP2 integers 1 through 9 are accepted.
1200
1201    """
1202
1203    fp = None                   # Set here since __del__ checks it
1204    _windows_illegal_name_trans_table = None
1205
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'.

        file may be a str path, an os.PathLike object, or a file-like object.
        A path is opened here; any other object is used as given.

        Raises ValueError for an unsupported mode.  If initialization fails
        after the file object has been obtained, the file object is released
        through self._fpclose() before the exception propagates.
        """
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # Maps the ZipFile mode to the initial open() mode, and each
            # open() mode to the one to try next if opening fails with
            # OSError: 'r+b' -> 'w+b' -> 'wb' and 'x+b' -> 'xb'.  'rb', 'wb'
            # and 'xb' have no fallback, so their error propagates.
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # No usable tell(): wrap the stream in _Tellable and
                    # treat it as non-seekable, starting at offset 0.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # Release the file object on any failure, then re-raise.  self.fp
            # is cleared first so the instance no longer refers to it.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise
1295
    def __enter__(self):
        """Enter a ``with`` block; the archive itself is the context value."""
        return self
1298
    def __exit__(self, type, value, traceback):
        """Close the archive when leaving a ``with`` block.

        The exception details are not inspected, and the method returns None.
        """
        self.close()
1301
1302    def __repr__(self):
1303        result = ['<%s.%s' % (self.__class__.__module__,
1304                              self.__class__.__qualname__)]
1305        if self.fp is not None:
1306            if self._filePassed:
1307                result.append(' file=%r' % self.fp)
1308            elif self.filename is not None:
1309                result.append(' filename=%r' % self.filename)
1310            result.append(' mode=%r' % self.mode)
1311        else:
1312            result.append(' [closed]')
1313        result.append('>')
1314        return ''.join(result)
1315
1316    def _RealGetContents(self):
1317        """Read in the table of contents for the ZIP file."""
1318        fp = self.fp
1319        try:
1320            endrec = _EndRecData(fp)
1321        except OSError:
1322            raise BadZipFile("File is not a zip file")
1323        if not endrec:
1324            raise BadZipFile("File is not a zip file")
1325        if self.debug > 1:
1326            print(endrec)
1327        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
1328        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
1329        self._comment = endrec[_ECD_COMMENT]    # archive comment
1330
1331        # "concat" is zero, unless zip was concatenated to another file
1332        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
1333        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1334            # If Zip64 extension structures are present, account for them
1335            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1336
1337        if self.debug > 2:
1338            inferred = concat + offset_cd
1339            print("given, inferred, offset", offset_cd, inferred, concat)
1340        # self.start_dir:  Position of start of central directory
1341        self.start_dir = offset_cd + concat
1342        fp.seek(self.start_dir, 0)
1343        data = fp.read(size_cd)
1344        fp = io.BytesIO(data)
1345        total = 0
1346        while total < size_cd:
1347            centdir = fp.read(sizeCentralDir)
1348            if len(centdir) != sizeCentralDir:
1349                raise BadZipFile("Truncated central directory")
1350            centdir = struct.unpack(structCentralDir, centdir)
1351            if centdir[_CD_SIGNATURE] != stringCentralDir:
1352                raise BadZipFile("Bad magic number for central directory")
1353            if self.debug > 2:
1354                print(centdir)
1355            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
1356            flags = centdir[5]
1357            if flags & 0x800:
1358                # UTF-8 file names extension
1359                filename = filename.decode('utf-8')
1360            else:
1361                # Historical ZIP filename encoding
1362                filename = filename.decode('cp437')
1363            # Create ZipInfo instance to store file information
1364            x = ZipInfo(filename)
1365            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1366            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
1367            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
1368            (x.create_version, x.create_system, x.extract_version, x.reserved,
1369             x.flag_bits, x.compress_type, t, d,
1370             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
1371            if x.extract_version > MAX_EXTRACT_VERSION:
1372                raise NotImplementedError("zip file version %.1f" %
1373                                          (x.extract_version / 10))
1374            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1375            # Convert date/time code to (year, month, day, hour, min, sec)
1376            x._raw_time = t
1377            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
1378                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
1379
1380            x._decodeExtra()
1381            x.header_offset = x.header_offset + concat
1382            self.filelist.append(x)
1383            self.NameToInfo[x.filename] = x
1384
1385            # update total bytes read from central directory
1386            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1387                     + centdir[_CD_EXTRA_FIELD_LENGTH]
1388                     + centdir[_CD_COMMENT_LENGTH])
1389
1390            if self.debug > 2:
1391                print("total", total)
1392
1393
1394    def namelist(self):
1395        """Return a list of file names in the archive."""
1396        return [data.filename for data in self.filelist]
1397
    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive.

        The returned object is the archive's own ``filelist`` attribute, not
        a copy.
        """
        return self.filelist
1402
1403    def printdir(self, file=None):
1404        """Print a table of contents for the zip file."""
1405        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
1406              file=file)
1407        for zinfo in self.filelist:
1408            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
1409            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1410                  file=file)
1411
1412    def testzip(self):
1413        """Read all the files and check the CRC."""
1414        chunk_size = 2 ** 20
1415        for zinfo in self.filelist:
1416            try:
1417                # Read by chunks, to avoid an OverflowError or a
1418                # MemoryError with very large embedded files.
1419                with self.open(zinfo.filename, "r") as f:
1420                    while f.read(chunk_size):     # Check CRC-32
1421                        pass
1422            except BadZipFile:
1423                return zinfo.filename
1424
1425    def getinfo(self, name):
1426        """Return the instance of ZipInfo given 'name'."""
1427        info = self.NameToInfo.get(name)
1428        if info is None:
1429            raise KeyError(
1430                'There is no item named %r in the archive' % name)
1431
1432        return info
1433
1434    def setpassword(self, pwd):
1435        """Set default password for encrypted files."""
1436        if pwd and not isinstance(pwd, bytes):
1437            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1438        if pwd:
1439            self.pwd = pwd
1440        else:
1441            self.pwd = None
1442
1443    @property
1444    def comment(self):
1445        """The comment text associated with the ZIP file."""
1446        return self._comment
1447
1448    @comment.setter
1449    def comment(self, comment):
1450        if not isinstance(comment, bytes):
1451            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
1452        # check for valid comment length
1453        if len(comment) > ZIP_MAX_COMMENT:
1454            import warnings
1455            warnings.warn('Archive comment is too long; truncating to %d bytes'
1456                          % ZIP_MAX_COMMENT, stacklevel=2)
1457            comment = comment[:ZIP_MAX_COMMENT]
1458        self._comment = comment
1459        self._didModify = True
1460
1461    def read(self, name, pwd=None):
1462        """Return file bytes for name."""
1463        with self.open(name, "r", pwd) as fp:
1464            return fp.read()
1465
    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.

        Raises ValueError for an invalid mode, a password given together with
        mode 'w', a closed archive, or a read attempted while a write handle
        is open; TypeError if pwd is not bytes; BadZipFile if the local file
        header is truncated, has a bad signature, or names a different file
        than the central directory; NotImplementedError for compressed
        patched data or strong encryption; RuntimeError if the member is
        encrypted and no password is available.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            # New member: inherit the archive-wide compression settings.
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        # The shared file handle counts as one more user of self.fp; it is
        # given self._fpclose as its close callback.
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                # The extra field is read only to move past it.
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # The local header must name the same file as the directory entry.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            if is_encrypted:
                # Fall back to the archive's default password.
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)
            else:
                # A password supplied for an unencrypted member is ignored.
                pwd = None

            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
        except:
            # Close the shared handle on any failure, then re-raise.
            zef_file.close()
            raise
1563
1564    def _open_to_write(self, zinfo, force_zip64=False):
1565        if force_zip64 and not self._allowZip64:
1566            raise ValueError(
1567                "force_zip64 is True, but allowZip64 was False when opening "
1568                "the ZIP file."
1569            )
1570        if self._writing:
1571            raise ValueError("Can't write to the ZIP file while there is "
1572                             "another write handle open on it. "
1573                             "Close the first handle before opening another.")
1574
1575        # Size and CRC are overwritten with correct data after processing the file
1576        zinfo.compress_size = 0
1577        zinfo.CRC = 0
1578
1579        zinfo.flag_bits = 0x00
1580        if zinfo.compress_type == ZIP_LZMA:
1581            # Compressed data includes an end-of-stream (EOS) marker
1582            zinfo.flag_bits |= 0x02
1583        if not self._seekable:
1584            zinfo.flag_bits |= 0x08
1585
1586        if not zinfo.external_attr:
1587            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------
1588
1589        # Compressed size can be larger than uncompressed size
1590        zip64 = self._allowZip64 and \
1591                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1592
1593        if self._seekable:
1594            self.fp.seek(self.start_dir)
1595        zinfo.header_offset = self.fp.tell()
1596
1597        self._writecheck(zinfo)
1598        self._didModify = True
1599
1600        self.fp.write(zinfo.FileHeader(zip64))
1601
1602        self._writing = True
1603        return _ZipWriteFile(self, zinfo, zip64)
1604
1605    def extract(self, member, path=None, pwd=None):
1606        """Extract a member from the archive to the current working directory,
1607           using its full name. Its file information is extracted as accurately
1608           as possible. `member' may be a filename or a ZipInfo object. You can
1609           specify a different directory using `path'.
1610        """
1611        if path is None:
1612            path = os.getcwd()
1613        else:
1614            path = os.fspath(path)
1615
1616        return self._extract_member(member, path, pwd)
1617
1618    def extractall(self, path=None, members=None, pwd=None):
1619        """Extract all members from the archive to the current working
1620           directory. `path' specifies a different directory to extract to.
1621           `members' is optional and must be a subset of the list returned
1622           by namelist().
1623        """
1624        if members is None:
1625            members = self.namelist()
1626
1627        if path is None:
1628            path = os.getcwd()
1629        else:
1630            path = os.fspath(path)
1631
1632        for zipinfo in members:
1633            self._extract_member(zipinfo, path, pwd)
1634
1635    @classmethod
1636    def _sanitize_windows_name(cls, arcname, pathsep):
1637        """Replace bad characters and remove trailing dots from parts."""
1638        table = cls._windows_illegal_name_trans_table
1639        if not table:
1640            illegal = ':<>|"?*'
1641            table = str.maketrans(illegal, '_' * len(illegal))
1642            cls._windows_illegal_name_trans_table = table
1643        arcname = arcname.translate(table)
1644        # remove trailing dots
1645        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1646        # rejoin, removing empty parts.
1647        arcname = pathsep.join(x for x in arcname if x)
1648        return arcname
1649
1650    def _extract_member(self, member, targetpath, pwd):
1651        """Extract the ZipInfo object 'member' to a physical
1652           file on the path targetpath.
1653        """
1654        if not isinstance(member, ZipInfo):
1655            member = self.getinfo(member)
1656
1657        # build the destination pathname, replacing
1658        # forward slashes to platform specific separators.
1659        arcname = member.filename.replace('/', os.path.sep)
1660
1661        if os.path.altsep:
1662            arcname = arcname.replace(os.path.altsep, os.path.sep)
1663        # interpret absolute pathname as relative, remove drive letter or
1664        # UNC path, redundant separators, "." and ".." components.
1665        arcname = os.path.splitdrive(arcname)[1]
1666        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
1667        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1668                                   if x not in invalid_path_parts)
1669        if os.path.sep == '\\':
1670            # filter illegal characters on Windows
1671            arcname = self._sanitize_windows_name(arcname, os.path.sep)
1672
1673        targetpath = os.path.join(targetpath, arcname)
1674        targetpath = os.path.normpath(targetpath)
1675
1676        # Create all upper directories if necessary.
1677        upperdirs = os.path.dirname(targetpath)
1678        if upperdirs and not os.path.exists(upperdirs):
1679            os.makedirs(upperdirs)
1680
1681        if member.is_dir():
1682            if not os.path.isdir(targetpath):
1683                os.mkdir(targetpath)
1684            return targetpath
1685
1686        with self.open(member, pwd=pwd) as source, \
1687             open(targetpath, "wb") as target:
1688            shutil.copyfileobj(source, target)
1689
1690        return targetpath
1691
1692    def _writecheck(self, zinfo):
1693        """Check for errors before writing a file to the archive."""
1694        if zinfo.filename in self.NameToInfo:
1695            import warnings
1696            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1697        if self.mode not in ('w', 'x', 'a'):
1698            raise ValueError("write() requires mode 'w', 'x', or 'a'")
1699        if not self.fp:
1700            raise ValueError(
1701                "Attempt to write ZIP archive that was already closed")
1702        _check_compression(zinfo.compress_type)
1703        if not self._allowZip64:
1704            requires_zip64 = None
1705            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1706                requires_zip64 = "Files count"
1707            elif zinfo.file_size > ZIP64_LIMIT:
1708                requires_zip64 = "Filesize"
1709            elif zinfo.header_offset > ZIP64_LIMIT:
1710                requires_zip64 = "Zipfile size"
1711            if requires_zip64:
1712                raise LargeZipFile(requires_zip64 +
1713                                   " would require ZIP64 extensions")
1714
1715    def write(self, filename, arcname=None,
1716              compress_type=None, compresslevel=None):
1717        """Put the bytes from filename into the archive under the name
1718        arcname."""
1719        if not self.fp:
1720            raise ValueError(
1721                "Attempt to write to ZIP archive that was already closed")
1722        if self._writing:
1723            raise ValueError(
1724                "Can't write to ZIP archive while an open writing handle exists"
1725            )
1726
1727        zinfo = ZipInfo.from_file(filename, arcname,
1728                                  strict_timestamps=self._strict_timestamps)
1729
1730        if zinfo.is_dir():
1731            zinfo.compress_size = 0
1732            zinfo.CRC = 0
1733        else:
1734            if compress_type is not None:
1735                zinfo.compress_type = compress_type
1736            else:
1737                zinfo.compress_type = self.compression
1738
1739            if compresslevel is not None:
1740                zinfo._compresslevel = compresslevel
1741            else:
1742                zinfo._compresslevel = self.compresslevel
1743
1744        if zinfo.is_dir():
1745            with self._lock:
1746                if self._seekable:
1747                    self.fp.seek(self.start_dir)
1748                zinfo.header_offset = self.fp.tell()  # Start of header bytes
1749                if zinfo.compress_type == ZIP_LZMA:
1750                # Compressed data includes an end-of-stream (EOS) marker
1751                    zinfo.flag_bits |= 0x02
1752
1753                self._writecheck(zinfo)
1754                self._didModify = True
1755
1756                self.filelist.append(zinfo)
1757                self.NameToInfo[zinfo.filename] = zinfo
1758                self.fp.write(zinfo.FileHeader(False))
1759                self.start_dir = self.fp.tell()
1760        else:
1761            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1762                shutil.copyfileobj(src, dest, 1024*8)
1763
1764    def writestr(self, zinfo_or_arcname, data,
1765                 compress_type=None, compresslevel=None):
1766        """Write a file into the archive.  The contents is 'data', which
1767        may be either a 'str' or a 'bytes' instance; if it is a 'str',
1768        it is encoded as UTF-8 first.
1769        'zinfo_or_arcname' is either a ZipInfo instance or
1770        the name of the file in the archive."""
1771        if isinstance(data, str):
1772            data = data.encode("utf-8")
1773        if not isinstance(zinfo_or_arcname, ZipInfo):
1774            zinfo = ZipInfo(filename=zinfo_or_arcname,
1775                            date_time=time.localtime(time.time())[:6])
1776            zinfo.compress_type = self.compression
1777            zinfo._compresslevel = self.compresslevel
1778            if zinfo.filename[-1] == '/':
1779                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1780                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1781            else:
1782                zinfo.external_attr = 0o600 << 16     # ?rw-------
1783        else:
1784            zinfo = zinfo_or_arcname
1785
1786        if not self.fp:
1787            raise ValueError(
1788                "Attempt to write to ZIP archive that was already closed")
1789        if self._writing:
1790            raise ValueError(
1791                "Can't write to ZIP archive while an open writing handle exists."
1792            )
1793
1794        if compress_type is not None:
1795            zinfo.compress_type = compress_type
1796
1797        if compresslevel is not None:
1798            zinfo._compresslevel = compresslevel
1799
1800        zinfo.file_size = len(data)            # Uncompressed size
1801        with self._lock:
1802            with self.open(zinfo, mode='w') as dest:
1803                dest.write(data)
1804
1805    def __del__(self):
1806        """Call the "close()" method in case the user forgot."""
1807        self.close()
1808
1809    def close(self):
1810        """Close the file, and for mode 'w', 'x' and 'a' write the ending
1811        records."""
1812        if self.fp is None:
1813            return
1814
1815        if self._writing:
1816            raise ValueError("Can't close the ZIP file while there is "
1817                             "an open writing handle on it. "
1818                             "Close the writing handle before closing the zip.")
1819
1820        try:
1821            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1822                with self._lock:
1823                    if self._seekable:
1824                        self.fp.seek(self.start_dir)
1825                    self._write_end_record()
1826        finally:
1827            fp = self.fp
1828            self.fp = None
1829            self._fpclose(fp)
1830
    def _write_end_record(self):
        """Write the central directory and the end-of-archive record(s).

        One central directory entry is written to self.fp for every ZipInfo
        in self.filelist, followed by the end-of-archive record and the
        archive comment.  ZIP64 end records are written first when the entry
        count, central directory offset or central directory size exceeds
        the classic limits; LargeZipFile is raised in that situation if
        ZIP64 is not allowed.  In append mode the file is truncated after
        the last byte written.
        """
        for zinfo in self.filelist:         # write central directory
            # Pack the timestamp into the two date/time header fields;
            # years are stored relative to 1980 and seconds are halved.
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            # Values too large for the fixed header fields are collected in
            # `extra' and the header field is set to 0xffffffff instead.
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Replace any existing field with header ID 1 by a freshly
                # packed ZIP64 field (ID 1, then 8 bytes per collected
                # value), placed in front of the remaining extra data.
                extra_data = _strip_extra(extra_data, (1,))
                extra_data = struct.pack(
                    '<HH' + 'Q'*len(extra),
                    1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            # bzip2 and LZMA raise the minimum version recorded for the entry.
            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            # Never lower a version the ZipInfo already asks for.
            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
            # Fixed-size header first, then the variable-length parts in the
            # same order as their lengths were packed above.
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        # pos2 is the file position right after the central directory.
        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            # NOTE(review): the constants 44, 45, 45, 0, 0 follow the field
            # layout of structEndArchive64, which is defined elsewhere in
            # this file - confirm their meaning there.
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            # The locator carries pos2, the position at which the ZIP64 end
            # record was just written.
            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # Clamp the values to what the classic end record can hold.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        if self.mode == "a":
            # Discard anything left over beyond the records just written.
            self.fp.truncate()
        self.fp.flush()
1924
1925    def _fpclose(self, fp):
1926        assert self._fileRefCnt > 0
1927        self._fileRefCnt -= 1
1928        if not self._fileRefCnt and not self._filePassed:
1929            fp.close()
1930
1931
class PyZipFile(ZipFile):
    """ZipFile specialization for archiving Python modules and packages.

    writepy() stores modules in compiled (.pyc) form where possible.
    """

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember the `optimize' level.

        `optimize' controls which compiled file _get_codename() uses: -1
        accepts whichever up-to-date compiled file is present, 0, 1 or 2
        request that specific optimization level.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add the Python code found at "pathname" to the ZIP archive.

        * A package directory (one containing __init__.py) is added
          together with all of its *.py files and, recursively, its
          sub-packages.
        * A plain directory contributes the *.py files it contains.
        * Anything else must be a single *.py file, otherwise
          RuntimeError is raised.

        Modules are stored as module.pyc, compiling module.py first if
        necessary; if compilation fails the .py source is stored instead.
        `basename' is the archive directory prefix.  If filterfunc is
        given it is called with each candidate path and a false result
        skips that file or directory.
        """
        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return

        def rejected(candidate):
            # True when filterfunc vetoes the module file `candidate'.
            if not filterfunc or filterfunc(candidate):
                return False
            if self.debug:
                print('file %r skipped by filterfunc' % candidate)
            return True

        def store(source, prefix, verb="Adding"):
            # Locate (or build) the compiled file for the *.py path
            # `source' and write it below the archive prefix `prefix'.
            fname, arcname = self._get_codename(source[0:-3], prefix)
            if self.debug:
                print(verb, arcname)
            self.write(fname, arcname)

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            store(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in sorted(os.listdir(pathname)):
                path = os.path.join(pathname, filename)
                if os.path.splitext(filename)[1] != ".py" or rejected(path):
                    continue
                store(path, basename)
            return

        # This is a package directory, add it
        package = os.path.split(pathname)[1]
        basename = "%s/%s" % (basename, package) if basename else package
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        store(initname, basename)

        entries = sorted(os.listdir(pathname))
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # Sub-package: recurse with the extended prefix.
                    self.writepy(path, basename,
                                 filterfunc=filterfunc)
            elif os.path.splitext(filename)[1] == ".py":
                if rejected(path):
                    continue
                store(path, basename)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module at `pathname'.

        `pathname' is the module path without its extension.  The returned
        filename is the file to read from disk (a compiled file, or the
        .py source when compilation fails) and archivename is its name in
        the archive, prefixed with `basename' when that is non-empty.
        For example, given /python/lib/string, return
        (/python/lib/string.pyc, string).
        """
        def _compile(source, optimize=-1):
            # Byte-compile `source'; report success as a boolean.
            import py_compile
            if self.debug:
                print("Compiling", source)
            try:
                py_compile.compile(source, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)

        def fresh(candidate):
            # A compiled file is usable when it exists and is at least as
            # new as the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  The legacy .pyc
            # next to the source wins, then the __pycache__ files by
            # increasing optimization level; all are archived under the
            # legacy .pyc name.
            for candidate in (file_pyc, pycache_opt0,
                              pycache_opt1, pycache_opt2):
                if fresh(candidate):
                    fname = candidate
                    arcname = file_pyc
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    level = sys.flags.optimize
                    if level == 0:
                        fname = pycache_opt0
                    elif level == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                    arcname = file_pyc
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_opt0
            elif self._optimize == 1:
                fname = pycache_opt1
            elif self._optimize == 2:
                fname = pycache_opt2
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            arcname = file_pyc
            if not fresh(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
2104
2105
def _parents(path):
    """
    Yield every proper ancestor of a posixpath.sep-separated
    path, nearest first; the path itself is not included.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    lineage = _ancestry(path)
    # The first element of the ancestry is the path itself; skip it.
    return itertools.islice(lineage, 1, None)
2123
2124
2125def _ancestry(path):
2126    """
2127    Given a path with elements separated by
2128    posixpath.sep, generate all elements of that path
2129
2130    >>> list(_ancestry('b/d'))
2131    ['b/d', 'b']
2132    >>> list(_ancestry('/b/d/'))
2133    ['/b/d', '/b']
2134    >>> list(_ancestry('b/d/f/'))
2135    ['b/d/f', 'b/d', 'b']
2136    >>> list(_ancestry('b'))
2137    ['b']
2138    >>> list(_ancestry(''))
2139    []
2140    """
2141    path = path.rstrip(posixpath.sep)
2142    while path and path != posixpath.sep:
2143        yield path
2144        path, tail = posixpath.split(path)
2145
2146
# dict.fromkeys() builds a dict keyed by the items of an iterable.  Keys are
# unique and keep insertion order, so iterating the result yields every
# distinct item once, in first-seen order.  Note that the result is a dict,
# not a list.
_dedupe = dict.fromkeys
"""Deduplicate an iterable in original order"""
2149
2150
2151def _difference(minuend, subtrahend):
2152    """
2153    Return items in minuend not in subtrahend, retaining order
2154    with O(1) lookup.
2155    """
2156    return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2157
2158
class CompleteDirs(ZipFile):
    """
    A ZipFile subclass whose namelist() also reports directories
    that are only implied by the paths of other entries.
    """

    @staticmethod
    def _implied_dirs(names):
        """
        Return the parent directories of names (each with a trailing
        slash) that are not themselves in names, without duplicates
        and in first-seen order.
        """
        ancestors = itertools.chain.from_iterable(
            _parents(name) for name in names)
        candidates = (ancestor + posixpath.sep for ancestor in ancestors)
        return _dedupe(_difference(candidates, names))

    def namelist(self):
        """Return the stored names followed by the implied directories."""
        explicit = super().namelist()
        implied = list(self._implied_dirs(explicit))
        return explicit + implied

    def _name_set(self):
        """Return namelist() as a set, for membership tests."""
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        Return name with a trailing slash when it is known to the
        archive only in that directory form; otherwise return name
        unchanged.
        """
        known = self._name_set()
        as_dir = name + '/'
        if name not in known and as_dir in known:
            return as_dir
        return name

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an
        appropriate CompleteDirs subclass.

        A CompleteDirs instance is returned as is.  Anything that is
        not a ZipFile is passed to the class constructor.  An existing
        ZipFile is re-wrapped, without reopening it, in a new object
        that shares its attributes.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, ZipFile):
            return cls(source)

        # Only allow for FastPath when supplied zipfile is read-only
        wrapper_cls = cls if 'r' in source.mode else CompleteDirs

        wrapped = wrapper_cls.__new__(wrapper_cls)
        wrapped.__dict__.update(source.__dict__)
        return wrapped
2207
2208
class FastLookup(CompleteDirs):
    """
    CompleteDirs variant that computes the name list and the
    name set once and serves the stored results afterwards.
    """
    def namelist(self):
        """Return the complete name list, computing it on first use."""
        try:
            cached = self.__names
        except AttributeError:
            pass
        else:
            return cached
        self.__names = super().namelist()
        return self.__names

    def _name_set(self):
        """Return the set of names, computing it on first use."""
        try:
            cached = self.__lookup
        except AttributeError:
            pass
        else:
            return cached
        self.__lookup = super()._name_set()
        return self.__lookup
2225
2226
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """
        Wrap root (a zipfile object or a filename) and point at the
        entry named at; the empty string denotes the archive root.
        """
        self.root = FastLookup.make(root)
        self.at = at

    def open(self, mode='r', *args, **kwargs):
        """
        Open this entry as text or binary following the semantics
        of ``pathlib.Path.open()`` by passing arguments through
        to io.TextIOWrapper().

        Only the first character of mode is passed to the zip file;
        a 'b' anywhere in mode selects binary, in which case extra
        arguments raise ValueError.  A ``pwd`` keyword argument is
        forwarded to the zip file instead of the text wrapper.
        """
        pwd = kwargs.pop('pwd', None)
        zip_mode = mode[0]
        stream = self.root.open(self.at, zip_mode, pwd=pwd)
        if 'b' in mode:
            if args or kwargs:
                raise ValueError("encoding args invalid for binary operation")
            return stream
        return io.TextIOWrapper(stream, *args, **kwargs)

    @property
    def name(self):
        """The final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Return the decoded content; arguments are passed to open()."""
        with self.open('r', *args, **kwargs) as strm:
            return strm.read()

    def read_bytes(self):
        """Return the raw content of this entry as bytes."""
        with self.open('rb') as strm:
            return strm.read()

    def _is_child(self, path):
        """Return True if path is located directly inside this path."""
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        """Return a Path for the entry at within the same archive."""
        return Path(self.root, at)

    def is_dir(self):
        """Return True for the archive root and for names ending in '/'."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """
        Return True if this path names an existing entry that is not
        a directory.
        """
        # Requiring exists() keeps paths to missing entries from being
        # reported as files merely because they are not directories.
        return self.exists() and not self.is_dir()

    def exists(self):
        """Return True if this path is one of the archive's names."""
        return self.at in self.root._name_set()

    def iterdir(self):
        """
        Iterate over the direct children of this directory.

        Raises ValueError if this path is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self.root.namelist())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """
        Return the Path obtained by appending add to this one.  The
        result carries a trailing slash when the archive knows the
        joined name as a directory.
        """
        next = posixpath.join(self.at, add)
        return self._next(self.root.resolve_dir(next))

    __truediv__ = joinpath

    @property
    def parent(self):
        """The containing directory; the archive root for top-level entries."""
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)
2364
2365
def main(args=None):
    """Command-line entry point for the zipfile module.

    Exactly one of -l (list), -e (extract), -c (create) or -t (test) must
    be given.  `args' is the argument list handed to argparse; None makes
    argparse read the process arguments.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-l', '--list', metavar='<zipfile>',
                         help='Show listing of a zipfile')
    actions.add_argument('-e', '--extract', nargs=2,
                         metavar=('<zipfile>', '<output_dir>'),
                         help='Extract zipfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create zipfile from sources')
    actions.add_argument('-t', '--test', metavar='<zipfile>',
                         help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        with ZipFile(args.test, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")
        return

    if args.list is not None:
        with ZipFile(args.list, 'r') as zf:
            zf.printdir()
        return

    if args.extract is not None:
        archive, out_dir = args.extract
        with ZipFile(archive, 'r') as zf:
            zf.extractall(out_dir)
        return

    if args.create is None:
        return

    # First value is the archive to create, the rest are the sources.
    zip_name, *sources = args.create

    def add_to_zip(zf, path, zippath):
        # Files are stored deflated; directories are walked recursively,
        # with an entry of their own when they have an archive name.
        # Anything that is neither a file nor a directory is ignored.
        if os.path.isfile(path):
            zf.write(path, zippath, ZIP_DEFLATED)
            return
        if not os.path.isdir(path):
            return
        if zippath:
            zf.write(path, zippath)
        for nm in sorted(os.listdir(path)):
            add_to_zip(zf,
                       os.path.join(path, nm), os.path.join(zippath, nm))

    with ZipFile(zip_name, 'w') as zf:
        for path in sources:
            # Archive name is the last path component; a trailing
            # separator falls back to the component before it.
            zippath = (os.path.basename(path)
                       or os.path.basename(os.path.dirname(path)))
            if zippath in ('', os.curdir, os.pardir):
                zippath = ''
            add_to_zip(zf, path, zippath)
2425
2426
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()
2429