1"""
2Read and write ZIP files.
3
4XXX references to utf-8 need further investigation.
5"""
6import io
7import os
8import importlib.util
9import sys
10import time
11import stat
12import shutil
13import struct
14import binascii
15import threading
16
17try:
18    import zlib # We may need its compression method
19    crc32 = zlib.crc32
20except ImportError:
21    zlib = None
22    crc32 = binascii.crc32
23
24try:
25    import bz2 # We may need its compression method
26except ImportError:
27    bz2 = None
28
29try:
30    import lzma # We may need its compression method
31except ImportError:
32    lzma = None
33
# Names exported by a star-import of this module.
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
37
class BadZipFile(Exception):
    """Raised when ZIP data is malformed, fails its CRC-32 check, or uses
    a multi-disk layout that this module does not support."""
    pass


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """

# Both older spellings are plain aliases of BadZipFile (same class object).
error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
49
50
# Sizes above this value make ZipInfo.FileHeader() switch to (or demand)
# the ZIP64 extra field.
ZIP64_LIMIT = (1 << 31) - 1
# 0xFFFF: the entry-count and comment-length fields of the "end of central
# directory" record are 16-bit ("H") values.
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values; ZipInfo.FileHeader() raises the
# member's version to the matching constant for ZIP64, bzip2 or LZMA.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
68
69# Below are some formats and associated data for reading/writing headers using
70# the struct module.  The names and structures of headers/records are those used
71# in the PKWARE description of the ZIP file format:
72#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
73# (URL valid as of January 2008)
74
75# The "end of central directory" structure, magic number, size, and indices
76# (section V.I in the format document)
# Little-endian: 4-byte signature, four 16-bit fields, two 32-bit fields and
# a 16-bit comment length (eight values, indexed by the _ECD_* names below).
# Note this format is a bytes object whereas the other formats are str;
# struct accepts both.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment bytes and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# This is the format packed by ZipInfo.FileHeader().
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
# (unpacked by _EndRecData64 as: signature, disk number, relative offset,
# number of disks)
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

# Numeric signature value; presumably that of the optional data descriptor
# record written after file data -- not used in the visible part of the file.
_DD_SIGNATURE = 0x08074b50
163
164_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
165
166def _strip_extra(extra, xids):
167    # Remove Extra Fields with specified IDs.
168    unpack = _EXTRA_FIELD_STRUCT.unpack
169    modified = False
170    buffer = []
171    start = i = 0
172    while i + 4 <= len(extra):
173        xid, xlen = unpack(extra[i : i + 4])
174        j = i + 4 + xlen
175        if xid in xids:
176            if i != start:
177                buffer.append(extra[start : i])
178            start = j
179            modified = True
180        i = j
181    if not modified:
182        return extra
183    return b''.join(buffer)
184
def _check_zipfile(fp):
    """Return True when _EndRecData() finds an end-of-archive record in *fp*.

    An OSError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        record = _EndRecData(fp)
    except OSError:
        return False
    # A truthy record means the magic number was found.
    return True if record else False
192
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or an already open file-like object (anything
    with a ``read`` attribute).  Any OSError raised while opening or probing
    the file yields False.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it gets closed.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
208
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    *offset* is the (negative) position of the classic end-of-archive record
    relative to the end of the file.  *endrec* is returned unchanged whenever
    the ZIP64 locator or record is missing, short, or has the wrong
    signature; otherwise its first seven items are overwritten in place.
    Raises BadZipFile for archives spanning multiple disks.
    """
    # The locator sits immediately before the classic record.
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                                 locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data', i.e. the ZIP64 record ends exactly
    # where the locator begins.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, record)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = fields[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = fields[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = fields[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = fields[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = fields[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = fields[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = fields[_CD64_OFFSET_START_CENTDIR]
    return endrec
250
251
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list: the eight values unpacked from the record,
    followed by the archive comment (bytes) and the file offset at which
    the record starts.  The list is passed through _EndRecData64() so ZIP64
    values, when present, replace the classic ones."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # First guess: no archive comment, so the record is the very last thing
    # in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    is_bare_record = (len(tail) == sizeEndCentDir
                      and tail[0:4] == stringEndArchive
                      and tail[-2:] == b"\000\000")
    if is_bare_record:
        # Signature matches and the comment length is zero.
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec.append(b"")                          # blank comment
        endrec.append(filesize - sizeEndCentDir)    # record start offset
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it has an archive comment.  The
    # comment is the last item in the file and may be up to 64K long, so
    # search that much of the tail for the record signature.  It is assumed
    # that the signature does not appear inside the comment itself.
    search_from = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_from, 0)
    window = fpin.read()
    hit = window.rfind(stringEndArchive)
    if hit < 0:
        # Unable to find a valid end of central directory structure
        return None

    raw = window[hit:hit + sizeEndCentDir]
    if len(raw) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw))
    comment_begin = hit + sizeEndCentDir
    comment_len = endrec[_ECD_COMMENT_SIZE]     # as claimed by the zip file
    endrec.append(window[comment_begin:comment_begin + comment_len])
    endrec.append(search_from + hit)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_from + hit - filesize, endrec)
311
312
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* stamped with *date_time*.

        *date_time* is a (year, month, day, hour, minute, second) sequence.
        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a summary showing the name plus any non-default details."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits are shown as a file mode, the lower 16 as hex.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        *zip64* selects whether a ZIP64 extra record carrying the sizes is
        appended: None decides from the sizes, True always appends it.
        Raises LargeZipFile when a size exceeds ZIP64_LIMIT and *zip64* is
        false.  As a side effect extract_version and create_version are
        raised to the minimum the header requires.
        """
        dt = self.date_time
        # Pack the timestamp into bit fields; seconds are stored halved.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Record ID 1, then the data length, then both 64-bit sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this entry.

        Each record with ID 0x0001 supplies, in order, replacements for
        file_size, compress_size and header_offset, but only for those
        attributes currently holding a sentinel value.  Raises BadZipFile
        when a record is truncated, has an unexpected length, or holds
        fewer values than the sentinels require.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # The record is shorter than the sentinels demand:
                    # report corrupt data instead of leaking IndexError.
                    raise BadZipFile(
                        "Corrupt extra field %04x (size=%d)" % (tp, ln)
                    ) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory.

        A member counts as a directory when its name ends with '/'; an
        empty name is therefore not a directory.
        """
        # endswith() is safe on an empty name, unlike indexing with [-1].
        return self.filename.endswith('/')
531
532
533# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
534# internal keys. We noticed that a direct implementation is faster than
535# relying on binascii.crc32().
536
537_crctable = None
538def _gen_crc(crc):
539    for j in range(8):
540        if crc & 1:
541            crc = (crc >> 1) ^ 0xEDB88320
542        else:
543            crc >>= 1
544    return crc
545
546# ZIP supports a password-based form of encryption. Even though known
547# plaintext attacks have been found against it, it is still useful
548# to be able to get data out of such a file.
549#
550# Usage:
551#     zd = _ZipDecrypter(mypwd)
552#     plain_bytes = zd(cypher_bytes)
553
def _ZipDecrypter(pwd):
    """Return a function that decrypts bytes using keys derived from *pwd*.

    *pwd* is iterated byte by byte (integers) to initialise three running
    keys.  The returned callable takes a bytes-like object and returns the
    decrypted bytes; the keys keep evolving across calls, so data must be
    fed in stream order.
    """
    global _crctable
    if _crctable is None:
        # Build the 256-entry table once and share it between decrypters.
        _crctable = [_gen_crc(value) for value in range(256)]
    table = _crctable

    key0, key1, key2 = 305419896, 591751049, 878082192

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        # Add the low byte of key0, then multiply-and-increment, all
        # truncated to 32 bits.
        key1 = ((key1 + (key0 & 0xFF)) * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for pwd_byte in pwd:
        update_keys(pwd_byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            k = key2 | 2
            byte ^= ((k * (k ^ 1)) >> 8) & 0xFF
            # The keys are advanced with the decrypted byte.
            update_keys(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
590
591
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a small header.

    The header is emitted with the first output: the two bytes 9 and 4, the
    16-bit little-endian length of the encoded filter properties, then the
    properties themselves.
    """

    def __init__(self):
        # The real compressor is created lazily, on first compress()/flush().
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, prefixing the header on the first call."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, prefixing the header if nothing was written yet."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
613
614
class LZMADecompressor:
    """Decompressor for a raw LZMA1 stream preceded by a small header.

    The header is four bytes, the last two being the little-endian length
    of the filter properties, followed by the properties.  Input is
    buffered until the whole header plus at least one more byte arrived.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return whatever can be decompressed so far."""
        if self._decomp is None:
            pending = self._unconsumed + data
            self._unconsumed = pending
            if len(pending) <= 4:
                return b''
            (psize,) = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            # Everything after the header is compressed payload.
            data = pending[body_start:]
            del self._unconsumed

        output = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return output
641
642
# Human-readable names keyed by compression-method number.  Used only for
# display (repr output and "not supported" error messages); several entries
# have no compressor/decompressor in this module.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
662
def _check_compression(compression):
    """Validate that *compression* is usable in this interpreter.

    Raises RuntimeError when the method is known but its module failed to
    import, and NotImplementedError for any other method number.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        return
    if compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
        return
    if compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
        return
    raise NotImplementedError("That compression method is not supported")
680
681
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*, or None.

    *compresslevel* is forwarded to the deflate and bzip2 compressors when
    given; it is ignored for ZIP_LZMA.  None is returned for every other
    method (including ZIP_STORED).
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        # Negative window bits: raw deflate stream without zlib header.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
696
697
def _get_decompressor(compress_type):
    """Return a new decompressor for *compress_type* (None for ZIP_STORED).

    Raises NotImplementedError for unsupported methods, naming the method
    when compressor_names knows it.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    label = compressor_names.get(compress_type)
    if not label:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, label))
713
714
715class _SharedFile:
716    def __init__(self, file, pos, close, lock, writing):
717        self._file = file
718        self._pos = pos
719        self._close = close
720        self._lock = lock
721        self._writing = writing
722        self.seekable = file.seekable
723        self.tell = file.tell
724
725    def seek(self, offset, whence=0):
726        with self._lock:
727            if self._writing():
728                raise ValueError("Can't reposition in the ZIP file while "
729                        "there is an open writing handle on it. "
730                        "Close the writing handle before trying to read.")
731            self._file.seek(offset, whence)
732            self._pos = self._file.tell()
733            return self._pos
734
735    def read(self, n=-1):
736        with self._lock:
737            if self._writing():
738                raise ValueError("Can't read from the ZIP file while there "
739                        "is an open writing handle on it. "
740                        "Close the writing handle before trying to read.")
741            self._file.seek(self._pos)
742            data = self._file.read(n)
743            self._pos = self._file.tell()
744            return data
745
746    def close(self):
747        if self._file is not None:
748            fileobj = self._file
749            self._file = None
750            self._close(fileobj)
751
752# Provide the tell method for unseekable stream
753class _Tellable:
754    def __init__(self, fp):
755        self.fp = fp
756        self.offset = 0
757
758    def write(self, data):
759        n = self.fp.write(data)
760        self.offset += n
761        return n
762
763    def tell(self):
764        return self.offset
765
766    def flush(self):
767        self.fp.flush()
768
769    def close(self):
770        self.fp.close()
771
772
773class ZipExtFile(io.BufferedIOBase):
774    """File-like object for reading an archive member.
775       Is returned by ZipFile.open().
776    """
777
    # Max size supported by decompressor.
    # NOTE(review): "-" binds tighter than "<<", so this evaluates to
    # 1 << 30, not (1 << 31) - 1 -- confirm which value is intended.
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24
786
    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up a reader for one archive member.

        fileobj       -- object the compressed data is read from
        mode          -- stored as the ``mode`` attribute
        zipinfo       -- entry supplying the compression type, sizes, name
                         and (when set) the expected CRC
        decrypter     -- optional callable for encrypted members
        close_fileobj -- stored as ``_close_fileobj``
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        # CRC checking is enabled only when the entry carries a CRC value.
        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        # Remember the starting state so the stream can be restarted, but
        # only when the underlying object reports itself seekable.
        # Note: the AttributeError handler covers a fileobj without
        # seekable()/tell(), and also the case where _running_crc was never
        # set above (no CRC on the entry); in both cases _seekable stays False.
        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass
829
830    def __repr__(self):
831        result = ['<%s.%s' % (self.__class__.__module__,
832                              self.__class__.__qualname__)]
833        if not self.closed:
834            result.append(' name=%r mode=%r' % (self.name, self.mode))
835            if self._compress_type != ZIP_STORED:
836                result.append(' compress_type=%s' %
837                              compressor_names.get(self._compress_type,
838                                                   self._compress_type))
839        else:
840            result.append(' [closed]')
841        result.append('>')
842        return ''.join(result)
843
844    def readline(self, limit=-1):
845        """Read and return a line from the stream.
846
847        If limit is specified, at most limit bytes will be read.
848        """
849
850        if limit < 0:
851            # Shortcut common case - newline found in buffer.
852            i = self._readbuffer.find(b'\n', self._offset) + 1
853            if i > 0:
854                line = self._readbuffer[self._offset: i]
855                self._offset = i
856                return line
857
858        return io.BufferedIOBase.readline(self, limit)
859
860    def peek(self, n=1):
861        """Returns buffered bytes without advancing the position."""
862        if n > len(self._readbuffer) - self._offset:
863            chunk = self.read(n)
864            if len(chunk) > self._offset:
865                self._readbuffer = chunk + self._readbuffer[self._offset:]
866                self._offset = 0
867            else:
868                self._offset -= len(chunk)
869
870        # Return up to 512 bytes to reduce allocation overhead for tight loops.
871        return self._readbuffer[self._offset: self._offset + 512]
872
    def readable(self):
        """Return True: this stream always supports reading."""
        return True
875
876    def read(self, n=-1):
877        """Read and return up to n bytes.
878        If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
879        """
880        if n is None or n < 0:
881            buf = self._readbuffer[self._offset:]
882            self._readbuffer = b''
883            self._offset = 0
884            while not self._eof:
885                buf += self._read1(self.MAX_N)
886            return buf
887
888        end = n + self._offset
889        if end < len(self._readbuffer):
890            buf = self._readbuffer[self._offset:end]
891            self._offset = end
892            return buf
893
894        n = end - len(self._readbuffer)
895        buf = self._readbuffer[self._offset:]
896        self._readbuffer = b''
897        self._offset = 0
898        while n > 0 and not self._eof:
899            data = self._read1(n)
900            if n < len(data):
901                self._readbuffer = data
902                self._offset = n
903                buf += data[:n]
904                break
905            buf += data
906            n -= len(data)
907        return buf
908
909    def _update_crc(self, newdata):
910        # Update the CRC using the given data.
911        if self._expected_crc is None:
912            # No need to compute the CRC if we don't have a reference value
913            return
914        self._running_crc = crc32(newdata, self._running_crc)
915        # Check the CRC if we're at the end of the file
916        if self._eof and self._running_crc != self._expected_crc:
917            raise BadZipFile("Bad CRC-32 for file %r" % self.name)
918
919    def read1(self, n):
920        """Read up to n bytes with at most one read() system call."""
921
922        if n is None or n < 0:
923            buf = self._readbuffer[self._offset:]
924            self._readbuffer = b''
925            self._offset = 0
926            while not self._eof:
927                data = self._read1(self.MAX_N)
928                if data:
929                    buf += data
930                    break
931            return buf
932
933        end = n + self._offset
934        if end < len(self._readbuffer):
935            buf = self._readbuffer[self._offset:end]
936            self._offset = end
937            return buf
938
939        n = end - len(self._readbuffer)
940        buf = self._readbuffer[self._offset:]
941        self._readbuffer = b''
942        self._offset = 0
943        if n > 0:
944            while not self._eof:
945                data = self._read1(n)
946                if n < len(data):
947                    self._readbuffer = data
948                    self._offset = n
949                    buf += data[:n]
950                    break
951                if data:
952                    buf += data
953                    break
954        return buf
955
    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        # Returns b'' when EOF was already reached or n is not positive.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            # Input left over from the previous decompress() call is reused
            # first; the file is only read when n exceeds what is left over.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            # Stored members need no decompression; EOF is reached once all
            # compressed bytes have been consumed.
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            # n is passed as the second argument of decompress()
            # (presumably the output size limit -- confirm against the
            # decompressor returned by _get_decompressor).
            data = self._decompressor.decompress(data, n)
            # EOF: the decompressor reports end of stream, or no compressed
            # input remains in the file and none is pending in the
            # decompressor ("and" binds tighter than "or" here).
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the number of uncompressed bytes still
        # expected for this member.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        # _eof must be final before this call: the CRC is verified at EOF.
        self._update_crc(data)
        return data
991
992    def _read2(self, n):
993        if self._compress_left <= 0:
994            return b''
995
996        n = max(n, self.MIN_READ_SIZE)
997        n = min(n, self._compress_left)
998
999        data = self._fileobj.read(n)
1000        self._compress_left -= len(data)
1001        if not data:
1002            raise EOFError
1003
1004        if self._decrypter is not None:
1005            data = self._decrypter(data)
1006        return data
1007
1008    def close(self):
1009        try:
1010            if self._close_fileobj:
1011                self._fileobj.close()
1012        finally:
1013            super().close()
1014
1015    def seekable(self):
1016        return self._seekable
1017
    def seek(self, offset, whence=0):
        """Move to a new position in the uncompressed data and return it.

        whence selects the reference point: 0 = start of the member,
        1 = current position, 2 = end of the member.  The target position is
        clamped to the range [0, original file size].

        Raises io.UnsupportedOperation if the stream is not seekable and
        ValueError for any other whence value.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        # Clamp the target to the valid range of the member.
        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        # read_offset: distance to travel relative to the current position.
        # buff_offset: where the target would fall inside _readbuffer.
        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            # NOTE(review): self._decrypter is not reset here although
            # _read2() passes data through it -- confirm that rewinding
            # behaves correctly for encrypted members.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            # After the reset the whole distance from the start must be read.
            read_offset = new_pos

        # Move forward by reading and discarding data in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()
1063
1064    def tell(self):
1065        if not self._seekable:
1066            raise io.UnsupportedOperation("underlying stream is not seekable")
1067        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
1068        return filepos
1069
1070
class _ZipWriteFile(io.BufferedIOBase):
    """Write-only stream for a single member being added to a ZipFile.

    Data passed to write() is checksummed, optionally compressed and written
    to the archive's file object.  close() finalises the member's size and
    CRC information and registers the member with the owning ZipFile.
    """

    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        # Looked up on every access so that a replaced ZipFile.fp is honoured.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write data to the member and return the number of bytes consumed.

        Accepts any object supporting the buffer protocol.  Raises ValueError
        if the stream is already closed.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # len() of a non-byte buffer (e.g. array.array('I')) counts items,
        # not bytes, which would record a wrong file size.  Use the byte
        # count of a memoryview for everything that is not bytes/bytearray.
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member: flush the compressor and write final header data.

        Raises RuntimeError if a size limit is exceeded without ZIP64.  The
        owning ZipFile's writing flag is cleared even when finalising fails,
        so the archive does not stay blocked by a dead write handle.
        """
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError('File size unexpectedly exceeded ZIP64 '
                                           'limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError('Compressed size unexpectedly exceeded '
                                           'ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False
1145
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
                compresslevel=None)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
          or append 'a'.
    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.
    compresslevel: None (default for the given compression type) or an integer
                   specifying the level to pass to the compressor.
                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
                   When using ZIP_BZIP2 integers 1 through 9 are accepted.

    """

    fp = None                   # Set here since __del__ checks it
    # Translation table for characters not allowed in Windows file names;
    # built on first use by _sanitize_windows_name() and cached on the class.
    _windows_illegal_name_trans_table = None
1171
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'.

        file may be a path (str or os.PathLike) or a file-like object.
        Raises ValueError for an unsupported mode.  If reading the archive
        contents or positioning the file fails, the file is released through
        _fpclose() and the exception is re-raised.
        """
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # Maps the ZipFile mode to the mode used for io.open(), and each
            # open mode to the fallback tried when opening raises OSError:
            # 'a' -> 'r+b' -> 'w+b' -> 'wb', 'w' -> 'w+b' -> 'wb',
            # 'x' -> 'x+b' -> 'xb'.  'rb' has no fallback.
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    # No fallback left for this mode: propagate the error.
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # The file object cannot report its position: wrap it in
                    # _Tellable and treat the stream as non-seekable.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # Deliberately catches everything: release the file, then
            # re-raise the original exception unchanged.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise
1260
1261    def __enter__(self):
1262        return self
1263
1264    def __exit__(self, type, value, traceback):
1265        self.close()
1266
1267    def __repr__(self):
1268        result = ['<%s.%s' % (self.__class__.__module__,
1269                              self.__class__.__qualname__)]
1270        if self.fp is not None:
1271            if self._filePassed:
1272                result.append(' file=%r' % self.fp)
1273            elif self.filename is not None:
1274                result.append(' filename=%r' % self.filename)
1275            result.append(' mode=%r' % self.mode)
1276        else:
1277            result.append(' [closed]')
1278        result.append('>')
1279        return ''.join(result)
1280
1281    def _RealGetContents(self):
1282        """Read in the table of contents for the ZIP file."""
1283        fp = self.fp
1284        try:
1285            endrec = _EndRecData(fp)
1286        except OSError:
1287            raise BadZipFile("File is not a zip file")
1288        if not endrec:
1289            raise BadZipFile("File is not a zip file")
1290        if self.debug > 1:
1291            print(endrec)
1292        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
1293        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
1294        self._comment = endrec[_ECD_COMMENT]    # archive comment
1295
1296        # "concat" is zero, unless zip was concatenated to another file
1297        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
1298        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1299            # If Zip64 extension structures are present, account for them
1300            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1301
1302        if self.debug > 2:
1303            inferred = concat + offset_cd
1304            print("given, inferred, offset", offset_cd, inferred, concat)
1305        # self.start_dir:  Position of start of central directory
1306        self.start_dir = offset_cd + concat
1307        fp.seek(self.start_dir, 0)
1308        data = fp.read(size_cd)
1309        fp = io.BytesIO(data)
1310        total = 0
1311        while total < size_cd:
1312            centdir = fp.read(sizeCentralDir)
1313            if len(centdir) != sizeCentralDir:
1314                raise BadZipFile("Truncated central directory")
1315            centdir = struct.unpack(structCentralDir, centdir)
1316            if centdir[_CD_SIGNATURE] != stringCentralDir:
1317                raise BadZipFile("Bad magic number for central directory")
1318            if self.debug > 2:
1319                print(centdir)
1320            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
1321            flags = centdir[5]
1322            if flags & 0x800:
1323                # UTF-8 file names extension
1324                filename = filename.decode('utf-8')
1325            else:
1326                # Historical ZIP filename encoding
1327                filename = filename.decode('cp437')
1328            # Create ZipInfo instance to store file information
1329            x = ZipInfo(filename)
1330            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1331            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
1332            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
1333            (x.create_version, x.create_system, x.extract_version, x.reserved,
1334             x.flag_bits, x.compress_type, t, d,
1335             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
1336            if x.extract_version > MAX_EXTRACT_VERSION:
1337                raise NotImplementedError("zip file version %.1f" %
1338                                          (x.extract_version / 10))
1339            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1340            # Convert date/time code to (year, month, day, hour, min, sec)
1341            x._raw_time = t
1342            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
1343                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
1344
1345            x._decodeExtra()
1346            x.header_offset = x.header_offset + concat
1347            self.filelist.append(x)
1348            self.NameToInfo[x.filename] = x
1349
1350            # update total bytes read from central directory
1351            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1352                     + centdir[_CD_EXTRA_FIELD_LENGTH]
1353                     + centdir[_CD_COMMENT_LENGTH])
1354
1355            if self.debug > 2:
1356                print("total", total)
1357
1358
1359    def namelist(self):
1360        """Return a list of file names in the archive."""
1361        return [data.filename for data in self.filelist]
1362
1363    def infolist(self):
1364        """Return a list of class ZipInfo instances for files in the
1365        archive."""
1366        return self.filelist
1367
1368    def printdir(self, file=None):
1369        """Print a table of contents for the zip file."""
1370        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
1371              file=file)
1372        for zinfo in self.filelist:
1373            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
1374            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1375                  file=file)
1376
1377    def testzip(self):
1378        """Read all the files and check the CRC."""
1379        chunk_size = 2 ** 20
1380        for zinfo in self.filelist:
1381            try:
1382                # Read by chunks, to avoid an OverflowError or a
1383                # MemoryError with very large embedded files.
1384                with self.open(zinfo.filename, "r") as f:
1385                    while f.read(chunk_size):     # Check CRC-32
1386                        pass
1387            except BadZipFile:
1388                return zinfo.filename
1389
1390    def getinfo(self, name):
1391        """Return the instance of ZipInfo given 'name'."""
1392        info = self.NameToInfo.get(name)
1393        if info is None:
1394            raise KeyError(
1395                'There is no item named %r in the archive' % name)
1396
1397        return info
1398
1399    def setpassword(self, pwd):
1400        """Set default password for encrypted files."""
1401        if pwd and not isinstance(pwd, bytes):
1402            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1403        if pwd:
1404            self.pwd = pwd
1405        else:
1406            self.pwd = None
1407
1408    @property
1409    def comment(self):
1410        """The comment text associated with the ZIP file."""
1411        return self._comment
1412
1413    @comment.setter
1414    def comment(self, comment):
1415        if not isinstance(comment, bytes):
1416            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
1417        # check for valid comment length
1418        if len(comment) > ZIP_MAX_COMMENT:
1419            import warnings
1420            warnings.warn('Archive comment is too long; truncating to %d bytes'
1421                          % ZIP_MAX_COMMENT, stacklevel=2)
1422            comment = comment[:ZIP_MAX_COMMENT]
1423        self._comment = comment
1424        self._didModify = True
1425
1426    def read(self, name, pwd=None):
1427        """Return file bytes for name."""
1428        with self.open(name, "r", pwd) as fp:
1429            return fp.read()
1430
    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.

        Raises ValueError for a bad mode, a password given for writing, a
        closed archive, or reading while a write handle is open; TypeError if
        pwd is not bytes; BadZipFile for a truncated or inconsistent local
        file header; NotImplementedError for unsupported flag bits; and
        RuntimeError for a missing or wrong password.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            # New member: inherit the archive-wide compression settings.
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        # The reference count is raised for the new reader; _fpclose is
        # handed to the shared file as its close callback.
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                # The extra field is read only to move past it.
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if zinfo.flag_bits & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # The local header must name the same file as the central
            # directory entry.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                # An explicit pwd argument wins over the archive default.
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption header
                #  used to strengthen the algorithm. The first 11 bytes are
                #  completely random, while the 12th contains the MSB of the CRC,
                #  or the MSB of the file time depending on the header type
                #  and is used to check the correctness of the password.
                header = zef_file.read(12)
                h = zd(header[0:12])
                if zinfo.flag_bits & 0x8:
                    # compare against the file type from extended local headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if h[11] != check_byte:
                    raise RuntimeError("Bad password for file %r" % name)

            return ZipExtFile(zef_file, mode, zinfo, zd, True)
        except:
            # Deliberately catches everything: close the shared file handle,
            # then re-raise the original exception.
            zef_file.close()
            raise
1544
1545    def _open_to_write(self, zinfo, force_zip64=False):
1546        if force_zip64 and not self._allowZip64:
1547            raise ValueError(
1548                "force_zip64 is True, but allowZip64 was False when opening "
1549                "the ZIP file."
1550            )
1551        if self._writing:
1552            raise ValueError("Can't write to the ZIP file while there is "
1553                             "another write handle open on it. "
1554                             "Close the first handle before opening another.")
1555
1556        # Sizes and CRC are overwritten with correct data after processing the file
1557        if not hasattr(zinfo, 'file_size'):
1558            zinfo.file_size = 0
1559        zinfo.compress_size = 0
1560        zinfo.CRC = 0
1561
1562        zinfo.flag_bits = 0x00
1563        if zinfo.compress_type == ZIP_LZMA:
1564            # Compressed data includes an end-of-stream (EOS) marker
1565            zinfo.flag_bits |= 0x02
1566        if not self._seekable:
1567            zinfo.flag_bits |= 0x08
1568
1569        if not zinfo.external_attr:
1570            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------
1571
1572        # Compressed size can be larger than uncompressed size
1573        zip64 = self._allowZip64 and \
1574                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1575
1576        if self._seekable:
1577            self.fp.seek(self.start_dir)
1578        zinfo.header_offset = self.fp.tell()
1579
1580        self._writecheck(zinfo)
1581        self._didModify = True
1582
1583        self.fp.write(zinfo.FileHeader(zip64))
1584
1585        self._writing = True
1586        return _ZipWriteFile(self, zinfo, zip64)
1587
1588    def extract(self, member, path=None, pwd=None):
1589        """Extract a member from the archive to the current working directory,
1590           using its full name. Its file information is extracted as accurately
1591           as possible. `member' may be a filename or a ZipInfo object. You can
1592           specify a different directory using `path'.
1593        """
1594        if path is None:
1595            path = os.getcwd()
1596        else:
1597            path = os.fspath(path)
1598
1599        return self._extract_member(member, path, pwd)
1600
1601    def extractall(self, path=None, members=None, pwd=None):
1602        """Extract all members from the archive to the current working
1603           directory. `path' specifies a different directory to extract to.
1604           `members' is optional and must be a subset of the list returned
1605           by namelist().
1606        """
1607        if members is None:
1608            members = self.namelist()
1609
1610        if path is None:
1611            path = os.getcwd()
1612        else:
1613            path = os.fspath(path)
1614
1615        for zipinfo in members:
1616            self._extract_member(zipinfo, path, pwd)
1617
1618    @classmethod
1619    def _sanitize_windows_name(cls, arcname, pathsep):
1620        """Replace bad characters and remove trailing dots from parts."""
1621        table = cls._windows_illegal_name_trans_table
1622        if not table:
1623            illegal = ':<>|"?*'
1624            table = str.maketrans(illegal, '_' * len(illegal))
1625            cls._windows_illegal_name_trans_table = table
1626        arcname = arcname.translate(table)
1627        # remove trailing dots
1628        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1629        # rejoin, removing empty parts.
1630        arcname = pathsep.join(x for x in arcname if x)
1631        return arcname
1632
1633    def _extract_member(self, member, targetpath, pwd):
1634        """Extract the ZipInfo object 'member' to a physical
1635           file on the path targetpath.
1636        """
1637        if not isinstance(member, ZipInfo):
1638            member = self.getinfo(member)
1639
1640        # build the destination pathname, replacing
1641        # forward slashes to platform specific separators.
1642        arcname = member.filename.replace('/', os.path.sep)
1643
1644        if os.path.altsep:
1645            arcname = arcname.replace(os.path.altsep, os.path.sep)
1646        # interpret absolute pathname as relative, remove drive letter or
1647        # UNC path, redundant separators, "." and ".." components.
1648        arcname = os.path.splitdrive(arcname)[1]
1649        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
1650        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1651                                   if x not in invalid_path_parts)
1652        if os.path.sep == '\\':
1653            # filter illegal characters on Windows
1654            arcname = self._sanitize_windows_name(arcname, os.path.sep)
1655
1656        targetpath = os.path.join(targetpath, arcname)
1657        targetpath = os.path.normpath(targetpath)
1658
1659        # Create all upper directories if necessary.
1660        upperdirs = os.path.dirname(targetpath)
1661        if upperdirs and not os.path.exists(upperdirs):
1662            os.makedirs(upperdirs)
1663
1664        if member.is_dir():
1665            if not os.path.isdir(targetpath):
1666                os.mkdir(targetpath)
1667            return targetpath
1668
1669        with self.open(member, pwd=pwd) as source, \
1670             open(targetpath, "wb") as target:
1671            shutil.copyfileobj(source, target)
1672
1673        return targetpath
1674
1675    def _writecheck(self, zinfo):
1676        """Check for errors before writing a file to the archive."""
1677        if zinfo.filename in self.NameToInfo:
1678            import warnings
1679            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1680        if self.mode not in ('w', 'x', 'a'):
1681            raise ValueError("write() requires mode 'w', 'x', or 'a'")
1682        if not self.fp:
1683            raise ValueError(
1684                "Attempt to write ZIP archive that was already closed")
1685        _check_compression(zinfo.compress_type)
1686        if not self._allowZip64:
1687            requires_zip64 = None
1688            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1689                requires_zip64 = "Files count"
1690            elif zinfo.file_size > ZIP64_LIMIT:
1691                requires_zip64 = "Filesize"
1692            elif zinfo.header_offset > ZIP64_LIMIT:
1693                requires_zip64 = "Zipfile size"
1694            if requires_zip64:
1695                raise LargeZipFile(requires_zip64 +
1696                                   " would require ZIP64 extensions")
1697
1698    def write(self, filename, arcname=None,
1699              compress_type=None, compresslevel=None):
1700        """Put the bytes from filename into the archive under the name
1701        arcname."""
1702        if not self.fp:
1703            raise ValueError(
1704                "Attempt to write to ZIP archive that was already closed")
1705        if self._writing:
1706            raise ValueError(
1707                "Can't write to ZIP archive while an open writing handle exists"
1708            )
1709
1710        zinfo = ZipInfo.from_file(filename, arcname)
1711
1712        if zinfo.is_dir():
1713            zinfo.compress_size = 0
1714            zinfo.CRC = 0
1715        else:
1716            if compress_type is not None:
1717                zinfo.compress_type = compress_type
1718            else:
1719                zinfo.compress_type = self.compression
1720
1721            if compresslevel is not None:
1722                zinfo._compresslevel = compresslevel
1723            else:
1724                zinfo._compresslevel = self.compresslevel
1725
1726        if zinfo.is_dir():
1727            with self._lock:
1728                if self._seekable:
1729                    self.fp.seek(self.start_dir)
1730                zinfo.header_offset = self.fp.tell()  # Start of header bytes
1731                if zinfo.compress_type == ZIP_LZMA:
1732                # Compressed data includes an end-of-stream (EOS) marker
1733                    zinfo.flag_bits |= 0x02
1734
1735                self._writecheck(zinfo)
1736                self._didModify = True
1737
1738                self.filelist.append(zinfo)
1739                self.NameToInfo[zinfo.filename] = zinfo
1740                self.fp.write(zinfo.FileHeader(False))
1741                self.start_dir = self.fp.tell()
1742        else:
1743            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1744                shutil.copyfileobj(src, dest, 1024*8)
1745
1746    def writestr(self, zinfo_or_arcname, data,
1747                 compress_type=None, compresslevel=None):
1748        """Write a file into the archive.  The contents is 'data', which
1749        may be either a 'str' or a 'bytes' instance; if it is a 'str',
1750        it is encoded as UTF-8 first.
1751        'zinfo_or_arcname' is either a ZipInfo instance or
1752        the name of the file in the archive."""
1753        if isinstance(data, str):
1754            data = data.encode("utf-8")
1755        if not isinstance(zinfo_or_arcname, ZipInfo):
1756            zinfo = ZipInfo(filename=zinfo_or_arcname,
1757                            date_time=time.localtime(time.time())[:6])
1758            zinfo.compress_type = self.compression
1759            zinfo._compresslevel = self.compresslevel
1760            if zinfo.filename[-1] == '/':
1761                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1762                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1763            else:
1764                zinfo.external_attr = 0o600 << 16     # ?rw-------
1765        else:
1766            zinfo = zinfo_or_arcname
1767
1768        if not self.fp:
1769            raise ValueError(
1770                "Attempt to write to ZIP archive that was already closed")
1771        if self._writing:
1772            raise ValueError(
1773                "Can't write to ZIP archive while an open writing handle exists."
1774            )
1775
1776        if compress_type is not None:
1777            zinfo.compress_type = compress_type
1778
1779        if compresslevel is not None:
1780            zinfo._compresslevel = compresslevel
1781
1782        zinfo.file_size = len(data)            # Uncompressed size
1783        with self._lock:
1784            with self.open(zinfo, mode='w') as dest:
1785                dest.write(data)
1786
    def __del__(self):
        """Call the "close()" method in case the user forgot.

        Runs when the object is finalized; all the work, including
        writing any pending end records, is delegated to close().
        """
        self.close()
1790
1791    def close(self):
1792        """Close the file, and for mode 'w', 'x' and 'a' write the ending
1793        records."""
1794        if self.fp is None:
1795            return
1796
1797        if self._writing:
1798            raise ValueError("Can't close the ZIP file while there is "
1799                             "an open writing handle on it. "
1800                             "Close the writing handle before closing the zip.")
1801
1802        try:
1803            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1804                with self._lock:
1805                    if self._seekable:
1806                        self.fp.seek(self.start_dir)
1807                    self._write_end_record()
1808        finally:
1809            fp = self.fp
1810            self.fp = None
1811            self._fpclose(fp)
1812
    def _write_end_record(self):
        """Write the central directory and the end-of-archive record(s).

        One central directory entry is written for every member in
        self.filelist at the current file position, followed by the
        ZIP64 end records when a limit is exceeded, the regular end
        record and the archive comment.  The file is flushed afterwards.

        Raises LargeZipFile if ZIP64 end records are required but
        self._allowZip64 is false.
        """
        for zinfo in self.filelist:         # write central directory
            # Pack the timestamp into the 16-bit DOS date/time fields;
            # date_time is indexed as (year, month, day, hour, minute,
            # second) and seconds are stored halved.
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            # Values too large for the fixed-width fields are collected in
            # 'extra' and replaced by the 0xffffffff placeholder.
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Put a ZIP64 field (header id 1, one 8-byte value per
                # collected item) in front of the member's extra data.
                # _strip_extra presumably drops an existing id-1 field
                # first -- confirm against its definition.
                extra_data = _strip_extra(extra_data, (1,))
                extra_data = struct.pack(
                    '<HH' + 'Q'*len(extra),
                    1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            # bzip2 and LZMA raise the minimum required version.
            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            try:
                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(structCentralDir,
                                      stringCentralDir, create_version,
                                      zinfo.create_system, extract_version, zinfo.reserved,
                                      flag_bits, zinfo.compress_type, dostime, dosdate,
                                      zinfo.CRC, compress_size, file_size,
                                      len(filename), len(extra_data), len(zinfo.comment),
                                      0, zinfo.internal_attr, zinfo.external_attr,
                                      header_offset)
            except DeprecationWarning:
                # Diagnostic aid: dump the values that were being packed to
                # stderr, then re-raise.  Presumably only reachable when
                # warnings are raised as exceptions.
                print((structCentralDir, stringCentralDir, create_version,
                       zinfo.create_system, extract_version, zinfo.reserved,
                       zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                       zinfo.CRC, compress_size, file_size,
                       len(zinfo.filename), len(extra_data), len(zinfo.comment),
                       0, zinfo.internal_attr, zinfo.external_attr,
                       header_offset), file=sys.stderr)
                raise
            # Entry layout: fixed header, file name, extra data, comment.
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        # pos2 is the position just past the central directory.
        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            # NOTE(review): the literals 44, 45, 45, 0, 0 fill the leading
            # fields of structEndArchive64 (defined outside this block);
            # check their meaning against that format before changing them.
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            # The locator stores pos2, the position at which the ZIP64
            # end record above was written.
            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # Clamp the values for the regular end record to the largest
            # number its fields can hold.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        self.fp.flush()
1914
1915    def _fpclose(self, fp):
1916        assert self._fileRefCnt > 0
1917        self._fileRefCnt -= 1
1918        if not self._fileRefCnt and not self._filePassed:
1919            fp.close()
1920
1921
class PyZipFile(ZipFile):
    """ZipFile variant for building archives of Python modules and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive as ZipFile does and remember 'optimize'.

        'optimize' selects which compiled file writepy() stores: -1 uses
        whatever compiled file is already present, 0, 1 and 2 request that
        optimization level.
        """
        ZipFile.__init__(
            self, file,
            mode=mode, compression=compression, allowZip64=allowZip64,
        )
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add the Python code found at "pathname" to the ZIP archive.

        "pathname" may be:
          * a package directory (one containing __init__.py): the
            package and its sub-packages are added recursively;
          * any other directory: the *.py files directly inside it are
            added;
          * a single file, whose name must end in ".py".
        Modules are stored in compiled form (module.pyc), compiling
        module.py first when necessary.  "basename" is the archive
        directory the entries are placed under.
        If "filterfunc" is given it is called with each path; a false
        result skips that file or directory.
        """
        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return

        def add_module(source_path, prefix):
            # Store one *.py file found in a directory, honouring filterfunc.
            if filterfunc and not filterfunc(source_path):
                if self.debug:
                    print('file %r skipped by filterfunc' % source_path)
                return
            fname, arcname = self._get_codename(source_path[0:-3], prefix)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in sorted(os.listdir(pathname)):
                if os.path.splitext(filename)[1] == ".py":
                    add_module(os.path.join(pathname, filename), basename)
            return

        # This is a package directory: its entries live below the
        # package name in the archive.
        name = os.path.split(pathname)[1]
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        entries = sorted(os.listdir(pathname))
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # Sub-package: recurse.
                    self.writepy(path, basename, filterfunc=filterfunc)
            elif os.path.splitext(filename)[1] == ".py":
                add_module(path, basename)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module path 'pathname'.

        'pathname' is the module path without its ".py" suffix.
        'filename' is the file on disk to store: an up-to-date compiled
        file (compiled on demand), or the .py source when compilation
        fails.  'archivename' is the final component of the legacy
        "<pathname>.pyc" name (or of the source name in the fallback
        case), prefixed with 'basename' and a slash when 'basename' is
        not empty.
        """
        def _compile(file, optimize=-1):
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)

        def is_fresh(candidate):
            # True if 'candidate' exists and is not older than the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        # Compiled code is always archived under the legacy pyc name.
        arcname = file_pyc
        if self._optimize == -1:
            # legacy mode: use whatever file is present, preferring the
            # legacy .pyc, then the __pycache__ files by optimization level
            for candidate in (file_pyc, pycache_opt0,
                              pycache_opt1, pycache_opt2):
                if is_fresh(candidate):
                    fname = candidate
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            for level, cached in ((0, pycache_opt0),
                                  (1, pycache_opt1),
                                  (2, pycache_opt2)):
                if self._optimize == level:
                    fname = cached
                    break
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            if not is_fresh(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
2094
2095
def main(args=None):
    """Command-line interface: list, test, extract or create a ZIP file.

    'args' is the argument list to parse; None lets argparse use the
    process arguments.  Exactly one of -l, -e, -c or -t must be given.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-l', '--list', metavar='<zipfile>',
                         help='Show listing of a zipfile')
    actions.add_argument('-e', '--extract', nargs=2,
                         metavar=('<zipfile>', '<output_dir>'),
                         help='Extract zipfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create zipfile from sources')
    actions.add_argument('-t', '--test', metavar='<zipfile>',
                         help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        with ZipFile(args.test, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")
        return

    if args.list is not None:
        with ZipFile(args.list, 'r') as zf:
            zf.printdir()
        return

    if args.extract is not None:
        src, curdir = args.extract
        with ZipFile(src, 'r') as zf:
            zf.extractall(curdir)
        return

    if args.create is None:
        return

    # First value is the archive to create, the rest are the sources.
    zip_name, *files = args.create

    def add_to_zip(zf, path, zippath):
        # Add a file, or a directory tree recursively, under 'zippath'.
        if os.path.isfile(path):
            zf.write(path, zippath, ZIP_DEFLATED)
            return
        if not os.path.isdir(path):
            # neither a file nor a directory: ignore
            return
        if zippath:
            zf.write(path, zippath)
        for nm in sorted(os.listdir(path)):
            add_to_zip(zf,
                       os.path.join(path, nm), os.path.join(zippath, nm))

    with ZipFile(zip_name, 'w') as zf:
        for path in files:
            # Use the last path component as the archive name; a trailing
            # separator leaves basename() empty, so fall back to the parent.
            zippath = (os.path.basename(path)
                       or os.path.basename(os.path.dirname(path)))
            if zippath in ('', os.curdir, os.pardir):
                zippath = ''
            add_to_zip(zf, path, zippath)
2155
# Run the command-line interface when the module is executed as a script.
if __name__ == "__main__":
    main()
2158