1""" 2Read and write ZIP files. 3 4XXX references to utf-8 need further investigation. 5""" 6import io 7import os 8import importlib.util 9import sys 10import time 11import stat 12import shutil 13import struct 14import binascii 15import threading 16 17try: 18 import zlib # We may need its compression method 19 crc32 = zlib.crc32 20except ImportError: 21 zlib = None 22 crc32 = binascii.crc32 23 24try: 25 import bz2 # We may need its compression method 26except ImportError: 27 bz2 = None 28 29try: 30 import lzma # We may need its compression method 31except ImportError: 32 lzma = None 33 34__all__ = ["BadZipFile", "BadZipfile", "error", 35 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", 36 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"] 37 38class BadZipFile(Exception): 39 pass 40 41 42class LargeZipFile(Exception): 43 """ 44 Raised when writing a zipfile, the zipfile requires ZIP64 extensions 45 and those extensions are disabled. 46 """ 47 48error = BadZipfile = BadZipFile # Pre-3.2 compatibility names 49 50 51ZIP64_LIMIT = (1 << 31) - 1 52ZIP_FILECOUNT_LIMIT = (1 << 16) - 1 53ZIP_MAX_COMMENT = (1 << 16) - 1 54 55# constants for Zip file compression methods 56ZIP_STORED = 0 57ZIP_DEFLATED = 8 58ZIP_BZIP2 = 12 59ZIP_LZMA = 14 60# Other ZIP compression methods not supported 61 62DEFAULT_VERSION = 20 63ZIP64_VERSION = 45 64BZIP2_VERSION = 46 65LZMA_VERSION = 63 66# we recognize (but not necessarily support) all features up to that version 67MAX_EXTRACT_VERSION = 63 68 69# Below are some formats and associated data for reading/writing headers using 70# the struct module. The names and structures of headers/records are those used 71# in the PKWARE description of the ZIP file format: 72# http://www.pkware.com/documents/casestudies/APPNOTE.TXT 73# (URL valid as of January 2008) 74 75# The "end of central directory" structure, magic number, size, and indices 76# (section V.I in the format document) 77structEndArchive = b"<4s4H2LH" 78stringEndArchive = b"PK\005\006" 79sizeEndCentDir = struct.calcsize(structEndArchive) 80 81_ECD_SIGNATURE = 0 82_ECD_DISK_NUMBER = 1 83_ECD_DISK_START = 2 84_ECD_ENTRIES_THIS_DISK = 3 85_ECD_ENTRIES_TOTAL = 4 86_ECD_SIZE = 5 87_ECD_OFFSET = 6 88_ECD_COMMENT_SIZE = 7 89# These last two indices are not part of the structure as defined in the 90# spec, but they are used internally by this module as a convenience 91_ECD_COMMENT = 8 92_ECD_LOCATION = 9 93 94# The "central directory" structure, magic number, size, and indices 95# of entries in the structure (section V.F in the format document) 96structCentralDir = "<4s4B4HL2L5H2L" 97stringCentralDir = b"PK\001\002" 98sizeCentralDir = struct.calcsize(structCentralDir) 99 100# indexes of entries in the central directory structure 101_CD_SIGNATURE = 0 102_CD_CREATE_VERSION = 1 103_CD_CREATE_SYSTEM = 2 104_CD_EXTRACT_VERSION = 3 105_CD_EXTRACT_SYSTEM = 4 106_CD_FLAG_BITS = 5 107_CD_COMPRESS_TYPE = 6 108_CD_TIME = 7 109_CD_DATE = 8 110_CD_CRC = 9 111_CD_COMPRESSED_SIZE = 10 112_CD_UNCOMPRESSED_SIZE = 11 113_CD_FILENAME_LENGTH = 12 114_CD_EXTRA_FIELD_LENGTH = 13 115_CD_COMMENT_LENGTH = 14 116_CD_DISK_NUMBER_START = 15 117_CD_INTERNAL_FILE_ATTRIBUTES = 16 118_CD_EXTERNAL_FILE_ATTRIBUTES = 17 119_CD_LOCAL_HEADER_OFFSET = 18 120 121# The "local file header" structure, magic number, size, and indices 122# (section V.A in the format document) 123structFileHeader = "<4s2B4HL2L2H" 124stringFileHeader = b"PK\003\004" 125sizeFileHeader = struct.calcsize(structFileHeader) 126 127_FH_SIGNATURE = 0 128_FH_EXTRACT_VERSION = 1 129_FH_EXTRACT_SYSTEM = 2 130_FH_GENERAL_PURPOSE_FLAG_BITS = 3 131_FH_COMPRESSION_METHOD = 4 132_FH_LAST_MOD_TIME = 5 133_FH_LAST_MOD_DATE = 6 134_FH_CRC = 7 135_FH_COMPRESSED_SIZE = 8 136_FH_UNCOMPRESSED_SIZE = 9 137_FH_FILENAME_LENGTH = 10 138_FH_EXTRA_FIELD_LENGTH = 11 139 140# The "Zip64 end of central directory locator" structure, magic number, and size 141structEndArchive64Locator = "<4sLQL" 142stringEndArchive64Locator = b"PK\x06\x07" 143sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) 144 145# The "Zip64 end of central directory" record, magic number, size, and indices 146# (section V.G in the format document) 147structEndArchive64 = "<4sQ2H2L4Q" 148stringEndArchive64 = b"PK\x06\x06" 149sizeEndCentDir64 = struct.calcsize(structEndArchive64) 150 151_CD64_SIGNATURE = 0 152_CD64_DIRECTORY_RECSIZE = 1 153_CD64_CREATE_VERSION = 2 154_CD64_EXTRACT_VERSION = 3 155_CD64_DISK_NUMBER = 4 156_CD64_DISK_NUMBER_START = 5 157_CD64_NUMBER_ENTRIES_THIS_DISK = 6 158_CD64_NUMBER_ENTRIES_TOTAL = 7 159_CD64_DIRECTORY_SIZE = 8 160_CD64_OFFSET_START_CENTDIR = 9 161 162_DD_SIGNATURE = 0x08074b50 163 164_EXTRA_FIELD_STRUCT = struct.Struct('<HH') 165 166def _strip_extra(extra, xids): 167 # Remove Extra Fields with specified IDs. 168 unpack = _EXTRA_FIELD_STRUCT.unpack 169 modified = False 170 buffer = [] 171 start = i = 0 172 while i + 4 <= len(extra): 173 xid, xlen = unpack(extra[i : i + 4]) 174 j = i + 4 + xlen 175 if xid in xids: 176 if i != start: 177 buffer.append(extra[start : i]) 178 start = j 179 modified = True 180 i = j 181 if not modified: 182 return extra 183 return b''.join(buffer) 184 185def _check_zipfile(fp): 186 try: 187 if _EndRecData(fp): 188 return True # file has correct magic number 189 except OSError: 190 pass 191 return False 192 193def is_zipfile(filename): 194 """Quickly see if a file is a ZIP file by checking the magic number. 195 196 The filename argument may be a file or file-like object too. 197 """ 198 result = False 199 try: 200 if hasattr(filename, "read"): 201 result = _check_zipfile(fp=filename) 202 else: 203 with open(filename, "rb") as fp: 204 result = _check_zipfile(fp) 205 except OSError: 206 pass 207 return result 208 209def _EndRecData64(fpin, offset, endrec): 210 """ 211 Read the ZIP64 end-of-archive records and use that to update endrec 212 """ 213 try: 214 fpin.seek(offset - sizeEndCentDir64Locator, 2) 215 except OSError: 216 # If the seek fails, the file is not large enough to contain a ZIP64 217 # end-of-archive record, so just return the end record we were given. 218 return endrec 219 220 data = fpin.read(sizeEndCentDir64Locator) 221 if len(data) != sizeEndCentDir64Locator: 222 return endrec 223 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) 224 if sig != stringEndArchive64Locator: 225 return endrec 226 227 if diskno != 0 or disks != 1: 228 raise BadZipFile("zipfiles that span multiple disks are not supported") 229 230 # Assume no 'zip64 extensible data' 231 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 232 data = fpin.read(sizeEndCentDir64) 233 if len(data) != sizeEndCentDir64: 234 return endrec 235 sig, sz, create_version, read_version, disk_num, disk_dir, \ 236 dircount, dircount2, dirsize, diroffset = \ 237 struct.unpack(structEndArchive64, data) 238 if sig != stringEndArchive64: 239 return endrec 240 241 # Update the original endrec using data from the ZIP64 record 242 endrec[_ECD_SIGNATURE] = sig 243 endrec[_ECD_DISK_NUMBER] = disk_num 244 endrec[_ECD_DISK_START] = disk_dir 245 endrec[_ECD_ENTRIES_THIS_DISK] = dircount 246 endrec[_ECD_ENTRIES_TOTAL] = dircount2 247 endrec[_ECD_SIZE] = dirsize 248 endrec[_ECD_OFFSET] = diroffset 249 return endrec 250 251 252def _EndRecData(fpin): 253 """Return data from the "End of Central Directory" record, or None. 254 255 The data is a list of the nine items in the ZIP "End of central dir" 256 record followed by a tenth item, the file seek offset of this record.""" 257 258 # Determine file size 259 fpin.seek(0, 2) 260 filesize = fpin.tell() 261 262 # Check to see if this is ZIP file with no archive comment (the 263 # "end of central directory" structure should be the last item in the 264 # file if this is the case). 265 try: 266 fpin.seek(-sizeEndCentDir, 2) 267 except OSError: 268 return None 269 data = fpin.read() 270 if (len(data) == sizeEndCentDir and 271 data[0:4] == stringEndArchive and 272 data[-2:] == b"\000\000"): 273 # the signature is correct and there's no comment, unpack structure 274 endrec = struct.unpack(structEndArchive, data) 275 endrec=list(endrec) 276 277 # Append a blank comment and record start offset 278 endrec.append(b"") 279 endrec.append(filesize - sizeEndCentDir) 280 281 # Try to read the "Zip64 end of central directory" structure 282 return _EndRecData64(fpin, -sizeEndCentDir, endrec) 283 284 # Either this is not a ZIP file, or it is a ZIP file with an archive 285 # comment. Search the end of the file for the "end of central directory" 286 # record signature. The comment is the last item in the ZIP file and may be 287 # up to 64K long. It is assumed that the "end of central directory" magic 288 # number does not appear in the comment. 289 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) 290 fpin.seek(maxCommentStart, 0) 291 data = fpin.read() 292 start = data.rfind(stringEndArchive) 293 if start >= 0: 294 # found the magic number; attempt to unpack and interpret 295 recData = data[start:start+sizeEndCentDir] 296 if len(recData) != sizeEndCentDir: 297 # Zip file is corrupted. 298 return None 299 endrec = list(struct.unpack(structEndArchive, recData)) 300 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file 301 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize] 302 endrec.append(comment) 303 endrec.append(maxCommentStart + start) 304 305 # Try to read the "Zip64 end of central directory" structure 306 return _EndRecData64(fpin, maxCommentStart + start - filesize, 307 endrec) 308 309 # Unable to find a valid end of central directory structure 310 return None 311 312 313class ZipInfo (object): 314 """Class with attributes describing each file in the ZIP archive.""" 315 316 __slots__ = ( 317 'orig_filename', 318 'filename', 319 'date_time', 320 'compress_type', 321 '_compresslevel', 322 'comment', 323 'extra', 324 'create_system', 325 'create_version', 326 'extract_version', 327 'reserved', 328 'flag_bits', 329 'volume', 330 'internal_attr', 331 'external_attr', 332 'header_offset', 333 'CRC', 334 'compress_size', 335 'file_size', 336 '_raw_time', 337 ) 338 339 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): 340 self.orig_filename = filename # Original file name in archive 341 342 # Terminate the file name at the first null byte. Null bytes in file 343 # names are used as tricks by viruses in archives. 344 null_byte = filename.find(chr(0)) 345 if null_byte >= 0: 346 filename = filename[0:null_byte] 347 # This is used to ensure paths in generated ZIP files always use 348 # forward slashes as the directory separator, as required by the 349 # ZIP format specification. 350 if os.sep != "/" and os.sep in filename: 351 filename = filename.replace(os.sep, "/") 352 353 self.filename = filename # Normalized file name 354 self.date_time = date_time # year, month, day, hour, min, sec 355 356 if date_time[0] < 1980: 357 raise ValueError('ZIP does not support timestamps before 1980') 358 359 # Standard values: 360 self.compress_type = ZIP_STORED # Type of compression for the file 361 self._compresslevel = None # Level for the compressor 362 self.comment = b"" # Comment for each file 363 self.extra = b"" # ZIP extra data 364 if sys.platform == 'win32': 365 self.create_system = 0 # System which created ZIP archive 366 else: 367 # Assume everything else is unix-y 368 self.create_system = 3 # System which created ZIP archive 369 self.create_version = DEFAULT_VERSION # Version which created ZIP archive 370 self.extract_version = DEFAULT_VERSION # Version needed to extract archive 371 self.reserved = 0 # Must be zero 372 self.flag_bits = 0 # ZIP flag bits 373 self.volume = 0 # Volume number of file header 374 self.internal_attr = 0 # Internal attributes 375 self.external_attr = 0 # External file attributes 376 # Other attributes are set by class ZipFile: 377 # header_offset Byte offset to the file header 378 # CRC CRC-32 of the uncompressed file 379 # compress_size Size of the compressed file 380 # file_size Size of the uncompressed file 381 382 def __repr__(self): 383 result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)] 384 if self.compress_type != ZIP_STORED: 385 result.append(' compress_type=%s' % 386 compressor_names.get(self.compress_type, 387 self.compress_type)) 388 hi = self.external_attr >> 16 389 lo = self.external_attr & 0xFFFF 390 if hi: 391 result.append(' filemode=%r' % stat.filemode(hi)) 392 if lo: 393 result.append(' external_attr=%#x' % lo) 394 isdir = self.is_dir() 395 if not isdir or self.file_size: 396 result.append(' file_size=%r' % self.file_size) 397 if ((not isdir or self.compress_size) and 398 (self.compress_type != ZIP_STORED or 399 self.file_size != self.compress_size)): 400 result.append(' compress_size=%r' % self.compress_size) 401 result.append('>') 402 return ''.join(result) 403 404 def FileHeader(self, zip64=None): 405 """Return the per-file header as a bytes object.""" 406 dt = self.date_time 407 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 408 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 409 if self.flag_bits & 0x08: 410 # Set these to zero because we write them after the file data 411 CRC = compress_size = file_size = 0 412 else: 413 CRC = self.CRC 414 compress_size = self.compress_size 415 file_size = self.file_size 416 417 extra = self.extra 418 419 min_version = 0 420 if zip64 is None: 421 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT 422 if zip64: 423 fmt = '<HHQQ' 424 extra = extra + struct.pack(fmt, 425 1, struct.calcsize(fmt)-4, file_size, compress_size) 426 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: 427 if not zip64: 428 raise LargeZipFile("Filesize would require ZIP64 extensions") 429 # File is larger than what fits into a 4 byte integer, 430 # fall back to the ZIP64 extension 431 file_size = 0xffffffff 432 compress_size = 0xffffffff 433 min_version = ZIP64_VERSION 434 435 if self.compress_type == ZIP_BZIP2: 436 min_version = max(BZIP2_VERSION, min_version) 437 elif self.compress_type == ZIP_LZMA: 438 min_version = max(LZMA_VERSION, min_version) 439 440 self.extract_version = max(min_version, self.extract_version) 441 self.create_version = max(min_version, self.create_version) 442 filename, flag_bits = self._encodeFilenameFlags() 443 header = struct.pack(structFileHeader, stringFileHeader, 444 self.extract_version, self.reserved, flag_bits, 445 self.compress_type, dostime, dosdate, CRC, 446 compress_size, file_size, 447 len(filename), len(extra)) 448 return header + filename + extra 449 450 def _encodeFilenameFlags(self): 451 try: 452 return self.filename.encode('ascii'), self.flag_bits 453 except UnicodeEncodeError: 454 return self.filename.encode('utf-8'), self.flag_bits | 0x800 455 456 def _decodeExtra(self): 457 # Try to decode the extra field. 458 extra = self.extra 459 unpack = struct.unpack 460 while len(extra) >= 4: 461 tp, ln = unpack('<HH', extra[:4]) 462 if ln+4 > len(extra): 463 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) 464 if tp == 0x0001: 465 if ln >= 24: 466 counts = unpack('<QQQ', extra[4:28]) 467 elif ln == 16: 468 counts = unpack('<QQ', extra[4:20]) 469 elif ln == 8: 470 counts = unpack('<Q', extra[4:12]) 471 elif ln == 0: 472 counts = () 473 else: 474 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) 475 476 idx = 0 477 478 # ZIP64 extension (large files and/or large archives) 479 if self.file_size in (0xffffffffffffffff, 0xffffffff): 480 self.file_size = counts[idx] 481 idx += 1 482 483 if self.compress_size == 0xFFFFFFFF: 484 self.compress_size = counts[idx] 485 idx += 1 486 487 if self.header_offset == 0xffffffff: 488 old = self.header_offset 489 self.header_offset = counts[idx] 490 idx+=1 491 492 extra = extra[ln+4:] 493 494 @classmethod 495 def from_file(cls, filename, arcname=None): 496 """Construct an appropriate ZipInfo for a file on the filesystem. 497 498 filename should be the path to a file or directory on the filesystem. 499 500 arcname is the name which it will have within the archive (by default, 501 this will be the same as filename, but without a drive letter and with 502 leading path separators removed). 503 """ 504 if isinstance(filename, os.PathLike): 505 filename = os.fspath(filename) 506 st = os.stat(filename) 507 isdir = stat.S_ISDIR(st.st_mode) 508 mtime = time.localtime(st.st_mtime) 509 date_time = mtime[0:6] 510 # Create ZipInfo instance to store file information 511 if arcname is None: 512 arcname = filename 513 arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) 514 while arcname[0] in (os.sep, os.altsep): 515 arcname = arcname[1:] 516 if isdir: 517 arcname += '/' 518 zinfo = cls(arcname, date_time) 519 zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes 520 if isdir: 521 zinfo.file_size = 0 522 zinfo.external_attr |= 0x10 # MS-DOS directory flag 523 else: 524 zinfo.file_size = st.st_size 525 526 return zinfo 527 528 def is_dir(self): 529 """Return True if this archive member is a directory.""" 530 return self.filename[-1] == '/' 531 532 533# ZIP encryption uses the CRC32 one-byte primitive for scrambling some 534# internal keys. We noticed that a direct implementation is faster than 535# relying on binascii.crc32(). 536 537_crctable = None 538def _gen_crc(crc): 539 for j in range(8): 540 if crc & 1: 541 crc = (crc >> 1) ^ 0xEDB88320 542 else: 543 crc >>= 1 544 return crc 545 546# ZIP supports a password-based form of encryption. Even though known 547# plaintext attacks have been found against it, it is still useful 548# to be able to get data out of such a file. 549# 550# Usage: 551# zd = _ZipDecrypter(mypwd) 552# plain_bytes = zd(cypher_bytes) 553 554def _ZipDecrypter(pwd): 555 key0 = 305419896 556 key1 = 591751049 557 key2 = 878082192 558 559 global _crctable 560 if _crctable is None: 561 _crctable = list(map(_gen_crc, range(256))) 562 crctable = _crctable 563 564 def crc32(ch, crc): 565 """Compute the CRC32 primitive on one byte.""" 566 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] 567 568 def update_keys(c): 569 nonlocal key0, key1, key2 570 key0 = crc32(c, key0) 571 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF 572 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF 573 key2 = crc32(key1 >> 24, key2) 574 575 for p in pwd: 576 update_keys(p) 577 578 def decrypter(data): 579 """Decrypt a bytes object.""" 580 result = bytearray() 581 append = result.append 582 for c in data: 583 k = key2 | 2 584 c ^= ((k * (k^1)) >> 8) & 0xFF 585 update_keys(c) 586 append(c) 587 return bytes(result) 588 589 return decrypter 590 591 592class LZMACompressor: 593 594 def __init__(self): 595 self._comp = None 596 597 def _init(self): 598 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1}) 599 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[ 600 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props) 601 ]) 602 return struct.pack('<BBH', 9, 4, len(props)) + props 603 604 def compress(self, data): 605 if self._comp is None: 606 return self._init() + self._comp.compress(data) 607 return self._comp.compress(data) 608 609 def flush(self): 610 if self._comp is None: 611 return self._init() + self._comp.flush() 612 return self._comp.flush() 613 614 615class LZMADecompressor: 616 617 def __init__(self): 618 self._decomp = None 619 self._unconsumed = b'' 620 self.eof = False 621 622 def decompress(self, data): 623 if self._decomp is None: 624 self._unconsumed += data 625 if len(self._unconsumed) <= 4: 626 return b'' 627 psize, = struct.unpack('<H', self._unconsumed[2:4]) 628 if len(self._unconsumed) <= 4 + psize: 629 return b'' 630 631 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[ 632 lzma._decode_filter_properties(lzma.FILTER_LZMA1, 633 self._unconsumed[4:4 + psize]) 634 ]) 635 data = self._unconsumed[4 + psize:] 636 del self._unconsumed 637 638 result = self._decomp.decompress(data) 639 self.eof = self._decomp.eof 640 return result 641 642 643compressor_names = { 644 0: 'store', 645 1: 'shrink', 646 2: 'reduce', 647 3: 'reduce', 648 4: 'reduce', 649 5: 'reduce', 650 6: 'implode', 651 7: 'tokenize', 652 8: 'deflate', 653 9: 'deflate64', 654 10: 'implode', 655 12: 'bzip2', 656 14: 'lzma', 657 18: 'terse', 658 19: 'lz77', 659 97: 'wavpack', 660 98: 'ppmd', 661} 662 663def _check_compression(compression): 664 if compression == ZIP_STORED: 665 pass 666 elif compression == ZIP_DEFLATED: 667 if not zlib: 668 raise RuntimeError( 669 "Compression requires the (missing) zlib module") 670 elif compression == ZIP_BZIP2: 671 if not bz2: 672 raise RuntimeError( 673 "Compression requires the (missing) bz2 module") 674 elif compression == ZIP_LZMA: 675 if not lzma: 676 raise RuntimeError( 677 "Compression requires the (missing) lzma module") 678 else: 679 raise NotImplementedError("That compression method is not supported") 680 681 682def _get_compressor(compress_type, compresslevel=None): 683 if compress_type == ZIP_DEFLATED: 684 if compresslevel is not None: 685 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15) 686 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) 687 elif compress_type == ZIP_BZIP2: 688 if compresslevel is not None: 689 return bz2.BZ2Compressor(compresslevel) 690 return bz2.BZ2Compressor() 691 # compresslevel is ignored for ZIP_LZMA 692 elif compress_type == ZIP_LZMA: 693 return LZMACompressor() 694 else: 695 return None 696 697 698def _get_decompressor(compress_type): 699 if compress_type == ZIP_STORED: 700 return None 701 elif compress_type == ZIP_DEFLATED: 702 return zlib.decompressobj(-15) 703 elif compress_type == ZIP_BZIP2: 704 return bz2.BZ2Decompressor() 705 elif compress_type == ZIP_LZMA: 706 return LZMADecompressor() 707 else: 708 descr = compressor_names.get(compress_type) 709 if descr: 710 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr)) 711 else: 712 raise NotImplementedError("compression type %d" % (compress_type,)) 713 714 715class _SharedFile: 716 def __init__(self, file, pos, close, lock, writing): 717 self._file = file 718 self._pos = pos 719 self._close = close 720 self._lock = lock 721 self._writing = writing 722 self.seekable = file.seekable 723 self.tell = file.tell 724 725 def seek(self, offset, whence=0): 726 with self._lock: 727 if self._writing(): 728 raise ValueError("Can't reposition in the ZIP file while " 729 "there is an open writing handle on it. " 730 "Close the writing handle before trying to read.") 731 self._file.seek(offset, whence) 732 self._pos = self._file.tell() 733 return self._pos 734 735 def read(self, n=-1): 736 with self._lock: 737 if self._writing(): 738 raise ValueError("Can't read from the ZIP file while there " 739 "is an open writing handle on it. " 740 "Close the writing handle before trying to read.") 741 self._file.seek(self._pos) 742 data = self._file.read(n) 743 self._pos = self._file.tell() 744 return data 745 746 def close(self): 747 if self._file is not None: 748 fileobj = self._file 749 self._file = None 750 self._close(fileobj) 751 752# Provide the tell method for unseekable stream 753class _Tellable: 754 def __init__(self, fp): 755 self.fp = fp 756 self.offset = 0 757 758 def write(self, data): 759 n = self.fp.write(data) 760 self.offset += n 761 return n 762 763 def tell(self): 764 return self.offset 765 766 def flush(self): 767 self.fp.flush() 768 769 def close(self): 770 self.fp.close() 771 772 773class ZipExtFile(io.BufferedIOBase): 774 """File-like object for reading an archive member. 775 Is returned by ZipFile.open(). 776 """ 777 778 # Max size supported by decompressor. 779 MAX_N = 1 << 31 - 1 780 781 # Read from compressed files in 4k blocks. 782 MIN_READ_SIZE = 4096 783 784 # Chunk size to read during seek 785 MAX_SEEK_READ = 1 << 24 786 787 def __init__(self, fileobj, mode, zipinfo, decrypter=None, 788 close_fileobj=False): 789 self._fileobj = fileobj 790 self._decrypter = decrypter 791 self._close_fileobj = close_fileobj 792 793 self._compress_type = zipinfo.compress_type 794 self._compress_left = zipinfo.compress_size 795 self._left = zipinfo.file_size 796 797 self._decompressor = _get_decompressor(self._compress_type) 798 799 self._eof = False 800 self._readbuffer = b'' 801 self._offset = 0 802 803 self.newlines = None 804 805 # Adjust read size for encrypted files since the first 12 bytes 806 # are for the encryption/password information. 807 if self._decrypter is not None: 808 self._compress_left -= 12 809 810 self.mode = mode 811 self.name = zipinfo.filename 812 813 if hasattr(zipinfo, 'CRC'): 814 self._expected_crc = zipinfo.CRC 815 self._running_crc = crc32(b'') 816 else: 817 self._expected_crc = None 818 819 self._seekable = False 820 try: 821 if fileobj.seekable(): 822 self._orig_compress_start = fileobj.tell() 823 self._orig_compress_size = zipinfo.compress_size 824 self._orig_file_size = zipinfo.file_size 825 self._orig_start_crc = self._running_crc 826 self._seekable = True 827 except AttributeError: 828 pass 829 830 def __repr__(self): 831 result = ['<%s.%s' % (self.__class__.__module__, 832 self.__class__.__qualname__)] 833 if not self.closed: 834 result.append(' name=%r mode=%r' % (self.name, self.mode)) 835 if self._compress_type != ZIP_STORED: 836 result.append(' compress_type=%s' % 837 compressor_names.get(self._compress_type, 838 self._compress_type)) 839 else: 840 result.append(' [closed]') 841 result.append('>') 842 return ''.join(result) 843 844 def readline(self, limit=-1): 845 """Read and return a line from the stream. 846 847 If limit is specified, at most limit bytes will be read. 848 """ 849 850 if limit < 0: 851 # Shortcut common case - newline found in buffer. 852 i = self._readbuffer.find(b'\n', self._offset) + 1 853 if i > 0: 854 line = self._readbuffer[self._offset: i] 855 self._offset = i 856 return line 857 858 return io.BufferedIOBase.readline(self, limit) 859 860 def peek(self, n=1): 861 """Returns buffered bytes without advancing the position.""" 862 if n > len(self._readbuffer) - self._offset: 863 chunk = self.read(n) 864 if len(chunk) > self._offset: 865 self._readbuffer = chunk + self._readbuffer[self._offset:] 866 self._offset = 0 867 else: 868 self._offset -= len(chunk) 869 870 # Return up to 512 bytes to reduce allocation overhead for tight loops. 871 return self._readbuffer[self._offset: self._offset + 512] 872 873 def readable(self): 874 return True 875 876 def read(self, n=-1): 877 """Read and return up to n bytes. 878 If the argument is omitted, None, or negative, data is read and returned until EOF is reached.. 879 """ 880 if n is None or n < 0: 881 buf = self._readbuffer[self._offset:] 882 self._readbuffer = b'' 883 self._offset = 0 884 while not self._eof: 885 buf += self._read1(self.MAX_N) 886 return buf 887 888 end = n + self._offset 889 if end < len(self._readbuffer): 890 buf = self._readbuffer[self._offset:end] 891 self._offset = end 892 return buf 893 894 n = end - len(self._readbuffer) 895 buf = self._readbuffer[self._offset:] 896 self._readbuffer = b'' 897 self._offset = 0 898 while n > 0 and not self._eof: 899 data = self._read1(n) 900 if n < len(data): 901 self._readbuffer = data 902 self._offset = n 903 buf += data[:n] 904 break 905 buf += data 906 n -= len(data) 907 return buf 908 909 def _update_crc(self, newdata): 910 # Update the CRC using the given data. 911 if self._expected_crc is None: 912 # No need to compute the CRC if we don't have a reference value 913 return 914 self._running_crc = crc32(newdata, self._running_crc) 915 # Check the CRC if we're at the end of the file 916 if self._eof and self._running_crc != self._expected_crc: 917 raise BadZipFile("Bad CRC-32 for file %r" % self.name) 918 919 def read1(self, n): 920 """Read up to n bytes with at most one read() system call.""" 921 922 if n is None or n < 0: 923 buf = self._readbuffer[self._offset:] 924 self._readbuffer = b'' 925 self._offset = 0 926 while not self._eof: 927 data = self._read1(self.MAX_N) 928 if data: 929 buf += data 930 break 931 return buf 932 933 end = n + self._offset 934 if end < len(self._readbuffer): 935 buf = self._readbuffer[self._offset:end] 936 self._offset = end 937 return buf 938 939 n = end - len(self._readbuffer) 940 buf = self._readbuffer[self._offset:] 941 self._readbuffer = b'' 942 self._offset = 0 943 if n > 0: 944 while not self._eof: 945 data = self._read1(n) 946 if n < len(data): 947 self._readbuffer = data 948 self._offset = n 949 buf += data[:n] 950 break 951 if data: 952 buf += data 953 break 954 return buf 955 956 def _read1(self, n): 957 # Read up to n compressed bytes with at most one read() system call, 958 # decrypt and decompress them. 959 if self._eof or n <= 0: 960 return b'' 961 962 # Read from file. 963 if self._compress_type == ZIP_DEFLATED: 964 ## Handle unconsumed data. 965 data = self._decompressor.unconsumed_tail 966 if n > len(data): 967 data += self._read2(n - len(data)) 968 else: 969 data = self._read2(n) 970 971 if self._compress_type == ZIP_STORED: 972 self._eof = self._compress_left <= 0 973 elif self._compress_type == ZIP_DEFLATED: 974 n = max(n, self.MIN_READ_SIZE) 975 data = self._decompressor.decompress(data, n) 976 self._eof = (self._decompressor.eof or 977 self._compress_left <= 0 and 978 not self._decompressor.unconsumed_tail) 979 if self._eof: 980 data += self._decompressor.flush() 981 else: 982 data = self._decompressor.decompress(data) 983 self._eof = self._decompressor.eof or self._compress_left <= 0 984 985 data = data[:self._left] 986 self._left -= len(data) 987 if self._left <= 0: 988 self._eof = True 989 self._update_crc(data) 990 return data 991 992 def _read2(self, n): 993 if self._compress_left <= 0: 994 return b'' 995 996 n = max(n, self.MIN_READ_SIZE) 997 n = min(n, self._compress_left) 998 999 data = self._fileobj.read(n) 1000 self._compress_left -= len(data) 1001 if not data: 1002 raise EOFError 1003 1004 if self._decrypter is not None: 1005 data = self._decrypter(data) 1006 return data 1007 1008 def close(self): 1009 try: 1010 if self._close_fileobj: 1011 self._fileobj.close() 1012 finally: 1013 super().close() 1014 1015 def seekable(self): 1016 return self._seekable 1017 1018 def seek(self, offset, whence=0): 1019 if not self._seekable: 1020 raise io.UnsupportedOperation("underlying stream is not seekable") 1021 curr_pos = self.tell() 1022 if whence == 0: # Seek from start of file 1023 new_pos = offset 1024 elif whence == 1: # Seek from current position 1025 new_pos = curr_pos + offset 1026 elif whence == 2: # Seek from EOF 1027 new_pos = self._orig_file_size + offset 1028 else: 1029 raise ValueError("whence must be os.SEEK_SET (0), " 1030 "os.SEEK_CUR (1), or os.SEEK_END (2)") 1031 1032 if new_pos > self._orig_file_size: 1033 new_pos = self._orig_file_size 1034 1035 if new_pos < 0: 1036 new_pos = 0 1037 1038 read_offset = new_pos - curr_pos 1039 buff_offset = read_offset + self._offset 1040 1041 if buff_offset >= 0 and buff_offset < len(self._readbuffer): 1042 # Just move the _offset index if the new position is in the _readbuffer 1043 self._offset = buff_offset 1044 read_offset = 0 1045 elif read_offset < 0: 1046 # Position is before the current position. Reset the ZipExtFile 1047 self._fileobj.seek(self._orig_compress_start) 1048 self._running_crc = self._orig_start_crc 1049 self._compress_left = self._orig_compress_size 1050 self._left = self._orig_file_size 1051 self._readbuffer = b'' 1052 self._offset = 0 1053 self._decompressor = _get_decompressor(self._compress_type) 1054 self._eof = False 1055 read_offset = new_pos 1056 1057 while read_offset > 0: 1058 read_len = min(self.MAX_SEEK_READ, read_offset) 1059 self.read(read_len) 1060 read_offset -= read_len 1061 1062 return self.tell() 1063 1064 def tell(self): 1065 if not self._seekable: 1066 raise io.UnsupportedOperation("underlying stream is not seekable") 1067 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset 1068 return filepos 1069 1070 1071class _ZipWriteFile(io.BufferedIOBase): 1072 def __init__(self, zf, zinfo, zip64): 1073 self._zinfo = zinfo 1074 self._zip64 = zip64 1075 self._zipfile = zf 1076 self._compressor = _get_compressor(zinfo.compress_type, 1077 zinfo._compresslevel) 1078 self._file_size = 0 1079 self._compress_size = 0 1080 self._crc = 0 1081 1082 @property 1083 def _fileobj(self): 1084 return self._zipfile.fp 1085 1086 def writable(self): 1087 return True 1088 1089 def write(self, data): 1090 if self.closed: 1091 raise ValueError('I/O operation on closed file.') 1092 nbytes = len(data) 1093 self._file_size += nbytes 1094 self._crc = crc32(data, self._crc) 1095 if self._compressor: 1096 data = self._compressor.compress(data) 1097 self._compress_size += len(data) 1098 self._fileobj.write(data) 1099 return nbytes 1100 1101 def close(self): 1102 if self.closed: 1103 return 1104 super().close() 1105 # Flush any data from the compressor, and update header info 1106 if self._compressor: 1107 buf = self._compressor.flush() 1108 self._compress_size += len(buf) 1109 self._fileobj.write(buf) 1110 self._zinfo.compress_size = self._compress_size 1111 else: 1112 self._zinfo.compress_size = self._file_size 1113 self._zinfo.CRC = self._crc 1114 self._zinfo.file_size = self._file_size 1115 1116 # Write updated header info 1117 if self._zinfo.flag_bits & 0x08: 1118 # Write CRC and file sizes after the file data 1119 fmt = '<LLQQ' if self._zip64 else '<LLLL' 1120 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC, 1121 self._zinfo.compress_size, self._zinfo.file_size)) 1122 self._zipfile.start_dir = self._fileobj.tell() 1123 else: 1124 if not self._zip64: 1125 if self._file_size > ZIP64_LIMIT: 1126 raise RuntimeError('File size unexpectedly exceeded ZIP64 ' 1127 'limit') 1128 if self._compress_size > ZIP64_LIMIT: 1129 raise RuntimeError('Compressed size unexpectedly exceeded ' 1130 'ZIP64 limit') 1131 # Seek backwards and write file header (which will now include 1132 # correct CRC and file sizes) 1133 1134 # Preserve current position in file 1135 self._zipfile.start_dir = self._fileobj.tell() 1136 self._fileobj.seek(self._zinfo.header_offset) 1137 self._fileobj.write(self._zinfo.FileHeader(self._zip64)) 1138 self._fileobj.seek(self._zipfile.start_dir) 1139 1140 self._zipfile._writing = False 1141 1142 # Successfully written: Add file to our caches 1143 self._zipfile.filelist.append(self._zinfo) 1144 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo 1145 1146class ZipFile: 1147 """ Class with methods to open, read, write, close, list zip files. 1148 1149 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True, 1150 compresslevel=None) 1151 1152 file: Either the path to the file, or a file-like object. 1153 If it is a path, the file will be opened and closed by ZipFile. 1154 mode: The mode can be either read 'r', write 'w', exclusive create 'x', 1155 or append 'a'. 1156 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), 1157 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). 1158 allowZip64: if True ZipFile will create files with ZIP64 extensions when 1159 needed, otherwise it will raise an exception when this would 1160 be necessary. 1161 compresslevel: None (default for the given compression type) or an integer 1162 specifying the level to pass to the compressor. 1163 When using ZIP_STORED or ZIP_LZMA this keyword has no effect. 1164 When using ZIP_DEFLATED integers 0 through 9 are accepted. 1165 When using ZIP_BZIP2 integers 1 through 9 are accepted. 1166 1167 """ 1168 1169 fp = None # Set here since __del__ checks it 1170 _windows_illegal_name_trans_table = None 1171 1172 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, 1173 compresslevel=None): 1174 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x', 1175 or append 'a'.""" 1176 if mode not in ('r', 'w', 'x', 'a'): 1177 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'") 1178 1179 _check_compression(compression) 1180 1181 self._allowZip64 = allowZip64 1182 self._didModify = False 1183 self.debug = 0 # Level of printing: 0 through 3 1184 self.NameToInfo = {} # Find file info given name 1185 self.filelist = [] # List of ZipInfo instances for archive 1186 self.compression = compression # Method of compression 1187 self.compresslevel = compresslevel 1188 self.mode = mode 1189 self.pwd = None 1190 self._comment = b'' 1191 1192 # Check if we were passed a file-like object 1193 if isinstance(file, os.PathLike): 1194 file = os.fspath(file) 1195 if isinstance(file, str): 1196 # No, it's a filename 1197 self._filePassed = 0 1198 self.filename = file 1199 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b', 1200 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'} 1201 filemode = modeDict[mode] 1202 while True: 1203 try: 1204 self.fp = io.open(file, filemode) 1205 except OSError: 1206 if filemode in modeDict: 1207 filemode = modeDict[filemode] 1208 continue 1209 raise 1210 break 1211 else: 1212 self._filePassed = 1 1213 self.fp = file 1214 self.filename = getattr(file, 'name', None) 1215 self._fileRefCnt = 1 1216 self._lock = threading.RLock() 1217 self._seekable = True 1218 self._writing = False 1219 1220 try: 1221 if mode == 'r': 1222 self._RealGetContents() 1223 elif mode in ('w', 'x'): 1224 # set the modified flag so central directory gets written 1225 # even if no files are added to the archive 1226 self._didModify = True 1227 try: 1228 self.start_dir = self.fp.tell() 1229 except (AttributeError, OSError): 1230 self.fp = _Tellable(self.fp) 1231 self.start_dir = 0 1232 self._seekable = False 1233 else: 1234 # Some file-like objects can provide tell() but not seek() 1235 try: 1236 self.fp.seek(self.start_dir) 1237 except (AttributeError, OSError): 1238 self._seekable = False 1239 elif mode == 'a': 1240 try: 1241 # See if file is a zip file 1242 self._RealGetContents() 1243 # seek to start of directory and overwrite 1244 self.fp.seek(self.start_dir) 1245 except BadZipFile: 1246 # file is not a zip file, just append 1247 self.fp.seek(0, 2) 1248 1249 # set the modified flag so central directory gets written 1250 # even if no files are added to the archive 1251 self._didModify = True 1252 self.start_dir = self.fp.tell() 1253 else: 1254 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'") 1255 except: 1256 fp = self.fp 1257 self.fp = None 1258 self._fpclose(fp) 1259 raise 1260 1261 def __enter__(self): 1262 return self 1263 1264 def __exit__(self, type, value, traceback): 1265 self.close() 1266 1267 def __repr__(self): 1268 result = ['<%s.%s' % (self.__class__.__module__, 1269 self.__class__.__qualname__)] 1270 if self.fp is not None: 1271 if self._filePassed: 1272 result.append(' file=%r' % self.fp) 1273 elif self.filename is not None: 1274 result.append(' filename=%r' % self.filename) 1275 result.append(' mode=%r' % self.mode) 1276 else: 1277 result.append(' [closed]') 1278 result.append('>') 1279 return ''.join(result) 1280 1281 def _RealGetContents(self): 1282 """Read in the table of contents for the ZIP file.""" 1283 fp = self.fp 1284 try: 1285 endrec = _EndRecData(fp) 1286 except OSError: 1287 raise BadZipFile("File is not a zip file") 1288 if not endrec: 1289 raise BadZipFile("File is not a zip file") 1290 if self.debug > 1: 1291 print(endrec) 1292 size_cd = endrec[_ECD_SIZE] # bytes in central directory 1293 offset_cd = endrec[_ECD_OFFSET] # offset of central directory 1294 self._comment = endrec[_ECD_COMMENT] # archive comment 1295 1296 # "concat" is zero, unless zip was concatenated to another file 1297 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd 1298 if endrec[_ECD_SIGNATURE] == stringEndArchive64: 1299 # If Zip64 extension structures are present, account for them 1300 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) 1301 1302 if self.debug > 2: 1303 inferred = concat + offset_cd 1304 print("given, inferred, offset", offset_cd, inferred, concat) 1305 # self.start_dir: Position of start of central directory 1306 self.start_dir = offset_cd + concat 1307 fp.seek(self.start_dir, 0) 1308 data = fp.read(size_cd) 1309 fp = io.BytesIO(data) 1310 total = 0 1311 while total < size_cd: 1312 centdir = fp.read(sizeCentralDir) 1313 if len(centdir) != sizeCentralDir: 1314 raise BadZipFile("Truncated central directory") 1315 centdir = struct.unpack(structCentralDir, centdir) 1316 if centdir[_CD_SIGNATURE] != stringCentralDir: 1317 raise BadZipFile("Bad magic number for central directory") 1318 if self.debug > 2: 1319 print(centdir) 1320 filename = fp.read(centdir[_CD_FILENAME_LENGTH]) 1321 flags = centdir[5] 1322 if flags & 0x800: 1323 # UTF-8 file names extension 1324 filename = filename.decode('utf-8') 1325 else: 1326 # Historical ZIP filename encoding 1327 filename = filename.decode('cp437') 1328 # Create ZipInfo instance to store file information 1329 x = ZipInfo(filename) 1330 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) 1331 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) 1332 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] 1333 (x.create_version, x.create_system, x.extract_version, x.reserved, 1334 x.flag_bits, x.compress_type, t, d, 1335 x.CRC, x.compress_size, x.file_size) = centdir[1:12] 1336 if x.extract_version > MAX_EXTRACT_VERSION: 1337 raise NotImplementedError("zip file version %.1f" % 1338 (x.extract_version / 10)) 1339 x.volume, x.internal_attr, x.external_attr = centdir[15:18] 1340 # Convert date/time code to (year, month, day, hour, min, sec) 1341 x._raw_time = t 1342 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, 1343 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) 1344 1345 x._decodeExtra() 1346 x.header_offset = x.header_offset + concat 1347 self.filelist.append(x) 1348 self.NameToInfo[x.filename] = x 1349 1350 # update total bytes read from central directory 1351 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] 1352 + centdir[_CD_EXTRA_FIELD_LENGTH] 1353 + centdir[_CD_COMMENT_LENGTH]) 1354 1355 if self.debug > 2: 1356 print("total", total) 1357 1358 1359 def namelist(self): 1360 """Return a list of file names in the archive.""" 1361 return [data.filename for data in self.filelist] 1362 1363 def infolist(self): 1364 """Return a list of class ZipInfo instances for files in the 1365 archive.""" 1366 return self.filelist 1367 1368 def printdir(self, file=None): 1369 """Print a table of contents for the zip file.""" 1370 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"), 1371 file=file) 1372 for zinfo in self.filelist: 1373 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 1374 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size), 1375 file=file) 1376 1377 def testzip(self): 1378 """Read all the files and check the CRC.""" 1379 chunk_size = 2 ** 20 1380 for zinfo in self.filelist: 1381 try: 1382 # Read by chunks, to avoid an OverflowError or a 1383 # MemoryError with very large embedded files. 1384 with self.open(zinfo.filename, "r") as f: 1385 while f.read(chunk_size): # Check CRC-32 1386 pass 1387 except BadZipFile: 1388 return zinfo.filename 1389 1390 def getinfo(self, name): 1391 """Return the instance of ZipInfo given 'name'.""" 1392 info = self.NameToInfo.get(name) 1393 if info is None: 1394 raise KeyError( 1395 'There is no item named %r in the archive' % name) 1396 1397 return info 1398 1399 def setpassword(self, pwd): 1400 """Set default password for encrypted files.""" 1401 if pwd and not isinstance(pwd, bytes): 1402 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1403 if pwd: 1404 self.pwd = pwd 1405 else: 1406 self.pwd = None 1407 1408 @property 1409 def comment(self): 1410 """The comment text associated with the ZIP file.""" 1411 return self._comment 1412 1413 @comment.setter 1414 def comment(self, comment): 1415 if not isinstance(comment, bytes): 1416 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__) 1417 # check for valid comment length 1418 if len(comment) > ZIP_MAX_COMMENT: 1419 import warnings 1420 warnings.warn('Archive comment is too long; truncating to %d bytes' 1421 % ZIP_MAX_COMMENT, stacklevel=2) 1422 comment = comment[:ZIP_MAX_COMMENT] 1423 self._comment = comment 1424 self._didModify = True 1425 1426 def read(self, name, pwd=None): 1427 """Return file bytes for name.""" 1428 with self.open(name, "r", pwd) as fp: 1429 return fp.read() 1430 1431 def open(self, name, mode="r", pwd=None, *, force_zip64=False): 1432 """Return file-like object for 'name'. 1433 1434 name is a string for the file name within the ZIP file, or a ZipInfo 1435 object. 1436 1437 mode should be 'r' to read a file already in the ZIP file, or 'w' to 1438 write to a file newly added to the archive. 1439 1440 pwd is the password to decrypt files (only used for reading). 1441 1442 When writing, if the file size is not known in advance but may exceed 1443 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large 1444 files. If the size is known in advance, it is best to pass a ZipInfo 1445 instance for name, with zinfo.file_size set. 1446 """ 1447 if mode not in {"r", "w"}: 1448 raise ValueError('open() requires mode "r" or "w"') 1449 if pwd and not isinstance(pwd, bytes): 1450 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1451 if pwd and (mode == "w"): 1452 raise ValueError("pwd is only supported for reading files") 1453 if not self.fp: 1454 raise ValueError( 1455 "Attempt to use ZIP archive that was already closed") 1456 1457 # Make sure we have an info object 1458 if isinstance(name, ZipInfo): 1459 # 'name' is already an info object 1460 zinfo = name 1461 elif mode == 'w': 1462 zinfo = ZipInfo(name) 1463 zinfo.compress_type = self.compression 1464 zinfo._compresslevel = self.compresslevel 1465 else: 1466 # Get info object for name 1467 zinfo = self.getinfo(name) 1468 1469 if mode == 'w': 1470 return self._open_to_write(zinfo, force_zip64=force_zip64) 1471 1472 if self._writing: 1473 raise ValueError("Can't read from the ZIP file while there " 1474 "is an open writing handle on it. " 1475 "Close the writing handle before trying to read.") 1476 1477 # Open for reading: 1478 self._fileRefCnt += 1 1479 zef_file = _SharedFile(self.fp, zinfo.header_offset, 1480 self._fpclose, self._lock, lambda: self._writing) 1481 try: 1482 # Skip the file header: 1483 fheader = zef_file.read(sizeFileHeader) 1484 if len(fheader) != sizeFileHeader: 1485 raise BadZipFile("Truncated file header") 1486 fheader = struct.unpack(structFileHeader, fheader) 1487 if fheader[_FH_SIGNATURE] != stringFileHeader: 1488 raise BadZipFile("Bad magic number for file header") 1489 1490 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 1491 if fheader[_FH_EXTRA_FIELD_LENGTH]: 1492 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) 1493 1494 if zinfo.flag_bits & 0x20: 1495 # Zip 2.7: compressed patched data 1496 raise NotImplementedError("compressed patched data (flag bit 5)") 1497 1498 if zinfo.flag_bits & 0x40: 1499 # strong encryption 1500 raise NotImplementedError("strong encryption (flag bit 6)") 1501 1502 if zinfo.flag_bits & 0x800: 1503 # UTF-8 filename 1504 fname_str = fname.decode("utf-8") 1505 else: 1506 fname_str = fname.decode("cp437") 1507 1508 if fname_str != zinfo.orig_filename: 1509 raise BadZipFile( 1510 'File name in directory %r and header %r differ.' 1511 % (zinfo.orig_filename, fname)) 1512 1513 # check for encrypted flag & handle password 1514 is_encrypted = zinfo.flag_bits & 0x1 1515 zd = None 1516 if is_encrypted: 1517 if not pwd: 1518 pwd = self.pwd 1519 if not pwd: 1520 raise RuntimeError("File %r is encrypted, password " 1521 "required for extraction" % name) 1522 1523 zd = _ZipDecrypter(pwd) 1524 # The first 12 bytes in the cypher stream is an encryption header 1525 # used to strengthen the algorithm. The first 11 bytes are 1526 # completely random, while the 12th contains the MSB of the CRC, 1527 # or the MSB of the file time depending on the header type 1528 # and is used to check the correctness of the password. 1529 header = zef_file.read(12) 1530 h = zd(header[0:12]) 1531 if zinfo.flag_bits & 0x8: 1532 # compare against the file type from extended local headers 1533 check_byte = (zinfo._raw_time >> 8) & 0xff 1534 else: 1535 # compare against the CRC otherwise 1536 check_byte = (zinfo.CRC >> 24) & 0xff 1537 if h[11] != check_byte: 1538 raise RuntimeError("Bad password for file %r" % name) 1539 1540 return ZipExtFile(zef_file, mode, zinfo, zd, True) 1541 except: 1542 zef_file.close() 1543 raise 1544 1545 def _open_to_write(self, zinfo, force_zip64=False): 1546 if force_zip64 and not self._allowZip64: 1547 raise ValueError( 1548 "force_zip64 is True, but allowZip64 was False when opening " 1549 "the ZIP file." 1550 ) 1551 if self._writing: 1552 raise ValueError("Can't write to the ZIP file while there is " 1553 "another write handle open on it. " 1554 "Close the first handle before opening another.") 1555 1556 # Sizes and CRC are overwritten with correct data after processing the file 1557 if not hasattr(zinfo, 'file_size'): 1558 zinfo.file_size = 0 1559 zinfo.compress_size = 0 1560 zinfo.CRC = 0 1561 1562 zinfo.flag_bits = 0x00 1563 if zinfo.compress_type == ZIP_LZMA: 1564 # Compressed data includes an end-of-stream (EOS) marker 1565 zinfo.flag_bits |= 0x02 1566 if not self._seekable: 1567 zinfo.flag_bits |= 0x08 1568 1569 if not zinfo.external_attr: 1570 zinfo.external_attr = 0o600 << 16 # permissions: ?rw------- 1571 1572 # Compressed size can be larger than uncompressed size 1573 zip64 = self._allowZip64 and \ 1574 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT) 1575 1576 if self._seekable: 1577 self.fp.seek(self.start_dir) 1578 zinfo.header_offset = self.fp.tell() 1579 1580 self._writecheck(zinfo) 1581 self._didModify = True 1582 1583 self.fp.write(zinfo.FileHeader(zip64)) 1584 1585 self._writing = True 1586 return _ZipWriteFile(self, zinfo, zip64) 1587 1588 def extract(self, member, path=None, pwd=None): 1589 """Extract a member from the archive to the current working directory, 1590 using its full name. Its file information is extracted as accurately 1591 as possible. `member' may be a filename or a ZipInfo object. You can 1592 specify a different directory using `path'. 1593 """ 1594 if path is None: 1595 path = os.getcwd() 1596 else: 1597 path = os.fspath(path) 1598 1599 return self._extract_member(member, path, pwd) 1600 1601 def extractall(self, path=None, members=None, pwd=None): 1602 """Extract all members from the archive to the current working 1603 directory. `path' specifies a different directory to extract to. 1604 `members' is optional and must be a subset of the list returned 1605 by namelist(). 1606 """ 1607 if members is None: 1608 members = self.namelist() 1609 1610 if path is None: 1611 path = os.getcwd() 1612 else: 1613 path = os.fspath(path) 1614 1615 for zipinfo in members: 1616 self._extract_member(zipinfo, path, pwd) 1617 1618 @classmethod 1619 def _sanitize_windows_name(cls, arcname, pathsep): 1620 """Replace bad characters and remove trailing dots from parts.""" 1621 table = cls._windows_illegal_name_trans_table 1622 if not table: 1623 illegal = ':<>|"?*' 1624 table = str.maketrans(illegal, '_' * len(illegal)) 1625 cls._windows_illegal_name_trans_table = table 1626 arcname = arcname.translate(table) 1627 # remove trailing dots 1628 arcname = (x.rstrip('.') for x in arcname.split(pathsep)) 1629 # rejoin, removing empty parts. 1630 arcname = pathsep.join(x for x in arcname if x) 1631 return arcname 1632 1633 def _extract_member(self, member, targetpath, pwd): 1634 """Extract the ZipInfo object 'member' to a physical 1635 file on the path targetpath. 1636 """ 1637 if not isinstance(member, ZipInfo): 1638 member = self.getinfo(member) 1639 1640 # build the destination pathname, replacing 1641 # forward slashes to platform specific separators. 1642 arcname = member.filename.replace('/', os.path.sep) 1643 1644 if os.path.altsep: 1645 arcname = arcname.replace(os.path.altsep, os.path.sep) 1646 # interpret absolute pathname as relative, remove drive letter or 1647 # UNC path, redundant separators, "." and ".." components. 1648 arcname = os.path.splitdrive(arcname)[1] 1649 invalid_path_parts = ('', os.path.curdir, os.path.pardir) 1650 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) 1651 if x not in invalid_path_parts) 1652 if os.path.sep == '\\': 1653 # filter illegal characters on Windows 1654 arcname = self._sanitize_windows_name(arcname, os.path.sep) 1655 1656 targetpath = os.path.join(targetpath, arcname) 1657 targetpath = os.path.normpath(targetpath) 1658 1659 # Create all upper directories if necessary. 1660 upperdirs = os.path.dirname(targetpath) 1661 if upperdirs and not os.path.exists(upperdirs): 1662 os.makedirs(upperdirs) 1663 1664 if member.is_dir(): 1665 if not os.path.isdir(targetpath): 1666 os.mkdir(targetpath) 1667 return targetpath 1668 1669 with self.open(member, pwd=pwd) as source, \ 1670 open(targetpath, "wb") as target: 1671 shutil.copyfileobj(source, target) 1672 1673 return targetpath 1674 1675 def _writecheck(self, zinfo): 1676 """Check for errors before writing a file to the archive.""" 1677 if zinfo.filename in self.NameToInfo: 1678 import warnings 1679 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3) 1680 if self.mode not in ('w', 'x', 'a'): 1681 raise ValueError("write() requires mode 'w', 'x', or 'a'") 1682 if not self.fp: 1683 raise ValueError( 1684 "Attempt to write ZIP archive that was already closed") 1685 _check_compression(zinfo.compress_type) 1686 if not self._allowZip64: 1687 requires_zip64 = None 1688 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT: 1689 requires_zip64 = "Files count" 1690 elif zinfo.file_size > ZIP64_LIMIT: 1691 requires_zip64 = "Filesize" 1692 elif zinfo.header_offset > ZIP64_LIMIT: 1693 requires_zip64 = "Zipfile size" 1694 if requires_zip64: 1695 raise LargeZipFile(requires_zip64 + 1696 " would require ZIP64 extensions") 1697 1698 def write(self, filename, arcname=None, 1699 compress_type=None, compresslevel=None): 1700 """Put the bytes from filename into the archive under the name 1701 arcname.""" 1702 if not self.fp: 1703 raise ValueError( 1704 "Attempt to write to ZIP archive that was already closed") 1705 if self._writing: 1706 raise ValueError( 1707 "Can't write to ZIP archive while an open writing handle exists" 1708 ) 1709 1710 zinfo = ZipInfo.from_file(filename, arcname) 1711 1712 if zinfo.is_dir(): 1713 zinfo.compress_size = 0 1714 zinfo.CRC = 0 1715 else: 1716 if compress_type is not None: 1717 zinfo.compress_type = compress_type 1718 else: 1719 zinfo.compress_type = self.compression 1720 1721 if compresslevel is not None: 1722 zinfo._compresslevel = compresslevel 1723 else: 1724 zinfo._compresslevel = self.compresslevel 1725 1726 if zinfo.is_dir(): 1727 with self._lock: 1728 if self._seekable: 1729 self.fp.seek(self.start_dir) 1730 zinfo.header_offset = self.fp.tell() # Start of header bytes 1731 if zinfo.compress_type == ZIP_LZMA: 1732 # Compressed data includes an end-of-stream (EOS) marker 1733 zinfo.flag_bits |= 0x02 1734 1735 self._writecheck(zinfo) 1736 self._didModify = True 1737 1738 self.filelist.append(zinfo) 1739 self.NameToInfo[zinfo.filename] = zinfo 1740 self.fp.write(zinfo.FileHeader(False)) 1741 self.start_dir = self.fp.tell() 1742 else: 1743 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: 1744 shutil.copyfileobj(src, dest, 1024*8) 1745 1746 def writestr(self, zinfo_or_arcname, data, 1747 compress_type=None, compresslevel=None): 1748 """Write a file into the archive. The contents is 'data', which 1749 may be either a 'str' or a 'bytes' instance; if it is a 'str', 1750 it is encoded as UTF-8 first. 1751 'zinfo_or_arcname' is either a ZipInfo instance or 1752 the name of the file in the archive.""" 1753 if isinstance(data, str): 1754 data = data.encode("utf-8") 1755 if not isinstance(zinfo_or_arcname, ZipInfo): 1756 zinfo = ZipInfo(filename=zinfo_or_arcname, 1757 date_time=time.localtime(time.time())[:6]) 1758 zinfo.compress_type = self.compression 1759 zinfo._compresslevel = self.compresslevel 1760 if zinfo.filename[-1] == '/': 1761 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x 1762 zinfo.external_attr |= 0x10 # MS-DOS directory flag 1763 else: 1764 zinfo.external_attr = 0o600 << 16 # ?rw------- 1765 else: 1766 zinfo = zinfo_or_arcname 1767 1768 if not self.fp: 1769 raise ValueError( 1770 "Attempt to write to ZIP archive that was already closed") 1771 if self._writing: 1772 raise ValueError( 1773 "Can't write to ZIP archive while an open writing handle exists." 1774 ) 1775 1776 if compress_type is not None: 1777 zinfo.compress_type = compress_type 1778 1779 if compresslevel is not None: 1780 zinfo._compresslevel = compresslevel 1781 1782 zinfo.file_size = len(data) # Uncompressed size 1783 with self._lock: 1784 with self.open(zinfo, mode='w') as dest: 1785 dest.write(data) 1786 1787 def __del__(self): 1788 """Call the "close()" method in case the user forgot.""" 1789 self.close() 1790 1791 def close(self): 1792 """Close the file, and for mode 'w', 'x' and 'a' write the ending 1793 records.""" 1794 if self.fp is None: 1795 return 1796 1797 if self._writing: 1798 raise ValueError("Can't close the ZIP file while there is " 1799 "an open writing handle on it. " 1800 "Close the writing handle before closing the zip.") 1801 1802 try: 1803 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records 1804 with self._lock: 1805 if self._seekable: 1806 self.fp.seek(self.start_dir) 1807 self._write_end_record() 1808 finally: 1809 fp = self.fp 1810 self.fp = None 1811 self._fpclose(fp) 1812 1813 def _write_end_record(self): 1814 for zinfo in self.filelist: # write central directory 1815 dt = zinfo.date_time 1816 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 1817 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 1818 extra = [] 1819 if zinfo.file_size > ZIP64_LIMIT \ 1820 or zinfo.compress_size > ZIP64_LIMIT: 1821 extra.append(zinfo.file_size) 1822 extra.append(zinfo.compress_size) 1823 file_size = 0xffffffff 1824 compress_size = 0xffffffff 1825 else: 1826 file_size = zinfo.file_size 1827 compress_size = zinfo.compress_size 1828 1829 if zinfo.header_offset > ZIP64_LIMIT: 1830 extra.append(zinfo.header_offset) 1831 header_offset = 0xffffffff 1832 else: 1833 header_offset = zinfo.header_offset 1834 1835 extra_data = zinfo.extra 1836 min_version = 0 1837 if extra: 1838 # Append a ZIP64 field to the extra's 1839 extra_data = _strip_extra(extra_data, (1,)) 1840 extra_data = struct.pack( 1841 '<HH' + 'Q'*len(extra), 1842 1, 8*len(extra), *extra) + extra_data 1843 1844 min_version = ZIP64_VERSION 1845 1846 if zinfo.compress_type == ZIP_BZIP2: 1847 min_version = max(BZIP2_VERSION, min_version) 1848 elif zinfo.compress_type == ZIP_LZMA: 1849 min_version = max(LZMA_VERSION, min_version) 1850 1851 extract_version = max(min_version, zinfo.extract_version) 1852 create_version = max(min_version, zinfo.create_version) 1853 try: 1854 filename, flag_bits = zinfo._encodeFilenameFlags() 1855 centdir = struct.pack(structCentralDir, 1856 stringCentralDir, create_version, 1857 zinfo.create_system, extract_version, zinfo.reserved, 1858 flag_bits, zinfo.compress_type, dostime, dosdate, 1859 zinfo.CRC, compress_size, file_size, 1860 len(filename), len(extra_data), len(zinfo.comment), 1861 0, zinfo.internal_attr, zinfo.external_attr, 1862 header_offset) 1863 except DeprecationWarning: 1864 print((structCentralDir, stringCentralDir, create_version, 1865 zinfo.create_system, extract_version, zinfo.reserved, 1866 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, 1867 zinfo.CRC, compress_size, file_size, 1868 len(zinfo.filename), len(extra_data), len(zinfo.comment), 1869 0, zinfo.internal_attr, zinfo.external_attr, 1870 header_offset), file=sys.stderr) 1871 raise 1872 self.fp.write(centdir) 1873 self.fp.write(filename) 1874 self.fp.write(extra_data) 1875 self.fp.write(zinfo.comment) 1876 1877 pos2 = self.fp.tell() 1878 # Write end-of-zip-archive record 1879 centDirCount = len(self.filelist) 1880 centDirSize = pos2 - self.start_dir 1881 centDirOffset = self.start_dir 1882 requires_zip64 = None 1883 if centDirCount > ZIP_FILECOUNT_LIMIT: 1884 requires_zip64 = "Files count" 1885 elif centDirOffset > ZIP64_LIMIT: 1886 requires_zip64 = "Central directory offset" 1887 elif centDirSize > ZIP64_LIMIT: 1888 requires_zip64 = "Central directory size" 1889 if requires_zip64: 1890 # Need to write the ZIP64 end-of-archive records 1891 if not self._allowZip64: 1892 raise LargeZipFile(requires_zip64 + 1893 " would require ZIP64 extensions") 1894 zip64endrec = struct.pack( 1895 structEndArchive64, stringEndArchive64, 1896 44, 45, 45, 0, 0, centDirCount, centDirCount, 1897 centDirSize, centDirOffset) 1898 self.fp.write(zip64endrec) 1899 1900 zip64locrec = struct.pack( 1901 structEndArchive64Locator, 1902 stringEndArchive64Locator, 0, pos2, 1) 1903 self.fp.write(zip64locrec) 1904 centDirCount = min(centDirCount, 0xFFFF) 1905 centDirSize = min(centDirSize, 0xFFFFFFFF) 1906 centDirOffset = min(centDirOffset, 0xFFFFFFFF) 1907 1908 endrec = struct.pack(structEndArchive, stringEndArchive, 1909 0, 0, centDirCount, centDirCount, 1910 centDirSize, centDirOffset, len(self._comment)) 1911 self.fp.write(endrec) 1912 self.fp.write(self._comment) 1913 self.fp.flush() 1914 1915 def _fpclose(self, fp): 1916 assert self._fileRefCnt > 0 1917 self._fileRefCnt -= 1 1918 if not self._fileRefCnt and not self._filePassed: 1919 fp.close() 1920 1921 1922class PyZipFile(ZipFile): 1923 """Class to create ZIP archives with Python library files and packages.""" 1924 1925 def __init__(self, file, mode="r", compression=ZIP_STORED, 1926 allowZip64=True, optimize=-1): 1927 ZipFile.__init__(self, file, mode=mode, compression=compression, 1928 allowZip64=allowZip64) 1929 self._optimize = optimize 1930 1931 def writepy(self, pathname, basename="", filterfunc=None): 1932 """Add all files from "pathname" to the ZIP archive. 1933 1934 If pathname is a package directory, search the directory and 1935 all package subdirectories recursively for all *.py and enter 1936 the modules into the archive. If pathname is a plain 1937 directory, listdir *.py and enter all modules. Else, pathname 1938 must be a Python *.py file and the module will be put into the 1939 archive. Added modules are always module.pyc. 1940 This method will compile the module.py into module.pyc if 1941 necessary. 1942 If filterfunc(pathname) is given, it is called with every argument. 1943 When it is False, the file or directory is skipped. 1944 """ 1945 pathname = os.fspath(pathname) 1946 if filterfunc and not filterfunc(pathname): 1947 if self.debug: 1948 label = 'path' if os.path.isdir(pathname) else 'file' 1949 print('%s %r skipped by filterfunc' % (label, pathname)) 1950 return 1951 dir, name = os.path.split(pathname) 1952 if os.path.isdir(pathname): 1953 initname = os.path.join(pathname, "__init__.py") 1954 if os.path.isfile(initname): 1955 # This is a package directory, add it 1956 if basename: 1957 basename = "%s/%s" % (basename, name) 1958 else: 1959 basename = name 1960 if self.debug: 1961 print("Adding package in", pathname, "as", basename) 1962 fname, arcname = self._get_codename(initname[0:-3], basename) 1963 if self.debug: 1964 print("Adding", arcname) 1965 self.write(fname, arcname) 1966 dirlist = sorted(os.listdir(pathname)) 1967 dirlist.remove("__init__.py") 1968 # Add all *.py files and package subdirectories 1969 for filename in dirlist: 1970 path = os.path.join(pathname, filename) 1971 root, ext = os.path.splitext(filename) 1972 if os.path.isdir(path): 1973 if os.path.isfile(os.path.join(path, "__init__.py")): 1974 # This is a package directory, add it 1975 self.writepy(path, basename, 1976 filterfunc=filterfunc) # Recursive call 1977 elif ext == ".py": 1978 if filterfunc and not filterfunc(path): 1979 if self.debug: 1980 print('file %r skipped by filterfunc' % path) 1981 continue 1982 fname, arcname = self._get_codename(path[0:-3], 1983 basename) 1984 if self.debug: 1985 print("Adding", arcname) 1986 self.write(fname, arcname) 1987 else: 1988 # This is NOT a package directory, add its files at top level 1989 if self.debug: 1990 print("Adding files from directory", pathname) 1991 for filename in sorted(os.listdir(pathname)): 1992 path = os.path.join(pathname, filename) 1993 root, ext = os.path.splitext(filename) 1994 if ext == ".py": 1995 if filterfunc and not filterfunc(path): 1996 if self.debug: 1997 print('file %r skipped by filterfunc' % path) 1998 continue 1999 fname, arcname = self._get_codename(path[0:-3], 2000 basename) 2001 if self.debug: 2002 print("Adding", arcname) 2003 self.write(fname, arcname) 2004 else: 2005 if pathname[-3:] != ".py": 2006 raise RuntimeError( 2007 'Files added with writepy() must end with ".py"') 2008 fname, arcname = self._get_codename(pathname[0:-3], basename) 2009 if self.debug: 2010 print("Adding file", arcname) 2011 self.write(fname, arcname) 2012 2013 def _get_codename(self, pathname, basename): 2014 """Return (filename, archivename) for the path. 2015 2016 Given a module name path, return the correct file path and 2017 archive name, compiling if necessary. For example, given 2018 /python/lib/string, return (/python/lib/string.pyc, string). 2019 """ 2020 def _compile(file, optimize=-1): 2021 import py_compile 2022 if self.debug: 2023 print("Compiling", file) 2024 try: 2025 py_compile.compile(file, doraise=True, optimize=optimize) 2026 except py_compile.PyCompileError as err: 2027 print(err.msg) 2028 return False 2029 return True 2030 2031 file_py = pathname + ".py" 2032 file_pyc = pathname + ".pyc" 2033 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='') 2034 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1) 2035 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2) 2036 if self._optimize == -1: 2037 # legacy mode: use whatever file is present 2038 if (os.path.isfile(file_pyc) and 2039 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime): 2040 # Use .pyc file. 2041 arcname = fname = file_pyc 2042 elif (os.path.isfile(pycache_opt0) and 2043 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime): 2044 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2045 # file name in the archive. 2046 fname = pycache_opt0 2047 arcname = file_pyc 2048 elif (os.path.isfile(pycache_opt1) and 2049 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime): 2050 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2051 # file name in the archive. 2052 fname = pycache_opt1 2053 arcname = file_pyc 2054 elif (os.path.isfile(pycache_opt2) and 2055 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime): 2056 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2057 # file name in the archive. 2058 fname = pycache_opt2 2059 arcname = file_pyc 2060 else: 2061 # Compile py into PEP 3147 pyc file. 2062 if _compile(file_py): 2063 if sys.flags.optimize == 0: 2064 fname = pycache_opt0 2065 elif sys.flags.optimize == 1: 2066 fname = pycache_opt1 2067 else: 2068 fname = pycache_opt2 2069 arcname = file_pyc 2070 else: 2071 fname = arcname = file_py 2072 else: 2073 # new mode: use given optimization level 2074 if self._optimize == 0: 2075 fname = pycache_opt0 2076 arcname = file_pyc 2077 else: 2078 arcname = file_pyc 2079 if self._optimize == 1: 2080 fname = pycache_opt1 2081 elif self._optimize == 2: 2082 fname = pycache_opt2 2083 else: 2084 msg = "invalid value for 'optimize': {!r}".format(self._optimize) 2085 raise ValueError(msg) 2086 if not (os.path.isfile(fname) and 2087 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime): 2088 if not _compile(file_py, optimize=self._optimize): 2089 fname = arcname = file_py 2090 archivename = os.path.split(arcname)[1] 2091 if basename: 2092 archivename = "%s/%s" % (basename, archivename) 2093 return (fname, archivename) 2094 2095 2096def main(args=None): 2097 import argparse 2098 2099 description = 'A simple command-line interface for zipfile module.' 2100 parser = argparse.ArgumentParser(description=description) 2101 group = parser.add_mutually_exclusive_group(required=True) 2102 group.add_argument('-l', '--list', metavar='<zipfile>', 2103 help='Show listing of a zipfile') 2104 group.add_argument('-e', '--extract', nargs=2, 2105 metavar=('<zipfile>', '<output_dir>'), 2106 help='Extract zipfile into target dir') 2107 group.add_argument('-c', '--create', nargs='+', 2108 metavar=('<name>', '<file>'), 2109 help='Create zipfile from sources') 2110 group.add_argument('-t', '--test', metavar='<zipfile>', 2111 help='Test if a zipfile is valid') 2112 args = parser.parse_args(args) 2113 2114 if args.test is not None: 2115 src = args.test 2116 with ZipFile(src, 'r') as zf: 2117 badfile = zf.testzip() 2118 if badfile: 2119 print("The following enclosed file is corrupted: {!r}".format(badfile)) 2120 print("Done testing") 2121 2122 elif args.list is not None: 2123 src = args.list 2124 with ZipFile(src, 'r') as zf: 2125 zf.printdir() 2126 2127 elif args.extract is not None: 2128 src, curdir = args.extract 2129 with ZipFile(src, 'r') as zf: 2130 zf.extractall(curdir) 2131 2132 elif args.create is not None: 2133 zip_name = args.create.pop(0) 2134 files = args.create 2135 2136 def addToZip(zf, path, zippath): 2137 if os.path.isfile(path): 2138 zf.write(path, zippath, ZIP_DEFLATED) 2139 elif os.path.isdir(path): 2140 if zippath: 2141 zf.write(path, zippath) 2142 for nm in sorted(os.listdir(path)): 2143 addToZip(zf, 2144 os.path.join(path, nm), os.path.join(zippath, nm)) 2145 # else: ignore 2146 2147 with ZipFile(zip_name, 'w') as zf: 2148 for path in files: 2149 zippath = os.path.basename(path) 2150 if not zippath: 2151 zippath = os.path.basename(os.path.dirname(path)) 2152 if zippath in ('', os.curdir, os.pardir): 2153 zippath = '' 2154 addToZip(zf, path, zippath) 2155 2156if __name__ == "__main__": 2157 main() 2158