"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
import binascii
import importlib.util
import io
import itertools
import os
import posixpath
import shutil
import stat
import struct
import sys
import threading
import time
import contextlib

try:
    import zlib # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    # Fall back to binascii's CRC-32 when zlib is unavailable.
    crc32 = binascii.crc32

try:
    import bz2 # We may need its compression method
except ImportError:
    bz2 = None

try:
    import lzma # We may need its compression method
except ImportError:
    lzma = None

__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
           "Path"]

class BadZipFile(Exception):
    """Raised when a file is damaged or is not actually a ZIP file."""
    pass


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """

error = BadZipfile = BadZipFile # Pre-3.2 compatibility names


# Largest value that fits in the 32-bit size/offset fields; beyond this,
# ZIP64 extensions are required.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

_DD_SIGNATURE = 0x08074b50

# Each extra-field record starts with a 2-byte ID and a 2-byte data size.
_EXTRA_FIELD_STRUCT = struct.Struct('<HH')

def _strip_extra(extra, xids):
    """Return *extra* with every extra-field record whose ID is in *xids*
    removed.

    extra is the raw extra-field bytes (a sequence of [2-byte id,
    2-byte length, data] records); xids is a container of header IDs to
    drop.  Returns *extra* unchanged (same object) when nothing matched.
    """
    unpack = _EXTRA_FIELD_STRUCT.unpack
    modified = False
    buffer = []
    start = i = 0
    while i + 4 <= len(extra):
        xid, xlen = unpack(extra[i : i + 4])
        j = i + 4 + xlen
        if xid in xids:
            # Flush the kept run that precedes this stripped record.
            if i != start:
                buffer.append(extra[start : i])
            start = j
            modified = True
        i = j
    if not modified:
        return extra
    # Bug fix: also keep whatever follows the last stripped record (kept
    # records and any trailing residue); the original code silently
    # dropped this tail.
    buffer.append(extra[start:])
    return b''.join(buffer)

def _check_zipfile(fp):
    """Return True if *fp* looks like a ZIP file (a valid end-of-central-
    directory record can be located)."""
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False

def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result

def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    offset is the (negative) position of the classic end-of-central-dir
    record relative to the end of the file; endrec is the list parsed
    from it.  Returns endrec, updated in place when ZIP64 records exist.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec


def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None


class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 selects whether a ZIP64 extra record is emitted; when None
        it is chosen automatically from the recorded sizes.  Raises
        LargeZipFile if the sizes need ZIP64 but zip64 is False.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded_filename, flag_bits): ASCII when possible,
        otherwise UTF-8 with the language-encoding flag (0x800) set."""
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Parse self.extra, applying any ZIP64 (id 0x0001) size/offset
        overrides to file_size, compress_size and header_offset."""
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Outside the representable DOS date range, optionally clamp
        # instead of letting ZipInfo.__init__ raise.
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        return self.filename[-1] == '/'


# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
# internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().

_crctable = None
def _gen_crc(crc):
    """Advance *crc* through 8 steps of the CRC-32 shift register
    (polynomial 0xEDB88320); used to build the table for _ZipDecrypter."""
    for j in range(8):
        if crc & 1:
            crc = (crc >> 1) ^ 0xEDB88320
        else:
            crc >>= 1
    return crc

# ZIP supports a password-based form of encryption.  Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
548# 549# Usage: 550# zd = _ZipDecrypter(mypwd) 551# plain_bytes = zd(cypher_bytes) 552 553def _ZipDecrypter(pwd): 554 key0 = 305419896 555 key1 = 591751049 556 key2 = 878082192 557 558 global _crctable 559 if _crctable is None: 560 _crctable = list(map(_gen_crc, range(256))) 561 crctable = _crctable 562 563 def crc32(ch, crc): 564 """Compute the CRC32 primitive on one byte.""" 565 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] 566 567 def update_keys(c): 568 nonlocal key0, key1, key2 569 key0 = crc32(c, key0) 570 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF 571 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF 572 key2 = crc32(key1 >> 24, key2) 573 574 for p in pwd: 575 update_keys(p) 576 577 def decrypter(data): 578 """Decrypt a bytes object.""" 579 result = bytearray() 580 append = result.append 581 for c in data: 582 k = key2 | 2 583 c ^= ((k * (k^1)) >> 8) & 0xFF 584 update_keys(c) 585 append(c) 586 return bytes(result) 587 588 return decrypter 589 590 591class LZMACompressor: 592 593 def __init__(self): 594 self._comp = None 595 596 def _init(self): 597 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1}) 598 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[ 599 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props) 600 ]) 601 return struct.pack('<BBH', 9, 4, len(props)) + props 602 603 def compress(self, data): 604 if self._comp is None: 605 return self._init() + self._comp.compress(data) 606 return self._comp.compress(data) 607 608 def flush(self): 609 if self._comp is None: 610 return self._init() + self._comp.flush() 611 return self._comp.flush() 612 613 614class LZMADecompressor: 615 616 def __init__(self): 617 self._decomp = None 618 self._unconsumed = b'' 619 self.eof = False 620 621 def decompress(self, data): 622 if self._decomp is None: 623 self._unconsumed += data 624 if len(self._unconsumed) <= 4: 625 return b'' 626 psize, = struct.unpack('<H', self._unconsumed[2:4]) 627 if len(self._unconsumed) <= 4 + psize: 628 return b'' 629 
630 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[ 631 lzma._decode_filter_properties(lzma.FILTER_LZMA1, 632 self._unconsumed[4:4 + psize]) 633 ]) 634 data = self._unconsumed[4 + psize:] 635 del self._unconsumed 636 637 result = self._decomp.decompress(data) 638 self.eof = self._decomp.eof 639 return result 640 641 642compressor_names = { 643 0: 'store', 644 1: 'shrink', 645 2: 'reduce', 646 3: 'reduce', 647 4: 'reduce', 648 5: 'reduce', 649 6: 'implode', 650 7: 'tokenize', 651 8: 'deflate', 652 9: 'deflate64', 653 10: 'implode', 654 12: 'bzip2', 655 14: 'lzma', 656 18: 'terse', 657 19: 'lz77', 658 97: 'wavpack', 659 98: 'ppmd', 660} 661 662def _check_compression(compression): 663 if compression == ZIP_STORED: 664 pass 665 elif compression == ZIP_DEFLATED: 666 if not zlib: 667 raise RuntimeError( 668 "Compression requires the (missing) zlib module") 669 elif compression == ZIP_BZIP2: 670 if not bz2: 671 raise RuntimeError( 672 "Compression requires the (missing) bz2 module") 673 elif compression == ZIP_LZMA: 674 if not lzma: 675 raise RuntimeError( 676 "Compression requires the (missing) lzma module") 677 else: 678 raise NotImplementedError("That compression method is not supported") 679 680 681def _get_compressor(compress_type, compresslevel=None): 682 if compress_type == ZIP_DEFLATED: 683 if compresslevel is not None: 684 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15) 685 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) 686 elif compress_type == ZIP_BZIP2: 687 if compresslevel is not None: 688 return bz2.BZ2Compressor(compresslevel) 689 return bz2.BZ2Compressor() 690 # compresslevel is ignored for ZIP_LZMA 691 elif compress_type == ZIP_LZMA: 692 return LZMACompressor() 693 else: 694 return None 695 696 697def _get_decompressor(compress_type): 698 _check_compression(compress_type) 699 if compress_type == ZIP_STORED: 700 return None 701 elif compress_type == ZIP_DEFLATED: 702 return zlib.decompressobj(-15) 703 
elif compress_type == ZIP_BZIP2: 704 return bz2.BZ2Decompressor() 705 elif compress_type == ZIP_LZMA: 706 return LZMADecompressor() 707 else: 708 descr = compressor_names.get(compress_type) 709 if descr: 710 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr)) 711 else: 712 raise NotImplementedError("compression type %d" % (compress_type,)) 713 714 715class _SharedFile: 716 def __init__(self, file, pos, close, lock, writing): 717 self._file = file 718 self._pos = pos 719 self._close = close 720 self._lock = lock 721 self._writing = writing 722 self.seekable = file.seekable 723 self.tell = file.tell 724 725 def seek(self, offset, whence=0): 726 with self._lock: 727 if self._writing(): 728 raise ValueError("Can't reposition in the ZIP file while " 729 "there is an open writing handle on it. " 730 "Close the writing handle before trying to read.") 731 self._file.seek(offset, whence) 732 self._pos = self._file.tell() 733 return self._pos 734 735 def read(self, n=-1): 736 with self._lock: 737 if self._writing(): 738 raise ValueError("Can't read from the ZIP file while there " 739 "is an open writing handle on it. " 740 "Close the writing handle before trying to read.") 741 self._file.seek(self._pos) 742 data = self._file.read(n) 743 self._pos = self._file.tell() 744 return data 745 746 def close(self): 747 if self._file is not None: 748 fileobj = self._file 749 self._file = None 750 self._close(fileobj) 751 752# Provide the tell method for unseekable stream 753class _Tellable: 754 def __init__(self, fp): 755 self.fp = fp 756 self.offset = 0 757 758 def write(self, data): 759 n = self.fp.write(data) 760 self.offset += n 761 return n 762 763 def tell(self): 764 return self.offset 765 766 def flush(self): 767 self.fp.flush() 768 769 def close(self): 770 self.fp.close() 771 772 773class ZipExtFile(io.BufferedIOBase): 774 """File-like object for reading an archive member. 775 Is returned by ZipFile.open(). 
776 """ 777 778 # Max size supported by decompressor. 779 MAX_N = 1 << 31 - 1 780 781 # Read from compressed files in 4k blocks. 782 MIN_READ_SIZE = 4096 783 784 # Chunk size to read during seek 785 MAX_SEEK_READ = 1 << 24 786 787 def __init__(self, fileobj, mode, zipinfo, pwd=None, 788 close_fileobj=False): 789 self._fileobj = fileobj 790 self._pwd = pwd 791 self._close_fileobj = close_fileobj 792 793 self._compress_type = zipinfo.compress_type 794 self._compress_left = zipinfo.compress_size 795 self._left = zipinfo.file_size 796 797 self._decompressor = _get_decompressor(self._compress_type) 798 799 self._eof = False 800 self._readbuffer = b'' 801 self._offset = 0 802 803 self.newlines = None 804 805 self.mode = mode 806 self.name = zipinfo.filename 807 808 if hasattr(zipinfo, 'CRC'): 809 self._expected_crc = zipinfo.CRC 810 self._running_crc = crc32(b'') 811 else: 812 self._expected_crc = None 813 814 self._seekable = False 815 try: 816 if fileobj.seekable(): 817 self._orig_compress_start = fileobj.tell() 818 self._orig_compress_size = zipinfo.compress_size 819 self._orig_file_size = zipinfo.file_size 820 self._orig_start_crc = self._running_crc 821 self._seekable = True 822 except AttributeError: 823 pass 824 825 self._decrypter = None 826 if pwd: 827 if zipinfo.flag_bits & 0x8: 828 # compare against the file type from extended local headers 829 check_byte = (zipinfo._raw_time >> 8) & 0xff 830 else: 831 # compare against the CRC otherwise 832 check_byte = (zipinfo.CRC >> 24) & 0xff 833 h = self._init_decrypter() 834 if h != check_byte: 835 raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename) 836 837 838 def _init_decrypter(self): 839 self._decrypter = _ZipDecrypter(self._pwd) 840 # The first 12 bytes in the cypher stream is an encryption header 841 # used to strengthen the algorithm. 
The first 11 bytes are 842 # completely random, while the 12th contains the MSB of the CRC, 843 # or the MSB of the file time depending on the header type 844 # and is used to check the correctness of the password. 845 header = self._fileobj.read(12) 846 self._compress_left -= 12 847 return self._decrypter(header)[11] 848 849 def __repr__(self): 850 result = ['<%s.%s' % (self.__class__.__module__, 851 self.__class__.__qualname__)] 852 if not self.closed: 853 result.append(' name=%r mode=%r' % (self.name, self.mode)) 854 if self._compress_type != ZIP_STORED: 855 result.append(' compress_type=%s' % 856 compressor_names.get(self._compress_type, 857 self._compress_type)) 858 else: 859 result.append(' [closed]') 860 result.append('>') 861 return ''.join(result) 862 863 def readline(self, limit=-1): 864 """Read and return a line from the stream. 865 866 If limit is specified, at most limit bytes will be read. 867 """ 868 869 if limit < 0: 870 # Shortcut common case - newline found in buffer. 871 i = self._readbuffer.find(b'\n', self._offset) + 1 872 if i > 0: 873 line = self._readbuffer[self._offset: i] 874 self._offset = i 875 return line 876 877 return io.BufferedIOBase.readline(self, limit) 878 879 def peek(self, n=1): 880 """Returns buffered bytes without advancing the position.""" 881 if n > len(self._readbuffer) - self._offset: 882 chunk = self.read(n) 883 if len(chunk) > self._offset: 884 self._readbuffer = chunk + self._readbuffer[self._offset:] 885 self._offset = 0 886 else: 887 self._offset -= len(chunk) 888 889 # Return up to 512 bytes to reduce allocation overhead for tight loops. 890 return self._readbuffer[self._offset: self._offset + 512] 891 892 def readable(self): 893 if self.closed: 894 raise ValueError("I/O operation on closed file.") 895 return True 896 897 def read(self, n=-1): 898 """Read and return up to n bytes. 899 If the argument is omitted, None, or negative, data is read and returned until EOF is reached. 
900 """ 901 if self.closed: 902 raise ValueError("read from closed file.") 903 if n is None or n < 0: 904 buf = self._readbuffer[self._offset:] 905 self._readbuffer = b'' 906 self._offset = 0 907 while not self._eof: 908 buf += self._read1(self.MAX_N) 909 return buf 910 911 end = n + self._offset 912 if end < len(self._readbuffer): 913 buf = self._readbuffer[self._offset:end] 914 self._offset = end 915 return buf 916 917 n = end - len(self._readbuffer) 918 buf = self._readbuffer[self._offset:] 919 self._readbuffer = b'' 920 self._offset = 0 921 while n > 0 and not self._eof: 922 data = self._read1(n) 923 if n < len(data): 924 self._readbuffer = data 925 self._offset = n 926 buf += data[:n] 927 break 928 buf += data 929 n -= len(data) 930 return buf 931 932 def _update_crc(self, newdata): 933 # Update the CRC using the given data. 934 if self._expected_crc is None: 935 # No need to compute the CRC if we don't have a reference value 936 return 937 self._running_crc = crc32(newdata, self._running_crc) 938 # Check the CRC if we're at the end of the file 939 if self._eof and self._running_crc != self._expected_crc: 940 raise BadZipFile("Bad CRC-32 for file %r" % self.name) 941 942 def read1(self, n): 943 """Read up to n bytes with at most one read() system call.""" 944 945 if n is None or n < 0: 946 buf = self._readbuffer[self._offset:] 947 self._readbuffer = b'' 948 self._offset = 0 949 while not self._eof: 950 data = self._read1(self.MAX_N) 951 if data: 952 buf += data 953 break 954 return buf 955 956 end = n + self._offset 957 if end < len(self._readbuffer): 958 buf = self._readbuffer[self._offset:end] 959 self._offset = end 960 return buf 961 962 n = end - len(self._readbuffer) 963 buf = self._readbuffer[self._offset:] 964 self._readbuffer = b'' 965 self._offset = 0 966 if n > 0: 967 while not self._eof: 968 data = self._read1(n) 969 if n < len(data): 970 self._readbuffer = data 971 self._offset = n 972 buf += data[:n] 973 break 974 if data: 975 buf += data 976 
                break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            # max_length=n bounds output; leftover input stays in
            # unconsumed_tail for the next call.
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never return more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read up to n raw (still compressed/encrypted) bytes from the
        # underlying file object and decrypt them if needed.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # Underlying stream ended before the member's declared size.
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        # Close the shared file handle only if we own it, then close self.
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def seek(self, offset, whence=0):
        if self.closed:
            raise ValueError("seek on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0:  # Seek from start of file
            new_pos = offset
        elif whence == 1:  # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2:  # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        # Clamp the target position to [0, uncompressed size].
        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            # back to the start of the member and re-read forward, since
            # the decompressor cannot run backwards.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Consume forward to the target position in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        # Position in the *uncompressed* stream: total size minus what is
        # still unread, adjusted for the part of the buffer already consumed.
        if self.closed:
            raise ValueError("tell on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos


class _ZipWriteFile(io.BufferedIOBase):
    """Writable file-like object returned for 'w'-mode member opens.

    Compresses data as it is written, tracks CRC and sizes, and on close()
    finalizes the member's header (or data descriptor) in the archive.
    """

    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo          # ZipInfo updated with final sizes/CRC
        self._zip64 = zip64          # whether to emit ZIP64 fields
        self._zipfile = zf           # owning ZipFile
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0          # uncompressed bytes written so far
        self._compress_size = 0      # compressed bytes written so far
        self._crc = 0                # running CRC-32 of uncompressed data

    @property
    def _fileobj(self):
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write data to the archive, compressing if configured.

        Returns the number of (uncompressed) bytes consumed.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')
        nbytes = len(data)
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                # (data descriptor record, used for non-seekable output).
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            # Always release the ZipFile's single-writer flag.
            self._zipfile._writing = False


class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
                compresslevel=None)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
          or append 'a'.
    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.
    compresslevel: None (default for the given compression type) or an integer
                   specifying the level to pass to the compressor.
                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
                   When using ZIP_BZIP2 integers 1 through 9 are accepted.

    """

    fp = None                   # Set here since __del__ checks it
    _windows_illegal_name_trans_table = None   # lazy str.maketrans cache

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'."""
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False      # True once the central directory must be rewritten
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None              # default decryption password (bytes or None)
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # The dict chains fallbacks: e.g. 'a' opens 'r+b' first, and on
            # OSError retries with 'w+b' (create), then 'wb'.
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # Output stream cannot tell(); wrap it so we can track
                    # the write position ourselves.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # Clean up the file handle on any failure during setup, then
            # re-raise the original exception.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if self.fp is not None:
            if self._filePassed:
                result.append(' file=%r' % self.fp)
            elif self.filename is not None:
                result.append(' filename=%r' % self.filename)
            result.append(' mode=%r' % self.mode)
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file."""
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        # Read the whole central directory into memory and parse from there.
        data = fp.read(size_cd)
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]          # == centdir[_CD_FLAG_BITS]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            # Adjust for any leading non-zip data the archive was appended to.
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)


    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self, file=None):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
              file=file)
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
                  file=file)

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first bad file, or None if all are OK.
        """
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                with self.open(zinfo.filename, "r") as f:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
            except BadZipFile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError if no member with that exact name exists.
        """
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd:
            self.pwd = pwd
        else:
            self.pwd = None

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
        # check for valid comment length
        if len(comment) > ZIP_MAX_COMMENT:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT, stacklevel=2)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def read(self, name, pwd=None):
        """Return file bytes for name."""
        with self.open(name, "r", pwd) as fp:
            return fp.read()

    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        # _SharedFile keeps the archive's fp alive (ref-counted via _fpclose)
        # and serializes access through self._lock.
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # The local header's name must agree with the central directory's.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)
            else:
                pwd = None

            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
        except:
            zef_file.close()
            raise

    def _open_to_write(self, zinfo, force_zip64=False):
        # Write the local file header now and hand back a _ZipWriteFile,
        # which finalizes sizes/CRC in the header when it is closed.
        if force_zip64 and not self._allowZip64:
            raise ValueError(
                "force_zip64 is True, but allowZip64 was False when opening "
                "the ZIP file."
            )
        if self._writing:
            raise ValueError("Can't write to the ZIP file while there is "
                             "another write handle open on it. "
                             "Close the first handle before opening another.")

        # Size and CRC are overwritten with correct data after processing the file
        zinfo.compress_size = 0
        zinfo.CRC = 0

        zinfo.flag_bits = 0x00
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02
        if not self._seekable:
            # Non-seekable output: sizes/CRC go in a trailing data descriptor.
            zinfo.flag_bits |= 0x08

        if not zinfo.external_attr:
            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()

        self._writecheck(zinfo)
        self._didModify = True

        self.fp.write(zinfo.FileHeader(zip64))

        self._writing = True
        return _ZipWriteFile(self, zinfo, zip64)

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.
        """
        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)

        for zipinfo in members:
            self._extract_member(zipinfo, path, pwd)

    @classmethod
    def _sanitize_windows_name(cls, arcname, pathsep):
        """Replace bad characters and remove trailing dots from parts."""
        table = cls._windows_illegal_name_trans_table
        if not table:
            # Built once and cached on the class.
            illegal = ':<>|"?*'
            table = str.maketrans(illegal, '_' * len(illegal))
            cls._windows_illegal_name_trans_table = table
        arcname = arcname.translate(table)
        # remove trailing dots
        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
        # rejoin, removing empty parts.
        arcname = pathsep.join(x for x in arcname if x)
        return arcname

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)

        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret absolute pathname as relative, remove drive letter or
        # UNC path, redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                                   if x not in invalid_path_parts)
        if os.path.sep == '\\':
            # filter illegal characters on Windows
            arcname = self._sanitize_windows_name(arcname, os.path.sep)

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        if member.is_dir():
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            import warnings
            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
        if self.mode not in ('w', 'x', 'a'):
            raise ValueError("write() requires mode 'w', 'x', or 'a'")
        if not self.fp:
            raise ValueError(
                "Attempt to write ZIP archive that was already closed")
        _check_compression(zinfo.compress_type)
        if not self._allowZip64:
            # With ZIP64 disabled, enforce the classic format's hard limits.
            requires_zip64 = None
            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
                requires_zip64 = "Files count"
            elif zinfo.file_size > ZIP64_LIMIT:
                requires_zip64 = "Filesize"
            elif zinfo.header_offset > ZIP64_LIMIT:
                requires_zip64 = "Zipfile size"
            if requires_zip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")

    def write(self, filename, arcname=None,
              compress_type=None, compresslevel=None):
        """Put the bytes from filename into the archive under the name
        arcname."""
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists"
            )

        zinfo = ZipInfo.from_file(filename, arcname,
                                  strict_timestamps=self._strict_timestamps)

        if zinfo.is_dir():
            # Directory entries carry no data.
            zinfo.compress_size = 0
            zinfo.CRC = 0
        else:
            if compress_type is not None:
                zinfo.compress_type = compress_type
            else:
                zinfo.compress_type = self.compression

            if compresslevel is not None:
                zinfo._compresslevel = compresslevel
            else:
                zinfo._compresslevel = self.compresslevel

        if zinfo.is_dir():
            # Directory entry: write the header directly, no data stream.
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                zinfo.header_offset = self.fp.tell()  # Start of header bytes
                if zinfo.compress_type == ZIP_LZMA:
                    # Compressed data includes an end-of-stream (EOS) marker
                    zinfo.flag_bits |= 0x02

                self._writecheck(zinfo)
                self._didModify = True

                self.filelist.append(zinfo)
                self.NameToInfo[zinfo.filename] = zinfo
                self.fp.write(zinfo.FileHeader(False))
                self.start_dir = self.fp.tell()
        else:
            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
                shutil.copyfileobj(src, dest, 1024*8)

    def writestr(self, zinfo_or_arcname, data,
                 compress_type=None, compresslevel=None):
        """Write a file into the archive.  The contents is 'data', which
        may be either a 'str' or a 'bytes' instance; if it is a 'str',
        it is encoded as UTF-8 first.
        'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if isinstance(data, str):
            data = data.encode("utf-8")
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
            if zinfo.filename[-1] == '/':
                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
                zinfo.external_attr |= 0x10           # MS-DOS directory flag
            else:
                zinfo.external_attr = 0o600 << 16     # ?rw-------
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists."
            )

        if compress_type is not None:
            zinfo.compress_type = compress_type

        if compresslevel is not None:
            zinfo._compresslevel = compresslevel

        zinfo.file_size = len(data)            # Uncompressed size
        with self._lock:
            with self.open(zinfo, mode='w') as dest:
                dest.write(data)

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode 'w', 'x' and 'a' write the ending
        records."""
        if self.fp is None:
            return

        if self._writing:
            raise ValueError("Can't close the ZIP file while there is "
                             "an open writing handle on it. "
                             "Close the writing handle before closing the zip.")

        try:
            if self.mode in ('w', 'x', 'a') and self._didModify:  # write ending records
                with self._lock:
                    if self._seekable:
                        self.fp.seek(self.start_dir)
                    self._write_end_record()
        finally:
            # Release our reference to the underlying file even if writing
            # the end record failed.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)

    def _write_end_record(self):
        """Write the central directory and end-of-archive record(s)."""
        for zinfo in self.filelist:         # write central directory
            dt = zinfo.date_time
            # Pack date/time into the MS-DOS 16-bit encodings.
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                # Oversized values go in the ZIP64 extra field; the fixed
                # header fields hold the 0xffffffff sentinel.
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = _strip_extra(extra_data, (1,))
                extra_data = struct.pack(
                    '<HH' + 'Q'*len(extra),
                    1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # The classic record still follows, with its fields capped.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        if self.mode == "a":
            # Drop any stale data left over from the overwritten directory.
            self.fp.truncate()
        self.fp.flush()

    def _fpclose(self, fp):
        # Ref-counted close: the underlying file is shared with any open
        # member handles and is only closed when the last user releases it
        # (and only if we opened it ourselves).
        assert self._fileRefCnt > 0
        self._fileRefCnt -= 1
        if not self._fileRefCnt and not self._filePassed:
            fp.close()


class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        # -1 means "legacy mode": reuse whatever compiled file is up to date.
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        If filterfunc(pathname) is given, it is called with every argument.
        When it is False, the file or directory is skipped.
        """
        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return
        dir, name = os.path.split(pathname)
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = sorted(os.listdir(pathname))
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename,
                                         filterfunc=filterfunc)  # Recursive call
                    elif ext == ".py":
                        if filterfunc and not filterfunc(path):
                            if self.debug:
                                print('file %r skipped by filterfunc' % path)
                            continue
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in sorted(os.listdir(pathname)):
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if ext == ".py":
                        if filterfunc and not filterfunc(path):
                            if self.debug:
                                print('file %r skipped by filterfunc' % path)
                            continue
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            # Not a directory: must be a single .py module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        """
        def _compile(file, optimize=-1):
            # Byte-compile 'file'; return True on success, False on a
            # compile error (which is printed, not raised).
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        # Candidate PEP 3147 cache files for each optimization level.
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if (os.path.isfile(file_pyc) and
                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif (os.path.isfile(pycache_opt0) and
                  os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
2056 fname = pycache_opt0 2057 arcname = file_pyc 2058 elif (os.path.isfile(pycache_opt1) and 2059 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime): 2060 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2061 # file name in the archive. 2062 fname = pycache_opt1 2063 arcname = file_pyc 2064 elif (os.path.isfile(pycache_opt2) and 2065 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime): 2066 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2067 # file name in the archive. 2068 fname = pycache_opt2 2069 arcname = file_pyc 2070 else: 2071 # Compile py into PEP 3147 pyc file. 2072 if _compile(file_py): 2073 if sys.flags.optimize == 0: 2074 fname = pycache_opt0 2075 elif sys.flags.optimize == 1: 2076 fname = pycache_opt1 2077 else: 2078 fname = pycache_opt2 2079 arcname = file_pyc 2080 else: 2081 fname = arcname = file_py 2082 else: 2083 # new mode: use given optimization level 2084 if self._optimize == 0: 2085 fname = pycache_opt0 2086 arcname = file_pyc 2087 else: 2088 arcname = file_pyc 2089 if self._optimize == 1: 2090 fname = pycache_opt1 2091 elif self._optimize == 2: 2092 fname = pycache_opt2 2093 else: 2094 msg = "invalid value for 'optimize': {!r}".format(self._optimize) 2095 raise ValueError(msg) 2096 if not (os.path.isfile(fname) and 2097 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime): 2098 if not _compile(file_py, optimize=self._optimize): 2099 fname = arcname = file_py 2100 archivename = os.path.split(arcname)[1] 2101 if basename: 2102 archivename = "%s/%s" % (basename, archivename) 2103 return (fname, archivename) 2104 2105 2106def _parents(path): 2107 """ 2108 Given a path with elements separated by 2109 posixpath.sep, generate all parents of that path. 
2110 2111 >>> list(_parents('b/d')) 2112 ['b'] 2113 >>> list(_parents('/b/d/')) 2114 ['/b'] 2115 >>> list(_parents('b/d/f/')) 2116 ['b/d', 'b'] 2117 >>> list(_parents('b')) 2118 [] 2119 >>> list(_parents('')) 2120 [] 2121 """ 2122 return itertools.islice(_ancestry(path), 1, None) 2123 2124 2125def _ancestry(path): 2126 """ 2127 Given a path with elements separated by 2128 posixpath.sep, generate all elements of that path 2129 2130 >>> list(_ancestry('b/d')) 2131 ['b/d', 'b'] 2132 >>> list(_ancestry('/b/d/')) 2133 ['/b/d', '/b'] 2134 >>> list(_ancestry('b/d/f/')) 2135 ['b/d/f', 'b/d', 'b'] 2136 >>> list(_ancestry('b')) 2137 ['b'] 2138 >>> list(_ancestry('')) 2139 [] 2140 """ 2141 path = path.rstrip(posixpath.sep) 2142 while path and path != posixpath.sep: 2143 yield path 2144 path, tail = posixpath.split(path) 2145 2146 2147_dedupe = dict.fromkeys 2148"""Deduplicate an iterable in original order""" 2149 2150 2151def _difference(minuend, subtrahend): 2152 """ 2153 Return items in minuend not in subtrahend, retaining order 2154 with O(1) lookup. 2155 """ 2156 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 2157 2158 2159class CompleteDirs(ZipFile): 2160 """ 2161 A ZipFile subclass that ensures that implied directories 2162 are always included in the namelist. 2163 """ 2164 2165 @staticmethod 2166 def _implied_dirs(names): 2167 parents = itertools.chain.from_iterable(map(_parents, names)) 2168 as_dirs = (p + posixpath.sep for p in parents) 2169 return _dedupe(_difference(as_dirs, names)) 2170 2171 def namelist(self): 2172 names = super(CompleteDirs, self).namelist() 2173 return names + list(self._implied_dirs(names)) 2174 2175 def _name_set(self): 2176 return set(self.namelist()) 2177 2178 def resolve_dir(self, name): 2179 """ 2180 If the name represents a directory, return that name 2181 as a directory (with the trailing slash). 
2182 """ 2183 names = self._name_set() 2184 dirname = name + '/' 2185 dir_match = name not in names and dirname in names 2186 return dirname if dir_match else name 2187 2188 @classmethod 2189 def make(cls, source): 2190 """ 2191 Given a source (filename or zipfile), return an 2192 appropriate CompleteDirs subclass. 2193 """ 2194 if isinstance(source, CompleteDirs): 2195 return source 2196 2197 if not isinstance(source, ZipFile): 2198 return cls(source) 2199 2200 # Only allow for FastPath when supplied zipfile is read-only 2201 if 'r' not in source.mode: 2202 cls = CompleteDirs 2203 2204 res = cls.__new__(cls) 2205 vars(res).update(vars(source)) 2206 return res 2207 2208 2209class FastLookup(CompleteDirs): 2210 """ 2211 ZipFile subclass to ensure implicit 2212 dirs exist and are resolved rapidly. 2213 """ 2214 def namelist(self): 2215 with contextlib.suppress(AttributeError): 2216 return self.__names 2217 self.__names = super(FastLookup, self).namelist() 2218 return self.__names 2219 2220 def _name_set(self): 2221 with contextlib.suppress(AttributeError): 2222 return self.__lookup 2223 self.__lookup = super(FastLookup, self)._name_set() 2224 return self.__lookup 2225 2226 2227class Path: 2228 """ 2229 A pathlib-compatible interface for zip files. 2230 2231 Consider a zip file with this structure:: 2232 2233 . 2234 ├── a.txt 2235 └── b 2236 ├── c.txt 2237 └── d 2238 └── e.txt 2239 2240 >>> data = io.BytesIO() 2241 >>> zf = ZipFile(data, 'w') 2242 >>> zf.writestr('a.txt', 'content of a') 2243 >>> zf.writestr('b/c.txt', 'content of c') 2244 >>> zf.writestr('b/d/e.txt', 'content of e') 2245 >>> zf.filename = 'abcde.zip' 2246 2247 Path accepts the zipfile object itself or a filename 2248 2249 >>> root = Path(zf) 2250 2251 From there, several path operations are available. 
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    # repr template; name-mangled (_Path__repr) so it does not clash
    # with subclass attributes.
    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        # root: a ZipFile (or anything FastLookup.make accepts, e.g. a
        #       filename); wrapped so namelist lookups are fast.
        # at: archive-relative posix path of this entry; "" is the root,
        #     a trailing "/" marks a directory.
        self.root = FastLookup.make(root)
        self.at = at

    def open(self, mode='r', *args, **kwargs):
        """
        Open this entry as text or binary following the semantics
        of ``pathlib.Path.open()`` by passing arguments through
        to io.TextIOWrapper().

        Raises ValueError if encoding arguments are supplied for a
        binary mode.
        """
        pwd = kwargs.pop('pwd', None)
        zip_mode = mode[0]
        stream = self.root.open(self.at, zip_mode, pwd=pwd)
        if 'b' in mode:
            if args or kwargs:
                raise ValueError("encoding args invalid for binary operation")
            return stream
        return io.TextIOWrapper(stream, *args, **kwargs)

    @property
    def name(self):
        """Final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Read the entry as text; extra args go to TextIOWrapper."""
        with self.open('r', *args, **kwargs) as strm:
            return strm.read()

    def read_bytes(self):
        """Read the entry as raw bytes."""
        with self.open('rb') as strm:
            return strm.read()

    def _is_child(self, path):
        # True when *path* is an immediate child of this directory.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # BUGFIX: construct via self.__class__ rather than the
        # hard-coded Path so that subclasses of Path get subclass
        # instances back from joinpath()/iterdir()/parent (matches the
        # later zipp/CPython behavior).
        return self.__class__(self.root, at)

    def is_dir(self):
        """True for the root and for names ending in '/'."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """True when this entry is not a directory."""
        return not self.is_dir()

    def exists(self):
        """True when this entry (or implied directory) is in the archive."""
        return self.at in self.root._name_set()

    def iterdir(self):
        """Yield this directory's immediate children as Path objects.

        Raises ValueError when called on a file entry.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self.root.namelist())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """Join *add* onto this path, resolving directory entries."""
        next = posixpath.join(self.at, add)
        return self._next(self.root.resolve_dir(next))

    __truediv__ = joinpath

    @property
    def parent(self):
        """The containing directory as a Path (with trailing slash)."""
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)
def main(args=None):
    """Command-line entry point: list (-l), extract (-e), create (-c),
    or test (-t) a ZIP archive. *args* defaults to sys.argv[1:]."""
    import argparse

    description = 'A simple command-line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        with ZipFile(args.test, 'r') as zf:
            corrupt = zf.testzip()
            if corrupt:
                print("The following enclosed file is corrupted: {!r}".format(corrupt))
            print("Done testing")

    elif args.list is not None:
        with ZipFile(args.list, 'r') as zf:
            zf.printdir()

    elif args.extract is not None:
        archive, destination = args.extract
        with ZipFile(archive, 'r') as zf:
            zf.extractall(destination)

    elif args.create is not None:
        zip_name, *sources = args.create

        def add_to_zip(zf, path, zippath):
            # Regular files are deflated; directories recurse, writing
            # a stored entry for the directory itself when named.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for member in sorted(os.listdir(path)):
                    add_to_zip(zf, os.path.join(path, member),
                               os.path.join(zippath, member))
            # anything else (sockets, devices, ...) is silently ignored

        with ZipFile(zip_name, 'w') as zf:
            for path in sources:
                zippath = os.path.basename(path)
                if not zippath:
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_to_zip(zf, path, zippath)