1""" 2Python implementation of the io module. 3""" 4 5import os 6import abc 7import codecs 8import errno 9import stat 10import sys 11# Import _thread instead of threading to reduce startup cost 12from _thread import allocate_lock as Lock 13if sys.platform in {'win32', 'cygwin'}: 14 from msvcrt import setmode as _setmode 15else: 16 _setmode = None 17 18import io 19from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END) 20 21valid_seek_flags = {0, 1, 2} # Hardwired values 22if hasattr(os, 'SEEK_HOLE') : 23 valid_seek_flags.add(os.SEEK_HOLE) 24 valid_seek_flags.add(os.SEEK_DATA) 25 26# open() uses st_blksize whenever we can 27DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes 28 29# NOTE: Base classes defined here are registered with the "official" ABCs 30# defined in io.py. We don't use real inheritance though, because we don't want 31# to inherit the C implementations. 32 33# Rebind for compatibility 34BlockingIOError = BlockingIOError 35 36# Does io.IOBase finalizer log the exception if the close() method fails? 37# The exception is ignored silently by default in release build. 38_IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) 39# Does open() check its 'errors' argument? 40_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE 41 42 43def open(file, mode="r", buffering=-1, encoding=None, errors=None, 44 newline=None, closefd=True, opener=None): 45 46 r"""Open file and return a stream. Raise OSError upon failure. 47 48 file is either a text or byte string giving the name (and the path 49 if the file isn't in the current working directory) of the file to 50 be opened or an integer file descriptor of the file to be 51 wrapped. (If a file descriptor is given, it is closed when the 52 returned I/O object is closed, unless closefd is set to False.) 53 54 mode is an optional string that specifies the mode in which the file is 55 opened. It defaults to 'r' which means open for reading in text mode. Other 56 common values are 'w' for writing (truncating the file if it already 57 exists), 'x' for exclusive creation of a new file, and 'a' for appending 58 (which on some Unix systems, means that all writes append to the end of the 59 file regardless of the current seek position). In text mode, if encoding is 60 not specified the encoding used is platform dependent. (For reading and 61 writing raw bytes use binary mode and leave encoding unspecified.) The 62 available modes are: 63 64 ========= =============================================================== 65 Character Meaning 66 --------- --------------------------------------------------------------- 67 'r' open for reading (default) 68 'w' open for writing, truncating the file first 69 'x' create a new file and open it for writing 70 'a' open for writing, appending to the end of the file if it exists 71 'b' binary mode 72 't' text mode (default) 73 '+' open a disk file for updating (reading and writing) 74 'U' universal newline mode (deprecated) 75 ========= =============================================================== 76 77 The default mode is 'rt' (open for reading text). For binary random 78 access, the mode 'w+b' opens and truncates the file to 0 bytes, while 79 'r+b' opens the file without truncation. The 'x' mode implies 'w' and 80 raises an `FileExistsError` if the file already exists. 81 82 Python distinguishes between files opened in binary and text modes, 83 even when the underlying operating system doesn't. Files opened in 84 binary mode (appending 'b' to the mode argument) return contents as 85 bytes objects without any decoding. In text mode (the default, or when 86 't' is appended to the mode argument), the contents of the file are 87 returned as strings, the bytes having been first decoded using a 88 platform-dependent encoding or using the specified encoding if given. 89 90 'U' mode is deprecated and will raise an exception in future versions 91 of Python. It has no effect in Python 3. Use newline to control 92 universal newlines mode. 93 94 buffering is an optional integer used to set the buffering policy. 95 Pass 0 to switch buffering off (only allowed in binary mode), 1 to select 96 line buffering (only usable in text mode), and an integer > 1 to indicate 97 the size of a fixed-size chunk buffer. When no buffering argument is 98 given, the default buffering policy works as follows: 99 100 * Binary files are buffered in fixed-size chunks; the size of the buffer 101 is chosen using a heuristic trying to determine the underlying device's 102 "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`. 103 On many systems, the buffer will typically be 4096 or 8192 bytes long. 104 105 * "Interactive" text files (files for which isatty() returns True) 106 use line buffering. Other text files use the policy described above 107 for binary files. 108 109 encoding is the str name of the encoding used to decode or encode the 110 file. This should only be used in text mode. The default encoding is 111 platform dependent, but any encoding supported by Python can be 112 passed. See the codecs module for the list of supported encodings. 113 114 errors is an optional string that specifies how encoding errors are to 115 be handled---this argument should not be used in binary mode. Pass 116 'strict' to raise a ValueError exception if there is an encoding error 117 (the default of None has the same effect), or pass 'ignore' to ignore 118 errors. (Note that ignoring encoding errors can lead to data loss.) 119 See the documentation for codecs.register for a list of the permitted 120 encoding error strings. 121 122 newline is a string controlling how universal newlines works (it only 123 applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works 124 as follows: 125 126 * On input, if newline is None, universal newlines mode is 127 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and 128 these are translated into '\n' before being returned to the 129 caller. If it is '', universal newline mode is enabled, but line 130 endings are returned to the caller untranslated. If it has any of 131 the other legal values, input lines are only terminated by the given 132 string, and the line ending is returned to the caller untranslated. 133 134 * On output, if newline is None, any '\n' characters written are 135 translated to the system default line separator, os.linesep. If 136 newline is '', no translation takes place. If newline is any of the 137 other legal values, any '\n' characters written are translated to 138 the given string. 139 140 closedfd is a bool. If closefd is False, the underlying file descriptor will 141 be kept open when the file is closed. This does not work when a file name is 142 given and must be True in that case. 143 144 The newly created file is non-inheritable. 145 146 A custom opener can be used by passing a callable as *opener*. The 147 underlying file descriptor for the file object is then obtained by calling 148 *opener* with (*file*, *flags*). *opener* must return an open file 149 descriptor (passing os.open as *opener* results in functionality similar to 150 passing None). 151 152 open() returns a file object whose type depends on the mode, and 153 through which the standard file operations such as reading and writing 154 are performed. When open() is used to open a file in a text mode ('w', 155 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open 156 a file in a binary mode, the returned class varies: in read binary 157 mode, it returns a BufferedReader; in write binary and append binary 158 modes, it returns a BufferedWriter, and in read/write mode, it returns 159 a BufferedRandom. 160 161 It is also possible to use a string or bytearray as a file for both 162 reading and writing. For strings StringIO can be used like a file 163 opened in a text mode, and for bytes a BytesIO can be used like a file 164 opened in a binary mode. 165 """ 166 if not isinstance(file, int): 167 file = os.fspath(file) 168 if not isinstance(file, (str, bytes, int)): 169 raise TypeError("invalid file: %r" % file) 170 if not isinstance(mode, str): 171 raise TypeError("invalid mode: %r" % mode) 172 if not isinstance(buffering, int): 173 raise TypeError("invalid buffering: %r" % buffering) 174 if encoding is not None and not isinstance(encoding, str): 175 raise TypeError("invalid encoding: %r" % encoding) 176 if errors is not None and not isinstance(errors, str): 177 raise TypeError("invalid errors: %r" % errors) 178 modes = set(mode) 179 if modes - set("axrwb+tU") or len(mode) > len(modes): 180 raise ValueError("invalid mode: %r" % mode) 181 creating = "x" in modes 182 reading = "r" in modes 183 writing = "w" in modes 184 appending = "a" in modes 185 updating = "+" in modes 186 text = "t" in modes 187 binary = "b" in modes 188 if "U" in modes: 189 if creating or writing or appending or updating: 190 raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'") 191 import warnings 192 warnings.warn("'U' mode is deprecated", 193 DeprecationWarning, 2) 194 reading = True 195 if text and binary: 196 raise ValueError("can't have text and binary mode at once") 197 if creating + reading + writing + appending > 1: 198 raise ValueError("can't have read/write/append mode at once") 199 if not (creating or reading or writing or appending): 200 raise ValueError("must have exactly one of read/write/append mode") 201 if binary and encoding is not None: 202 raise ValueError("binary mode doesn't take an encoding argument") 203 if binary and errors is not None: 204 raise ValueError("binary mode doesn't take an errors argument") 205 if binary and newline is not None: 206 raise ValueError("binary mode doesn't take a newline argument") 207 if binary and buffering == 1: 208 import warnings 209 warnings.warn("line buffering (buffering=1) isn't supported in binary " 210 "mode, the default buffer size will be used", 211 RuntimeWarning, 2) 212 raw = FileIO(file, 213 (creating and "x" or "") + 214 (reading and "r" or "") + 215 (writing and "w" or "") + 216 (appending and "a" or "") + 217 (updating and "+" or ""), 218 closefd, opener=opener) 219 result = raw 220 try: 221 line_buffering = False 222 if buffering == 1 or buffering < 0 and raw.isatty(): 223 buffering = -1 224 line_buffering = True 225 if buffering < 0: 226 buffering = DEFAULT_BUFFER_SIZE 227 try: 228 bs = os.fstat(raw.fileno()).st_blksize 229 except (OSError, AttributeError): 230 pass 231 else: 232 if bs > 1: 233 buffering = bs 234 if buffering < 0: 235 raise ValueError("invalid buffering size") 236 if buffering == 0: 237 if binary: 238 return result 239 raise ValueError("can't have unbuffered text I/O") 240 if updating: 241 buffer = BufferedRandom(raw, buffering) 242 elif creating or writing or appending: 243 buffer = BufferedWriter(raw, buffering) 244 elif reading: 245 buffer = BufferedReader(raw, buffering) 246 else: 247 raise ValueError("unknown mode: %r" % mode) 248 result = buffer 249 if binary: 250 return result 251 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering) 252 result = text 253 text.mode = mode 254 return result 255 except: 256 result.close() 257 raise 258 259# Define a default pure-Python implementation for open_code() 260# that does not allow hooks. Warn on first use. Defined for tests. 261def _open_code_with_warning(path): 262 """Opens the provided file with mode ``'rb'``. This function 263 should be used when the intent is to treat the contents as 264 executable code. 265 266 ``path`` should be an absolute path. 267 268 When supported by the runtime, this function can be hooked 269 in order to allow embedders more control over code files. 270 This functionality is not supported on the current runtime. 271 """ 272 import warnings 273 warnings.warn("_pyio.open_code() may not be using hooks", 274 RuntimeWarning, 2) 275 return open(path, "rb") 276 277try: 278 open_code = io.open_code 279except AttributeError: 280 open_code = _open_code_with_warning 281 282 283class DocDescriptor: 284 """Helper for builtins.open.__doc__ 285 """ 286 def __get__(self, obj, typ=None): 287 return ( 288 "open(file, mode='r', buffering=-1, encoding=None, " 289 "errors=None, newline=None, closefd=True)\n\n" + 290 open.__doc__) 291 292class OpenWrapper: 293 """Wrapper for builtins.open 294 295 Trick so that open won't become a bound method when stored 296 as a class variable (as dbm.dumb does). 297 298 See initstdio() in Python/pylifecycle.c. 299 """ 300 __doc__ = DocDescriptor() 301 302 def __new__(cls, *args, **kwargs): 303 return open(*args, **kwargs) 304 305 306# In normal operation, both `UnsupportedOperation`s should be bound to the 307# same object. 308try: 309 UnsupportedOperation = io.UnsupportedOperation 310except AttributeError: 311 class UnsupportedOperation(OSError, ValueError): 312 pass 313 314 315class IOBase(metaclass=abc.ABCMeta): 316 317 """The abstract base class for all I/O classes, acting on streams of 318 bytes. There is no public constructor. 319 320 This class provides dummy implementations for many methods that 321 derived classes can override selectively; the default implementations 322 represent a file that cannot be read, written or seeked. 323 324 Even though IOBase does not declare read or write because 325 their signatures will vary, implementations and clients should 326 consider those methods part of the interface. Also, implementations 327 may raise UnsupportedOperation when operations they do not support are 328 called. 329 330 The basic type used for binary data read from or written to a file is 331 bytes. Other bytes-like objects are accepted as method arguments too. 332 Text I/O classes work with str data. 333 334 Note that calling any method (even inquiries) on a closed stream is 335 undefined. Implementations may raise OSError in this case. 336 337 IOBase (and its subclasses) support the iterator protocol, meaning 338 that an IOBase object can be iterated over yielding the lines in a 339 stream. 340 341 IOBase also supports the :keyword:`with` statement. In this example, 342 fp is closed after the suite of the with statement is complete: 343 344 with open('spam.txt', 'r') as fp: 345 fp.write('Spam and eggs!') 346 """ 347 348 ### Internal ### 349 350 def _unsupported(self, name): 351 """Internal: raise an OSError exception for unsupported operations.""" 352 raise UnsupportedOperation("%s.%s() not supported" % 353 (self.__class__.__name__, name)) 354 355 ### Positioning ### 356 357 def seek(self, pos, whence=0): 358 """Change stream position. 359 360 Change the stream position to byte offset pos. Argument pos is 361 interpreted relative to the position indicated by whence. Values 362 for whence are ints: 363 364 * 0 -- start of stream (the default); offset should be zero or positive 365 * 1 -- current stream position; offset may be negative 366 * 2 -- end of stream; offset is usually negative 367 Some operating systems / file systems could provide additional values. 368 369 Return an int indicating the new absolute position. 370 """ 371 self._unsupported("seek") 372 373 def tell(self): 374 """Return an int indicating the current stream position.""" 375 return self.seek(0, 1) 376 377 def truncate(self, pos=None): 378 """Truncate file to size bytes. 379 380 Size defaults to the current IO position as reported by tell(). Return 381 the new size. 382 """ 383 self._unsupported("truncate") 384 385 ### Flush and close ### 386 387 def flush(self): 388 """Flush write buffers, if applicable. 389 390 This is not implemented for read-only and non-blocking streams. 391 """ 392 self._checkClosed() 393 # XXX Should this return the number of bytes written??? 394 395 __closed = False 396 397 def close(self): 398 """Flush and close the IO object. 399 400 This method has no effect if the file is already closed. 401 """ 402 if not self.__closed: 403 try: 404 self.flush() 405 finally: 406 self.__closed = True 407 408 def __del__(self): 409 """Destructor. Calls close().""" 410 try: 411 closed = self.closed 412 except AttributeError: 413 # If getting closed fails, then the object is probably 414 # in an unusable state, so ignore. 415 return 416 417 if closed: 418 return 419 420 if _IOBASE_EMITS_UNRAISABLE: 421 self.close() 422 else: 423 # The try/except block is in case this is called at program 424 # exit time, when it's possible that globals have already been 425 # deleted, and then the close() call might fail. Since 426 # there's nothing we can do about such failures and they annoy 427 # the end users, we suppress the traceback. 428 try: 429 self.close() 430 except: 431 pass 432 433 ### Inquiries ### 434 435 def seekable(self): 436 """Return a bool indicating whether object supports random access. 437 438 If False, seek(), tell() and truncate() will raise OSError. 439 This method may need to do a test seek(). 440 """ 441 return False 442 443 def _checkSeekable(self, msg=None): 444 """Internal: raise UnsupportedOperation if file is not seekable 445 """ 446 if not self.seekable(): 447 raise UnsupportedOperation("File or stream is not seekable." 448 if msg is None else msg) 449 450 def readable(self): 451 """Return a bool indicating whether object was opened for reading. 452 453 If False, read() will raise OSError. 454 """ 455 return False 456 457 def _checkReadable(self, msg=None): 458 """Internal: raise UnsupportedOperation if file is not readable 459 """ 460 if not self.readable(): 461 raise UnsupportedOperation("File or stream is not readable." 462 if msg is None else msg) 463 464 def writable(self): 465 """Return a bool indicating whether object was opened for writing. 466 467 If False, write() and truncate() will raise OSError. 468 """ 469 return False 470 471 def _checkWritable(self, msg=None): 472 """Internal: raise UnsupportedOperation if file is not writable 473 """ 474 if not self.writable(): 475 raise UnsupportedOperation("File or stream is not writable." 476 if msg is None else msg) 477 478 @property 479 def closed(self): 480 """closed: bool. True iff the file has been closed. 481 482 For backwards compatibility, this is a property, not a predicate. 483 """ 484 return self.__closed 485 486 def _checkClosed(self, msg=None): 487 """Internal: raise a ValueError if file is closed 488 """ 489 if self.closed: 490 raise ValueError("I/O operation on closed file." 491 if msg is None else msg) 492 493 ### Context manager ### 494 495 def __enter__(self): # That's a forward reference 496 """Context management protocol. Returns self (an instance of IOBase).""" 497 self._checkClosed() 498 return self 499 500 def __exit__(self, *args): 501 """Context management protocol. Calls close()""" 502 self.close() 503 504 ### Lower-level APIs ### 505 506 # XXX Should these be present even if unimplemented? 507 508 def fileno(self): 509 """Returns underlying file descriptor (an int) if one exists. 510 511 An OSError is raised if the IO object does not use a file descriptor. 512 """ 513 self._unsupported("fileno") 514 515 def isatty(self): 516 """Return a bool indicating whether this is an 'interactive' stream. 517 518 Return False if it can't be determined. 519 """ 520 self._checkClosed() 521 return False 522 523 ### Readline[s] and writelines ### 524 525 def readline(self, size=-1): 526 r"""Read and return a line of bytes from the stream. 527 528 If size is specified, at most size bytes will be read. 529 Size should be an int. 530 531 The line terminator is always b'\n' for binary files; for text 532 files, the newlines argument to open can be used to select the line 533 terminator(s) recognized. 534 """ 535 # For backwards compatibility, a (slowish) readline(). 536 if hasattr(self, "peek"): 537 def nreadahead(): 538 readahead = self.peek(1) 539 if not readahead: 540 return 1 541 n = (readahead.find(b"\n") + 1) or len(readahead) 542 if size >= 0: 543 n = min(n, size) 544 return n 545 else: 546 def nreadahead(): 547 return 1 548 if size is None: 549 size = -1 550 else: 551 try: 552 size_index = size.__index__ 553 except AttributeError: 554 raise TypeError(f"{size!r} is not an integer") 555 else: 556 size = size_index() 557 res = bytearray() 558 while size < 0 or len(res) < size: 559 b = self.read(nreadahead()) 560 if not b: 561 break 562 res += b 563 if res.endswith(b"\n"): 564 break 565 return bytes(res) 566 567 def __iter__(self): 568 self._checkClosed() 569 return self 570 571 def __next__(self): 572 line = self.readline() 573 if not line: 574 raise StopIteration 575 return line 576 577 def readlines(self, hint=None): 578 """Return a list of lines from the stream. 579 580 hint can be specified to control the number of lines read: no more 581 lines will be read if the total size (in bytes/characters) of all 582 lines so far exceeds hint. 583 """ 584 if hint is None or hint <= 0: 585 return list(self) 586 n = 0 587 lines = [] 588 for line in self: 589 lines.append(line) 590 n += len(line) 591 if n >= hint: 592 break 593 return lines 594 595 def writelines(self, lines): 596 """Write a list of lines to the stream. 597 598 Line separators are not added, so it is usual for each of the lines 599 provided to have a line separator at the end. 600 """ 601 self._checkClosed() 602 for line in lines: 603 self.write(line) 604 605io.IOBase.register(IOBase) 606 607 608class RawIOBase(IOBase): 609 610 """Base class for raw binary I/O.""" 611 612 # The read() method is implemented by calling readinto(); derived 613 # classes that want to support read() only need to implement 614 # readinto() as a primitive operation. In general, readinto() can be 615 # more efficient than read(). 616 617 # (It would be tempting to also provide an implementation of 618 # readinto() in terms of read(), in case the latter is a more suitable 619 # primitive operation, but that would lead to nasty recursion in case 620 # a subclass doesn't implement either.) 621 622 def read(self, size=-1): 623 """Read and return up to size bytes, where size is an int. 624 625 Returns an empty bytes object on EOF, or None if the object is 626 set not to block and has no data to read. 627 """ 628 if size is None: 629 size = -1 630 if size < 0: 631 return self.readall() 632 b = bytearray(size.__index__()) 633 n = self.readinto(b) 634 if n is None: 635 return None 636 del b[n:] 637 return bytes(b) 638 639 def readall(self): 640 """Read until EOF, using multiple read() call.""" 641 res = bytearray() 642 while True: 643 data = self.read(DEFAULT_BUFFER_SIZE) 644 if not data: 645 break 646 res += data 647 if res: 648 return bytes(res) 649 else: 650 # b'' or None 651 return data 652 653 def readinto(self, b): 654 """Read bytes into a pre-allocated bytes-like object b. 655 656 Returns an int representing the number of bytes read (0 for EOF), or 657 None if the object is set not to block and has no data to read. 658 """ 659 self._unsupported("readinto") 660 661 def write(self, b): 662 """Write the given buffer to the IO stream. 663 664 Returns the number of bytes written, which may be less than the 665 length of b in bytes. 666 """ 667 self._unsupported("write") 668 669io.RawIOBase.register(RawIOBase) 670from _io import FileIO 671RawIOBase.register(FileIO) 672 673 674class BufferedIOBase(IOBase): 675 676 """Base class for buffered IO objects. 677 678 The main difference with RawIOBase is that the read() method 679 supports omitting the size argument, and does not have a default 680 implementation that defers to readinto(). 681 682 In addition, read(), readinto() and write() may raise 683 BlockingIOError if the underlying raw stream is in non-blocking 684 mode and not ready; unlike their raw counterparts, they will never 685 return None. 686 687 A typical implementation should not inherit from a RawIOBase 688 implementation, but wrap one. 689 """ 690 691 def read(self, size=-1): 692 """Read and return up to size bytes, where size is an int. 693 694 If the argument is omitted, None, or negative, reads and 695 returns all data until EOF. 696 697 If the argument is positive, and the underlying raw stream is 698 not 'interactive', multiple raw reads may be issued to satisfy 699 the byte count (unless EOF is reached first). But for 700 interactive raw streams (XXX and for pipes?), at most one raw 701 read will be issued, and a short result does not imply that 702 EOF is imminent. 703 704 Returns an empty bytes array on EOF. 705 706 Raises BlockingIOError if the underlying raw stream has no 707 data at the moment. 708 """ 709 self._unsupported("read") 710 711 def read1(self, size=-1): 712 """Read up to size bytes with at most one read() system call, 713 where size is an int. 714 """ 715 self._unsupported("read1") 716 717 def readinto(self, b): 718 """Read bytes into a pre-allocated bytes-like object b. 719 720 Like read(), this may issue multiple reads to the underlying raw 721 stream, unless the latter is 'interactive'. 722 723 Returns an int representing the number of bytes read (0 for EOF). 724 725 Raises BlockingIOError if the underlying raw stream has no 726 data at the moment. 727 """ 728 729 return self._readinto(b, read1=False) 730 731 def readinto1(self, b): 732 """Read bytes into buffer *b*, using at most one system call 733 734 Returns an int representing the number of bytes read (0 for EOF). 735 736 Raises BlockingIOError if the underlying raw stream has no 737 data at the moment. 738 """ 739 740 return self._readinto(b, read1=True) 741 742 def _readinto(self, b, read1): 743 if not isinstance(b, memoryview): 744 b = memoryview(b) 745 b = b.cast('B') 746 747 if read1: 748 data = self.read1(len(b)) 749 else: 750 data = self.read(len(b)) 751 n = len(data) 752 753 b[:n] = data 754 755 return n 756 757 def write(self, b): 758 """Write the given bytes buffer to the IO stream. 759 760 Return the number of bytes written, which is always the length of b 761 in bytes. 762 763 Raises BlockingIOError if the buffer is full and the 764 underlying raw stream cannot accept more data at the moment. 765 """ 766 self._unsupported("write") 767 768 def detach(self): 769 """ 770 Separate the underlying raw stream from the buffer and return it. 771 772 After the raw stream has been detached, the buffer is in an unusable 773 state. 774 """ 775 self._unsupported("detach") 776 777io.BufferedIOBase.register(BufferedIOBase) 778 779 780class _BufferedIOMixin(BufferedIOBase): 781 782 """A mixin implementation of BufferedIOBase with an underlying raw stream. 783 784 This passes most requests on to the underlying raw stream. It 785 does *not* provide implementations of read(), readinto() or 786 write(). 787 """ 788 789 def __init__(self, raw): 790 self._raw = raw 791 792 ### Positioning ### 793 794 def seek(self, pos, whence=0): 795 new_position = self.raw.seek(pos, whence) 796 if new_position < 0: 797 raise OSError("seek() returned an invalid position") 798 return new_position 799 800 def tell(self): 801 pos = self.raw.tell() 802 if pos < 0: 803 raise OSError("tell() returned an invalid position") 804 return pos 805 806 def truncate(self, pos=None): 807 self._checkClosed() 808 self._checkWritable() 809 810 # Flush the stream. We're mixing buffered I/O with lower-level I/O, 811 # and a flush may be necessary to synch both views of the current 812 # file state. 813 self.flush() 814 815 if pos is None: 816 pos = self.tell() 817 # XXX: Should seek() be used, instead of passing the position 818 # XXX directly to truncate? 819 return self.raw.truncate(pos) 820 821 ### Flush and close ### 822 823 def flush(self): 824 if self.closed: 825 raise ValueError("flush on closed file") 826 self.raw.flush() 827 828 def close(self): 829 if self.raw is not None and not self.closed: 830 try: 831 # may raise BlockingIOError or BrokenPipeError etc 832 self.flush() 833 finally: 834 self.raw.close() 835 836 def detach(self): 837 if self.raw is None: 838 raise ValueError("raw stream already detached") 839 self.flush() 840 raw = self._raw 841 self._raw = None 842 return raw 843 844 ### Inquiries ### 845 846 def seekable(self): 847 return self.raw.seekable() 848 849 @property 850 def raw(self): 851 return self._raw 852 853 @property 854 def closed(self): 855 return self.raw.closed 856 857 @property 858 def name(self): 859 return self.raw.name 860 861 @property 862 def mode(self): 863 return self.raw.mode 864 865 def __getstate__(self): 866 raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") 867 868 def __repr__(self): 869 modname = self.__class__.__module__ 870 clsname = self.__class__.__qualname__ 871 try: 872 name = self.name 873 except AttributeError: 874 return "<{}.{}>".format(modname, clsname) 875 else: 876 return "<{}.{} name={!r}>".format(modname, clsname, name) 877 878 ### Lower-level APIs ### 879 880 def fileno(self): 881 return self.raw.fileno() 882 883 def isatty(self): 884 return self.raw.isatty() 885 886 887class BytesIO(BufferedIOBase): 888 889 """Buffered I/O implementation using an in-memory bytes buffer.""" 890 891 # Initialize _buffer as soon as possible since it's used by __del__() 892 # which calls close() 893 _buffer = None 894 895 def __init__(self, initial_bytes=None): 896 buf = bytearray() 897 if initial_bytes is not None: 898 buf += initial_bytes 899 self._buffer = buf 900 self._pos = 0 901 902 def __getstate__(self): 903 if self.closed: 904 raise ValueError("__getstate__ on closed file") 905 return self.__dict__.copy() 906 907 def getvalue(self): 908 """Return the bytes value (contents) of the buffer 909 """ 910 if self.closed: 911 raise ValueError("getvalue on closed file") 912 return bytes(self._buffer) 913 914 def getbuffer(self): 915 """Return a readable and writable view of the buffer. 916 """ 917 if self.closed: 918 raise ValueError("getbuffer on closed file") 919 return memoryview(self._buffer) 920 921 def close(self): 922 if self._buffer is not None: 923 self._buffer.clear() 924 super().close() 925 926 def read(self, size=-1): 927 if self.closed: 928 raise ValueError("read from closed file") 929 if size is None: 930 size = -1 931 else: 932 try: 933 size_index = size.__index__ 934 except AttributeError: 935 raise TypeError(f"{size!r} is not an integer") 936 else: 937 size = size_index() 938 if size < 0: 939 size = len(self._buffer) 940 if len(self._buffer) <= self._pos: 941 return b"" 942 newpos = min(len(self._buffer), self._pos + size) 943 b = self._buffer[self._pos : newpos] 944 self._pos = newpos 945 return bytes(b) 946 947 def read1(self, size=-1): 948 """This is the same as read. 949 """ 950 return self.read(size) 951 952 def write(self, b): 953 if self.closed: 954 raise ValueError("write to closed file") 955 if isinstance(b, str): 956 raise TypeError("can't write str to binary stream") 957 with memoryview(b) as view: 958 n = view.nbytes # Size of any bytes-like object 959 if n == 0: 960 return 0 961 pos = self._pos 962 if pos > len(self._buffer): 963 # Inserts null bytes between the current end of the file 964 # and the new write position. 965 padding = b'\x00' * (pos - len(self._buffer)) 966 self._buffer += padding 967 self._buffer[pos:pos + n] = b 968 self._pos += n 969 return n 970 971 def seek(self, pos, whence=0): 972 if self.closed: 973 raise ValueError("seek on closed file") 974 try: 975 pos_index = pos.__index__ 976 except AttributeError: 977 raise TypeError(f"{pos!r} is not an integer") 978 else: 979 pos = pos_index() 980 if whence == 0: 981 if pos < 0: 982 raise ValueError("negative seek position %r" % (pos,)) 983 self._pos = pos 984 elif whence == 1: 985 self._pos = max(0, self._pos + pos) 986 elif whence == 2: 987 self._pos = max(0, len(self._buffer) + pos) 988 else: 989 raise ValueError("unsupported whence value") 990 return self._pos 991 992 def tell(self): 993 if self.closed: 994 raise ValueError("tell on closed file") 995 return self._pos 996 997 def truncate(self, pos=None): 998 if self.closed: 999 raise ValueError("truncate on closed file") 1000 if pos is None: 1001 pos = self._pos 1002 else: 1003 try: 1004 pos_index = pos.__index__ 1005 except AttributeError: 1006 raise TypeError(f"{pos!r} is not an integer") 1007 else: 1008 pos = pos_index() 1009 if pos < 0: 1010 raise ValueError("negative truncate position %r" % (pos,)) 1011 del self._buffer[pos:] 1012 return pos 1013 1014 def readable(self): 1015 if self.closed: 1016 raise ValueError("I/O operation on closed file.") 1017 return True 1018 1019 def writable(self): 1020 if self.closed: 1021 raise ValueError("I/O operation on closed file.") 1022 return True 1023 1024 def seekable(self): 1025 if self.closed: 1026 raise ValueError("I/O operation on closed file.") 1027 return True 1028 1029 1030class BufferedReader(_BufferedIOMixin): 1031 1032 """BufferedReader(raw[, buffer_size]) 1033 1034 A buffer for a readable, sequential BaseRawIO object. 1035 1036 The constructor creates a BufferedReader for the given readable raw 1037 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE 1038 is used. 1039 """ 1040 1041 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 1042 """Create a new buffered reader using the given readable raw IO object. 1043 """ 1044 if not raw.readable(): 1045 raise OSError('"raw" argument must be readable.') 1046 1047 _BufferedIOMixin.__init__(self, raw) 1048 if buffer_size <= 0: 1049 raise ValueError("invalid buffer size") 1050 self.buffer_size = buffer_size 1051 self._reset_read_buf() 1052 self._read_lock = Lock() 1053 1054 def readable(self): 1055 return self.raw.readable() 1056 1057 def _reset_read_buf(self): 1058 self._read_buf = b"" 1059 self._read_pos = 0 1060 1061 def read(self, size=None): 1062 """Read size bytes. 1063 1064 Returns exactly size bytes of data unless the underlying raw IO 1065 stream reaches EOF or if the call would block in non-blocking 1066 mode. If size is negative, read until EOF or until read() would 1067 block. 1068 """ 1069 if size is not None and size < -1: 1070 raise ValueError("invalid number of bytes to read") 1071 with self._read_lock: 1072 return self._read_unlocked(size) 1073 1074 def _read_unlocked(self, n=None): 1075 nodata_val = b"" 1076 empty_values = (b"", None) 1077 buf = self._read_buf 1078 pos = self._read_pos 1079 1080 # Special case for when the number of bytes to read is unspecified. 1081 if n is None or n == -1: 1082 self._reset_read_buf() 1083 if hasattr(self.raw, 'readall'): 1084 chunk = self.raw.readall() 1085 if chunk is None: 1086 return buf[pos:] or None 1087 else: 1088 return buf[pos:] + chunk 1089 chunks = [buf[pos:]] # Strip the consumed bytes. 1090 current_size = 0 1091 while True: 1092 # Read until EOF or until read() would block. 1093 chunk = self.raw.read() 1094 if chunk in empty_values: 1095 nodata_val = chunk 1096 break 1097 current_size += len(chunk) 1098 chunks.append(chunk) 1099 return b"".join(chunks) or nodata_val 1100 1101 # The number of bytes to read is specified, return at most n bytes. 1102 avail = len(buf) - pos # Length of the available buffered data. 1103 if n <= avail: 1104 # Fast path: the data to read is fully buffered. 1105 self._read_pos += n 1106 return buf[pos:pos+n] 1107 # Slow path: read from the stream until enough bytes are read, 1108 # or until an EOF occurs or until read() would block. 1109 chunks = [buf[pos:]] 1110 wanted = max(self.buffer_size, n) 1111 while avail < n: 1112 chunk = self.raw.read(wanted) 1113 if chunk in empty_values: 1114 nodata_val = chunk 1115 break 1116 avail += len(chunk) 1117 chunks.append(chunk) 1118 # n is more than avail only when an EOF occurred or when 1119 # read() would have blocked. 1120 n = min(n, avail) 1121 out = b"".join(chunks) 1122 self._read_buf = out[n:] # Save the extra data in the buffer. 1123 self._read_pos = 0 1124 return out[:n] if out else nodata_val 1125 1126 def peek(self, size=0): 1127 """Returns buffered bytes without advancing the position. 1128 1129 The argument indicates a desired minimal number of bytes; we 1130 do at most one raw read to satisfy it. We never return more 1131 than self.buffer_size. 1132 """ 1133 with self._read_lock: 1134 return self._peek_unlocked(size) 1135 1136 def _peek_unlocked(self, n=0): 1137 want = min(n, self.buffer_size) 1138 have = len(self._read_buf) - self._read_pos 1139 if have < want or have <= 0: 1140 to_read = self.buffer_size - have 1141 current = self.raw.read(to_read) 1142 if current: 1143 self._read_buf = self._read_buf[self._read_pos:] + current 1144 self._read_pos = 0 1145 return self._read_buf[self._read_pos:] 1146 1147 def read1(self, size=-1): 1148 """Reads up to size bytes, with at most one read() system call.""" 1149 # Returns up to size bytes. If at least one byte is buffered, we 1150 # only return buffered bytes. Otherwise, we do one raw read. 1151 if size < 0: 1152 size = self.buffer_size 1153 if size == 0: 1154 return b"" 1155 with self._read_lock: 1156 self._peek_unlocked(1) 1157 return self._read_unlocked( 1158 min(size, len(self._read_buf) - self._read_pos)) 1159 1160 # Implementing readinto() and readinto1() is not strictly necessary (we 1161 # could rely on the base class that provides an implementation in terms of 1162 # read() and read1()). We do it anyway to keep the _pyio implementation 1163 # similar to the io implementation (which implements the methods for 1164 # performance reasons). 1165 def _readinto(self, buf, read1): 1166 """Read data into *buf* with at most one system call.""" 1167 1168 # Need to create a memoryview object of type 'b', otherwise 1169 # we may not be able to assign bytes to it, and slicing it 1170 # would create a new object. 1171 if not isinstance(buf, memoryview): 1172 buf = memoryview(buf) 1173 if buf.nbytes == 0: 1174 return 0 1175 buf = buf.cast('B') 1176 1177 written = 0 1178 with self._read_lock: 1179 while written < len(buf): 1180 1181 # First try to read from internal buffer 1182 avail = min(len(self._read_buf) - self._read_pos, len(buf)) 1183 if avail: 1184 buf[written:written+avail] = \ 1185 self._read_buf[self._read_pos:self._read_pos+avail] 1186 self._read_pos += avail 1187 written += avail 1188 if written == len(buf): 1189 break 1190 1191 # If remaining space in callers buffer is larger than 1192 # internal buffer, read directly into callers buffer 1193 if len(buf) - written > self.buffer_size: 1194 n = self.raw.readinto(buf[written:]) 1195 if not n: 1196 break # eof 1197 written += n 1198 1199 # Otherwise refill internal buffer - unless we're 1200 # in read1 mode and already got some data 1201 elif not (read1 and written): 1202 if not self._peek_unlocked(1): 1203 break # eof 1204 1205 # In readinto1 mode, return as soon as we have some data 1206 if read1 and written: 1207 break 1208 1209 return written 1210 1211 def tell(self): 1212 return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos 1213 1214 def seek(self, pos, whence=0): 1215 if whence not in valid_seek_flags: 1216 raise ValueError("invalid whence value") 1217 with self._read_lock: 1218 if whence == 1: 1219 pos -= len(self._read_buf) - self._read_pos 1220 pos = _BufferedIOMixin.seek(self, pos, whence) 1221 self._reset_read_buf() 1222 return pos 1223 1224class BufferedWriter(_BufferedIOMixin): 1225 1226 """A buffer for a writeable sequential RawIO object. 1227 1228 The constructor creates a BufferedWriter for the given writeable raw 1229 stream. If the buffer_size is not given, it defaults to 1230 DEFAULT_BUFFER_SIZE. 1231 """ 1232 1233 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 1234 if not raw.writable(): 1235 raise OSError('"raw" argument must be writable.') 1236 1237 _BufferedIOMixin.__init__(self, raw) 1238 if buffer_size <= 0: 1239 raise ValueError("invalid buffer size") 1240 self.buffer_size = buffer_size 1241 self._write_buf = bytearray() 1242 self._write_lock = Lock() 1243 1244 def writable(self): 1245 return self.raw.writable() 1246 1247 def write(self, b): 1248 if isinstance(b, str): 1249 raise TypeError("can't write str to binary stream") 1250 with self._write_lock: 1251 if self.closed: 1252 raise ValueError("write to closed file") 1253 # XXX we can implement some more tricks to try and avoid 1254 # partial writes 1255 if len(self._write_buf) > self.buffer_size: 1256 # We're full, so let's pre-flush the buffer. (This may 1257 # raise BlockingIOError with characters_written == 0.) 1258 self._flush_unlocked() 1259 before = len(self._write_buf) 1260 self._write_buf.extend(b) 1261 written = len(self._write_buf) - before 1262 if len(self._write_buf) > self.buffer_size: 1263 try: 1264 self._flush_unlocked() 1265 except BlockingIOError as e: 1266 if len(self._write_buf) > self.buffer_size: 1267 # We've hit the buffer_size. We have to accept a partial 1268 # write and cut back our buffer. 1269 overage = len(self._write_buf) - self.buffer_size 1270 written -= overage 1271 self._write_buf = self._write_buf[:self.buffer_size] 1272 raise BlockingIOError(e.errno, e.strerror, written) 1273 return written 1274 1275 def truncate(self, pos=None): 1276 with self._write_lock: 1277 self._flush_unlocked() 1278 if pos is None: 1279 pos = self.raw.tell() 1280 return self.raw.truncate(pos) 1281 1282 def flush(self): 1283 with self._write_lock: 1284 self._flush_unlocked() 1285 1286 def _flush_unlocked(self): 1287 if self.closed: 1288 raise ValueError("flush on closed file") 1289 while self._write_buf: 1290 try: 1291 n = self.raw.write(self._write_buf) 1292 except BlockingIOError: 1293 raise RuntimeError("self.raw should implement RawIOBase: it " 1294 "should not raise BlockingIOError") 1295 if n is None: 1296 raise BlockingIOError( 1297 errno.EAGAIN, 1298 "write could not complete without blocking", 0) 1299 if n > len(self._write_buf) or n < 0: 1300 raise OSError("write() returned incorrect number of bytes") 1301 del self._write_buf[:n] 1302 1303 def tell(self): 1304 return _BufferedIOMixin.tell(self) + len(self._write_buf) 1305 1306 def seek(self, pos, whence=0): 1307 if whence not in valid_seek_flags: 1308 raise ValueError("invalid whence value") 1309 with self._write_lock: 1310 self._flush_unlocked() 1311 return _BufferedIOMixin.seek(self, pos, whence) 1312 1313 def close(self): 1314 with self._write_lock: 1315 if self.raw is None or self.closed: 1316 return 1317 # We have to release the lock and call self.flush() (which will 1318 # probably just re-take the lock) in case flush has been overridden in 1319 # a subclass or the user set self.flush to something. This is the same 1320 # behavior as the C implementation. 1321 try: 1322 # may raise BlockingIOError or BrokenPipeError etc 1323 self.flush() 1324 finally: 1325 with self._write_lock: 1326 self.raw.close() 1327 1328 1329class BufferedRWPair(BufferedIOBase): 1330 1331 """A buffered reader and writer object together. 1332 1333 A buffered reader object and buffered writer object put together to 1334 form a sequential IO object that can read and write. This is typically 1335 used with a socket or two-way pipe. 1336 1337 reader and writer are RawIOBase objects that are readable and 1338 writeable respectively. If the buffer_size is omitted it defaults to 1339 DEFAULT_BUFFER_SIZE. 1340 """ 1341 1342 # XXX The usefulness of this (compared to having two separate IO 1343 # objects) is questionable. 1344 1345 def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE): 1346 """Constructor. 1347 1348 The arguments are two RawIO instances. 1349 """ 1350 if not reader.readable(): 1351 raise OSError('"reader" argument must be readable.') 1352 1353 if not writer.writable(): 1354 raise OSError('"writer" argument must be writable.') 1355 1356 self.reader = BufferedReader(reader, buffer_size) 1357 self.writer = BufferedWriter(writer, buffer_size) 1358 1359 def read(self, size=-1): 1360 if size is None: 1361 size = -1 1362 return self.reader.read(size) 1363 1364 def readinto(self, b): 1365 return self.reader.readinto(b) 1366 1367 def write(self, b): 1368 return self.writer.write(b) 1369 1370 def peek(self, size=0): 1371 return self.reader.peek(size) 1372 1373 def read1(self, size=-1): 1374 return self.reader.read1(size) 1375 1376 def readinto1(self, b): 1377 return self.reader.readinto1(b) 1378 1379 def readable(self): 1380 return self.reader.readable() 1381 1382 def writable(self): 1383 return self.writer.writable() 1384 1385 def flush(self): 1386 return self.writer.flush() 1387 1388 def close(self): 1389 try: 1390 self.writer.close() 1391 finally: 1392 self.reader.close() 1393 1394 def isatty(self): 1395 return self.reader.isatty() or self.writer.isatty() 1396 1397 @property 1398 def closed(self): 1399 return self.writer.closed 1400 1401 1402class BufferedRandom(BufferedWriter, BufferedReader): 1403 1404 """A buffered interface to random access streams. 1405 1406 The constructor creates a reader and writer for a seekable stream, 1407 raw, given in the first argument. If the buffer_size is omitted it 1408 defaults to DEFAULT_BUFFER_SIZE. 1409 """ 1410 1411 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 1412 raw._checkSeekable() 1413 BufferedReader.__init__(self, raw, buffer_size) 1414 BufferedWriter.__init__(self, raw, buffer_size) 1415 1416 def seek(self, pos, whence=0): 1417 if whence not in valid_seek_flags: 1418 raise ValueError("invalid whence value") 1419 self.flush() 1420 if self._read_buf: 1421 # Undo read ahead. 1422 with self._read_lock: 1423 self.raw.seek(self._read_pos - len(self._read_buf), 1) 1424 # First do the raw seek, then empty the read buffer, so that 1425 # if the raw seek fails, we don't lose buffered data forever. 1426 pos = self.raw.seek(pos, whence) 1427 with self._read_lock: 1428 self._reset_read_buf() 1429 if pos < 0: 1430 raise OSError("seek() returned invalid position") 1431 return pos 1432 1433 def tell(self): 1434 if self._write_buf: 1435 return BufferedWriter.tell(self) 1436 else: 1437 return BufferedReader.tell(self) 1438 1439 def truncate(self, pos=None): 1440 if pos is None: 1441 pos = self.tell() 1442 # Use seek to flush the read buffer. 1443 return BufferedWriter.truncate(self, pos) 1444 1445 def read(self, size=None): 1446 if size is None: 1447 size = -1 1448 self.flush() 1449 return BufferedReader.read(self, size) 1450 1451 def readinto(self, b): 1452 self.flush() 1453 return BufferedReader.readinto(self, b) 1454 1455 def peek(self, size=0): 1456 self.flush() 1457 return BufferedReader.peek(self, size) 1458 1459 def read1(self, size=-1): 1460 self.flush() 1461 return BufferedReader.read1(self, size) 1462 1463 def readinto1(self, b): 1464 self.flush() 1465 return BufferedReader.readinto1(self, b) 1466 1467 def write(self, b): 1468 if self._read_buf: 1469 # Undo readahead 1470 with self._read_lock: 1471 self.raw.seek(self._read_pos - len(self._read_buf), 1) 1472 self._reset_read_buf() 1473 return BufferedWriter.write(self, b) 1474 1475 1476class FileIO(RawIOBase): 1477 _fd = -1 1478 _created = False 1479 _readable = False 1480 _writable = False 1481 _appending = False 1482 _seekable = None 1483 _closefd = True 1484 1485 def __init__(self, file, mode='r', closefd=True, opener=None): 1486 """Open a file. The mode can be 'r' (default), 'w', 'x' or 'a' for reading, 1487 writing, exclusive creation or appending. The file will be created if it 1488 doesn't exist when opened for writing or appending; it will be truncated 1489 when opened for writing. A FileExistsError will be raised if it already 1490 exists when opened for creating. Opening a file for creating implies 1491 writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode 1492 to allow simultaneous reading and writing. A custom opener can be used by 1493 passing a callable as *opener*. The underlying file descriptor for the file 1494 object is then obtained by calling opener with (*name*, *flags*). 1495 *opener* must return an open file descriptor (passing os.open as *opener* 1496 results in functionality similar to passing None). 1497 """ 1498 if self._fd >= 0: 1499 # Have to close the existing file first. 1500 try: 1501 if self._closefd: 1502 os.close(self._fd) 1503 finally: 1504 self._fd = -1 1505 1506 if isinstance(file, float): 1507 raise TypeError('integer argument expected, got float') 1508 if isinstance(file, int): 1509 fd = file 1510 if fd < 0: 1511 raise ValueError('negative file descriptor') 1512 else: 1513 fd = -1 1514 1515 if not isinstance(mode, str): 1516 raise TypeError('invalid mode: %s' % (mode,)) 1517 if not set(mode) <= set('xrwab+'): 1518 raise ValueError('invalid mode: %s' % (mode,)) 1519 if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1: 1520 raise ValueError('Must have exactly one of create/read/write/append ' 1521 'mode and at most one plus') 1522 1523 if 'x' in mode: 1524 self._created = True 1525 self._writable = True 1526 flags = os.O_EXCL | os.O_CREAT 1527 elif 'r' in mode: 1528 self._readable = True 1529 flags = 0 1530 elif 'w' in mode: 1531 self._writable = True 1532 flags = os.O_CREAT | os.O_TRUNC 1533 elif 'a' in mode: 1534 self._writable = True 1535 self._appending = True 1536 flags = os.O_APPEND | os.O_CREAT 1537 1538 if '+' in mode: 1539 self._readable = True 1540 self._writable = True 1541 1542 if self._readable and self._writable: 1543 flags |= os.O_RDWR 1544 elif self._readable: 1545 flags |= os.O_RDONLY 1546 else: 1547 flags |= os.O_WRONLY 1548 1549 flags |= getattr(os, 'O_BINARY', 0) 1550 1551 noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or 1552 getattr(os, 'O_CLOEXEC', 0)) 1553 flags |= noinherit_flag 1554 1555 owned_fd = None 1556 try: 1557 if fd < 0: 1558 if not closefd: 1559 raise ValueError('Cannot use closefd=False with file name') 1560 if opener is None: 1561 fd = os.open(file, flags, 0o666) 1562 else: 1563 fd = opener(file, flags) 1564 if not isinstance(fd, int): 1565 raise TypeError('expected integer from opener') 1566 if fd < 0: 1567 raise OSError('Negative file descriptor') 1568 owned_fd = fd 1569 if not noinherit_flag: 1570 os.set_inheritable(fd, False) 1571 1572 self._closefd = closefd 1573 fdfstat = os.fstat(fd) 1574 try: 1575 if stat.S_ISDIR(fdfstat.st_mode): 1576 raise IsADirectoryError(errno.EISDIR, 1577 os.strerror(errno.EISDIR), file) 1578 except AttributeError: 1579 # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR 1580 # don't exist. 1581 pass 1582 self._blksize = getattr(fdfstat, 'st_blksize', 0) 1583 if self._blksize <= 1: 1584 self._blksize = DEFAULT_BUFFER_SIZE 1585 1586 if _setmode: 1587 # don't translate newlines (\r\n <=> \n) 1588 _setmode(fd, os.O_BINARY) 1589 1590 self.name = file 1591 if self._appending: 1592 # For consistent behaviour, we explicitly seek to the 1593 # end of file (otherwise, it might be done only on the 1594 # first write()). 1595 try: 1596 os.lseek(fd, 0, SEEK_END) 1597 except OSError as e: 1598 if e.errno != errno.ESPIPE: 1599 raise 1600 except: 1601 if owned_fd is not None: 1602 os.close(owned_fd) 1603 raise 1604 self._fd = fd 1605 1606 def __del__(self): 1607 if self._fd >= 0 and self._closefd and not self.closed: 1608 import warnings 1609 warnings.warn('unclosed file %r' % (self,), ResourceWarning, 1610 stacklevel=2, source=self) 1611 self.close() 1612 1613 def __getstate__(self): 1614 raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") 1615 1616 def __repr__(self): 1617 class_name = '%s.%s' % (self.__class__.__module__, 1618 self.__class__.__qualname__) 1619 if self.closed: 1620 return '<%s [closed]>' % class_name 1621 try: 1622 name = self.name 1623 except AttributeError: 1624 return ('<%s fd=%d mode=%r closefd=%r>' % 1625 (class_name, self._fd, self.mode, self._closefd)) 1626 else: 1627 return ('<%s name=%r mode=%r closefd=%r>' % 1628 (class_name, name, self.mode, self._closefd)) 1629 1630 def _checkReadable(self): 1631 if not self._readable: 1632 raise UnsupportedOperation('File not open for reading') 1633 1634 def _checkWritable(self, msg=None): 1635 if not self._writable: 1636 raise UnsupportedOperation('File not open for writing') 1637 1638 def read(self, size=None): 1639 """Read at most size bytes, returned as bytes. 1640 1641 Only makes one system call, so less data may be returned than requested 1642 In non-blocking mode, returns None if no data is available. 1643 Return an empty bytes object at EOF. 1644 """ 1645 self._checkClosed() 1646 self._checkReadable() 1647 if size is None or size < 0: 1648 return self.readall() 1649 try: 1650 return os.read(self._fd, size) 1651 except BlockingIOError: 1652 return None 1653 1654 def readall(self): 1655 """Read all data from the file, returned as bytes. 1656 1657 In non-blocking mode, returns as much as is immediately available, 1658 or None if no data is available. Return an empty bytes object at EOF. 1659 """ 1660 self._checkClosed() 1661 self._checkReadable() 1662 bufsize = DEFAULT_BUFFER_SIZE 1663 try: 1664 pos = os.lseek(self._fd, 0, SEEK_CUR) 1665 end = os.fstat(self._fd).st_size 1666 if end >= pos: 1667 bufsize = end - pos + 1 1668 except OSError: 1669 pass 1670 1671 result = bytearray() 1672 while True: 1673 if len(result) >= bufsize: 1674 bufsize = len(result) 1675 bufsize += max(bufsize, DEFAULT_BUFFER_SIZE) 1676 n = bufsize - len(result) 1677 try: 1678 chunk = os.read(self._fd, n) 1679 except BlockingIOError: 1680 if result: 1681 break 1682 return None 1683 if not chunk: # reached the end of the file 1684 break 1685 result += chunk 1686 1687 return bytes(result) 1688 1689 def readinto(self, b): 1690 """Same as RawIOBase.readinto().""" 1691 m = memoryview(b).cast('B') 1692 data = self.read(len(m)) 1693 n = len(data) 1694 m[:n] = data 1695 return n 1696 1697 def write(self, b): 1698 """Write bytes b to file, return number written. 1699 1700 Only makes one system call, so not all of the data may be written. 1701 The number of bytes actually written is returned. In non-blocking mode, 1702 returns None if the write would block. 1703 """ 1704 self._checkClosed() 1705 self._checkWritable() 1706 try: 1707 return os.write(self._fd, b) 1708 except BlockingIOError: 1709 return None 1710 1711 def seek(self, pos, whence=SEEK_SET): 1712 """Move to new file position. 1713 1714 Argument offset is a byte count. Optional argument whence defaults to 1715 SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values 1716 are SEEK_CUR or 1 (move relative to current position, positive or negative), 1717 and SEEK_END or 2 (move relative to end of file, usually negative, although 1718 many platforms allow seeking beyond the end of a file). 1719 1720 Note that not all file objects are seekable. 1721 """ 1722 if isinstance(pos, float): 1723 raise TypeError('an integer is required') 1724 self._checkClosed() 1725 return os.lseek(self._fd, pos, whence) 1726 1727 def tell(self): 1728 """tell() -> int. Current file position. 1729 1730 Can raise OSError for non seekable files.""" 1731 self._checkClosed() 1732 return os.lseek(self._fd, 0, SEEK_CUR) 1733 1734 def truncate(self, size=None): 1735 """Truncate the file to at most size bytes. 1736 1737 Size defaults to the current file position, as returned by tell(). 1738 The current file position is changed to the value of size. 1739 """ 1740 self._checkClosed() 1741 self._checkWritable() 1742 if size is None: 1743 size = self.tell() 1744 os.ftruncate(self._fd, size) 1745 return size 1746 1747 def close(self): 1748 """Close the file. 1749 1750 A closed file cannot be used for further I/O operations. close() may be 1751 called more than once without error. 1752 """ 1753 if not self.closed: 1754 try: 1755 if self._closefd: 1756 os.close(self._fd) 1757 finally: 1758 super().close() 1759 1760 def seekable(self): 1761 """True if file supports random-access.""" 1762 self._checkClosed() 1763 if self._seekable is None: 1764 try: 1765 self.tell() 1766 except OSError: 1767 self._seekable = False 1768 else: 1769 self._seekable = True 1770 return self._seekable 1771 1772 def readable(self): 1773 """True if file was opened in a read mode.""" 1774 self._checkClosed() 1775 return self._readable 1776 1777 def writable(self): 1778 """True if file was opened in a write mode.""" 1779 self._checkClosed() 1780 return self._writable 1781 1782 def fileno(self): 1783 """Return the underlying file descriptor (an integer).""" 1784 self._checkClosed() 1785 return self._fd 1786 1787 def isatty(self): 1788 """True if the file is connected to a TTY device.""" 1789 self._checkClosed() 1790 return os.isatty(self._fd) 1791 1792 @property 1793 def closefd(self): 1794 """True if the file descriptor will be closed by close().""" 1795 return self._closefd 1796 1797 @property 1798 def mode(self): 1799 """String giving the file mode""" 1800 if self._created: 1801 if self._readable: 1802 return 'xb+' 1803 else: 1804 return 'xb' 1805 elif self._appending: 1806 if self._readable: 1807 return 'ab+' 1808 else: 1809 return 'ab' 1810 elif self._readable: 1811 if self._writable: 1812 return 'rb+' 1813 else: 1814 return 'rb' 1815 else: 1816 return 'wb' 1817 1818 1819class TextIOBase(IOBase): 1820 1821 """Base class for text I/O. 1822 1823 This class provides a character and line based interface to stream 1824 I/O. There is no public constructor. 1825 """ 1826 1827 def read(self, size=-1): 1828 """Read at most size characters from stream, where size is an int. 1829 1830 Read from underlying buffer until we have size characters or we hit EOF. 1831 If size is negative or omitted, read until EOF. 1832 1833 Returns a string. 1834 """ 1835 self._unsupported("read") 1836 1837 def write(self, s): 1838 """Write string s to stream and returning an int.""" 1839 self._unsupported("write") 1840 1841 def truncate(self, pos=None): 1842 """Truncate size to pos, where pos is an int.""" 1843 self._unsupported("truncate") 1844 1845 def readline(self): 1846 """Read until newline or EOF. 1847 1848 Returns an empty string if EOF is hit immediately. 1849 """ 1850 self._unsupported("readline") 1851 1852 def detach(self): 1853 """ 1854 Separate the underlying buffer from the TextIOBase and return it. 1855 1856 After the underlying buffer has been detached, the TextIO is in an 1857 unusable state. 1858 """ 1859 self._unsupported("detach") 1860 1861 @property 1862 def encoding(self): 1863 """Subclasses should override.""" 1864 return None 1865 1866 @property 1867 def newlines(self): 1868 """Line endings translated so far. 1869 1870 Only line endings translated during reading are considered. 1871 1872 Subclasses should override. 1873 """ 1874 return None 1875 1876 @property 1877 def errors(self): 1878 """Error setting of the decoder or encoder. 1879 1880 Subclasses should override.""" 1881 return None 1882 1883io.TextIOBase.register(TextIOBase) 1884 1885 1886class IncrementalNewlineDecoder(codecs.IncrementalDecoder): 1887 r"""Codec used when reading a file in universal newlines mode. It wraps 1888 another incremental decoder, translating \r\n and \r into \n. It also 1889 records the types of newlines encountered. When used with 1890 translate=False, it ensures that the newline sequence is returned in 1891 one piece. 1892 """ 1893 def __init__(self, decoder, translate, errors='strict'): 1894 codecs.IncrementalDecoder.__init__(self, errors=errors) 1895 self.translate = translate 1896 self.decoder = decoder 1897 self.seennl = 0 1898 self.pendingcr = False 1899 1900 def decode(self, input, final=False): 1901 # decode input (with the eventual \r from a previous pass) 1902 if self.decoder is None: 1903 output = input 1904 else: 1905 output = self.decoder.decode(input, final=final) 1906 if self.pendingcr and (output or final): 1907 output = "\r" + output 1908 self.pendingcr = False 1909 1910 # retain last \r even when not translating data: 1911 # then readline() is sure to get \r\n in one pass 1912 if output.endswith("\r") and not final: 1913 output = output[:-1] 1914 self.pendingcr = True 1915 1916 # Record which newlines are read 1917 crlf = output.count('\r\n') 1918 cr = output.count('\r') - crlf 1919 lf = output.count('\n') - crlf 1920 self.seennl |= (lf and self._LF) | (cr and self._CR) \ 1921 | (crlf and self._CRLF) 1922 1923 if self.translate: 1924 if crlf: 1925 output = output.replace("\r\n", "\n") 1926 if cr: 1927 output = output.replace("\r", "\n") 1928 1929 return output 1930 1931 def getstate(self): 1932 if self.decoder is None: 1933 buf = b"" 1934 flag = 0 1935 else: 1936 buf, flag = self.decoder.getstate() 1937 flag <<= 1 1938 if self.pendingcr: 1939 flag |= 1 1940 return buf, flag 1941 1942 def setstate(self, state): 1943 buf, flag = state 1944 self.pendingcr = bool(flag & 1) 1945 if self.decoder is not None: 1946 self.decoder.setstate((buf, flag >> 1)) 1947 1948 def reset(self): 1949 self.seennl = 0 1950 self.pendingcr = False 1951 if self.decoder is not None: 1952 self.decoder.reset() 1953 1954 _LF = 1 1955 _CR = 2 1956 _CRLF = 4 1957 1958 @property 1959 def newlines(self): 1960 return (None, 1961 "\n", 1962 "\r", 1963 ("\r", "\n"), 1964 "\r\n", 1965 ("\n", "\r\n"), 1966 ("\r", "\r\n"), 1967 ("\r", "\n", "\r\n") 1968 )[self.seennl] 1969 1970 1971class TextIOWrapper(TextIOBase): 1972 1973 r"""Character and line based layer over a BufferedIOBase object, buffer. 1974 1975 encoding gives the name of the encoding that the stream will be 1976 decoded or encoded with. It defaults to locale.getpreferredencoding(False). 1977 1978 errors determines the strictness of encoding and decoding (see the 1979 codecs.register) and defaults to "strict". 1980 1981 newline can be None, '', '\n', '\r', or '\r\n'. It controls the 1982 handling of line endings. If it is None, universal newlines is 1983 enabled. With this enabled, on input, the lines endings '\n', '\r', 1984 or '\r\n' are translated to '\n' before being returned to the 1985 caller. Conversely, on output, '\n' is translated to the system 1986 default line separator, os.linesep. If newline is any other of its 1987 legal values, that newline becomes the newline when the file is read 1988 and it is returned untranslated. On output, '\n' is converted to the 1989 newline. 1990 1991 If line_buffering is True, a call to flush is implied when a call to 1992 write contains a newline character. 1993 """ 1994 1995 _CHUNK_SIZE = 2048 1996 1997 # Initialize _buffer as soon as possible since it's used by __del__() 1998 # which calls close() 1999 _buffer = None 2000 2001 # The write_through argument has no effect here since this 2002 # implementation always writes through. The argument is present only 2003 # so that the signature can match the signature of the C version. 2004 def __init__(self, buffer, encoding=None, errors=None, newline=None, 2005 line_buffering=False, write_through=False): 2006 self._check_newline(newline) 2007 if encoding is None: 2008 try: 2009 encoding = os.device_encoding(buffer.fileno()) 2010 except (AttributeError, UnsupportedOperation): 2011 pass 2012 if encoding is None: 2013 try: 2014 import locale 2015 except ImportError: 2016 # Importing locale may fail if Python is being built 2017 encoding = "ascii" 2018 else: 2019 encoding = locale.getpreferredencoding(False) 2020 2021 if not isinstance(encoding, str): 2022 raise ValueError("invalid encoding: %r" % encoding) 2023 2024 if not codecs.lookup(encoding)._is_text_encoding: 2025 msg = ("%r is not a text encoding; " 2026 "use codecs.open() to handle arbitrary codecs") 2027 raise LookupError(msg % encoding) 2028 2029 if errors is None: 2030 errors = "strict" 2031 else: 2032 if not isinstance(errors, str): 2033 raise ValueError("invalid errors: %r" % errors) 2034 if _CHECK_ERRORS: 2035 codecs.lookup_error(errors) 2036 2037 self._buffer = buffer 2038 self._decoded_chars = '' # buffer for text returned from decoder 2039 self._decoded_chars_used = 0 # offset into _decoded_chars for read() 2040 self._snapshot = None # info for reconstructing decoder state 2041 self._seekable = self._telling = self.buffer.seekable() 2042 self._has_read1 = hasattr(self.buffer, 'read1') 2043 self._configure(encoding, errors, newline, 2044 line_buffering, write_through) 2045 2046 def _check_newline(self, newline): 2047 if newline is not None and not isinstance(newline, str): 2048 raise TypeError("illegal newline type: %r" % (type(newline),)) 2049 if newline not in (None, "", "\n", "\r", "\r\n"): 2050 raise ValueError("illegal newline value: %r" % (newline,)) 2051 2052 def _configure(self, encoding=None, errors=None, newline=None, 2053 line_buffering=False, write_through=False): 2054 self._encoding = encoding 2055 self._errors = errors 2056 self._encoder = None 2057 self._decoder = None 2058 self._b2cratio = 0.0 2059 2060 self._readuniversal = not newline 2061 self._readtranslate = newline is None 2062 self._readnl = newline 2063 self._writetranslate = newline != '' 2064 self._writenl = newline or os.linesep 2065 2066 self._line_buffering = line_buffering 2067 self._write_through = write_through 2068 2069 # don't write a BOM in the middle of a file 2070 if self._seekable and self.writable(): 2071 position = self.buffer.tell() 2072 if position != 0: 2073 try: 2074 self._get_encoder().setstate(0) 2075 except LookupError: 2076 # Sometimes the encoder doesn't exist 2077 pass 2078 2079 # self._snapshot is either None, or a tuple (dec_flags, next_input) 2080 # where dec_flags is the second (integer) item of the decoder state 2081 # and next_input is the chunk of input bytes that comes next after the 2082 # snapshot point. We use this to reconstruct decoder states in tell(). 2083 2084 # Naming convention: 2085 # - "bytes_..." for integer variables that count input bytes 2086 # - "chars_..." for integer variables that count decoded characters 2087 2088 def __repr__(self): 2089 result = "<{}.{}".format(self.__class__.__module__, 2090 self.__class__.__qualname__) 2091 try: 2092 name = self.name 2093 except AttributeError: 2094 pass 2095 else: 2096 result += " name={0!r}".format(name) 2097 try: 2098 mode = self.mode 2099 except AttributeError: 2100 pass 2101 else: 2102 result += " mode={0!r}".format(mode) 2103 return result + " encoding={0!r}>".format(self.encoding) 2104 2105 @property 2106 def encoding(self): 2107 return self._encoding 2108 2109 @property 2110 def errors(self): 2111 return self._errors 2112 2113 @property 2114 def line_buffering(self): 2115 return self._line_buffering 2116 2117 @property 2118 def write_through(self): 2119 return self._write_through 2120 2121 @property 2122 def buffer(self): 2123 return self._buffer 2124 2125 def reconfigure(self, *, 2126 encoding=None, errors=None, newline=Ellipsis, 2127 line_buffering=None, write_through=None): 2128 """Reconfigure the text stream with new parameters. 2129 2130 This also flushes the stream. 2131 """ 2132 if (self._decoder is not None 2133 and (encoding is not None or errors is not None 2134 or newline is not Ellipsis)): 2135 raise UnsupportedOperation( 2136 "It is not possible to set the encoding or newline of stream " 2137 "after the first read") 2138 2139 if errors is None: 2140 if encoding is None: 2141 errors = self._errors 2142 else: 2143 errors = 'strict' 2144 elif not isinstance(errors, str): 2145 raise TypeError("invalid errors: %r" % errors) 2146 2147 if encoding is None: 2148 encoding = self._encoding 2149 else: 2150 if not isinstance(encoding, str): 2151 raise TypeError("invalid encoding: %r" % encoding) 2152 2153 if newline is Ellipsis: 2154 newline = self._readnl 2155 self._check_newline(newline) 2156 2157 if line_buffering is None: 2158 line_buffering = self.line_buffering 2159 if write_through is None: 2160 write_through = self.write_through 2161 2162 self.flush() 2163 self._configure(encoding, errors, newline, 2164 line_buffering, write_through) 2165 2166 def seekable(self): 2167 if self.closed: 2168 raise ValueError("I/O operation on closed file.") 2169 return self._seekable 2170 2171 def readable(self): 2172 return self.buffer.readable() 2173 2174 def writable(self): 2175 return self.buffer.writable() 2176 2177 def flush(self): 2178 self.buffer.flush() 2179 self._telling = self._seekable 2180 2181 def close(self): 2182 if self.buffer is not None and not self.closed: 2183 try: 2184 self.flush() 2185 finally: 2186 self.buffer.close() 2187 2188 @property 2189 def closed(self): 2190 return self.buffer.closed 2191 2192 @property 2193 def name(self): 2194 return self.buffer.name 2195 2196 def fileno(self): 2197 return self.buffer.fileno() 2198 2199 def isatty(self): 2200 return self.buffer.isatty() 2201 2202 def write(self, s): 2203 'Write data, where s is a str' 2204 if self.closed: 2205 raise ValueError("write to closed file") 2206 if not isinstance(s, str): 2207 raise TypeError("can't write %s to text stream" % 2208 s.__class__.__name__) 2209 length = len(s) 2210 haslf = (self._writetranslate or self._line_buffering) and "\n" in s 2211 if haslf and self._writetranslate and self._writenl != "\n": 2212 s = s.replace("\n", self._writenl) 2213 encoder = self._encoder or self._get_encoder() 2214 # XXX What if we were just reading? 2215 b = encoder.encode(s) 2216 self.buffer.write(b) 2217 if self._line_buffering and (haslf or "\r" in s): 2218 self.flush() 2219 self._set_decoded_chars('') 2220 self._snapshot = None 2221 if self._decoder: 2222 self._decoder.reset() 2223 return length 2224 2225 def _get_encoder(self): 2226 make_encoder = codecs.getincrementalencoder(self._encoding) 2227 self._encoder = make_encoder(self._errors) 2228 return self._encoder 2229 2230 def _get_decoder(self): 2231 make_decoder = codecs.getincrementaldecoder(self._encoding) 2232 decoder = make_decoder(self._errors) 2233 if self._readuniversal: 2234 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) 2235 self._decoder = decoder 2236 return decoder 2237 2238 # The following three methods implement an ADT for _decoded_chars. 2239 # Text returned from the decoder is buffered here until the client 2240 # requests it by calling our read() or readline() method. 2241 def _set_decoded_chars(self, chars): 2242 """Set the _decoded_chars buffer.""" 2243 self._decoded_chars = chars 2244 self._decoded_chars_used = 0 2245 2246 def _get_decoded_chars(self, n=None): 2247 """Advance into the _decoded_chars buffer.""" 2248 offset = self._decoded_chars_used 2249 if n is None: 2250 chars = self._decoded_chars[offset:] 2251 else: 2252 chars = self._decoded_chars[offset:offset + n] 2253 self._decoded_chars_used += len(chars) 2254 return chars 2255 2256 def _rewind_decoded_chars(self, n): 2257 """Rewind the _decoded_chars buffer.""" 2258 if self._decoded_chars_used < n: 2259 raise AssertionError("rewind decoded_chars out of bounds") 2260 self._decoded_chars_used -= n 2261 2262 def _read_chunk(self): 2263 """ 2264 Read and decode the next chunk of data from the BufferedReader. 2265 """ 2266 2267 # The return value is True unless EOF was reached. The decoded 2268 # string is placed in self._decoded_chars (replacing its previous 2269 # value). The entire input chunk is sent to the decoder, though 2270 # some of it may remain buffered in the decoder, yet to be 2271 # converted. 2272 2273 if self._decoder is None: 2274 raise ValueError("no decoder") 2275 2276 if self._telling: 2277 # To prepare for tell(), we need to snapshot a point in the 2278 # file where the decoder's input buffer is empty. 2279 2280 dec_buffer, dec_flags = self._decoder.getstate() 2281 # Given this, we know there was a valid snapshot point 2282 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). 2283 2284 # Read a chunk, decode it, and put the result in self._decoded_chars. 2285 if self._has_read1: 2286 input_chunk = self.buffer.read1(self._CHUNK_SIZE) 2287 else: 2288 input_chunk = self.buffer.read(self._CHUNK_SIZE) 2289 eof = not input_chunk 2290 decoded_chars = self._decoder.decode(input_chunk, eof) 2291 self._set_decoded_chars(decoded_chars) 2292 if decoded_chars: 2293 self._b2cratio = len(input_chunk) / len(self._decoded_chars) 2294 else: 2295 self._b2cratio = 0.0 2296 2297 if self._telling: 2298 # At the snapshot point, len(dec_buffer) bytes before the read, 2299 # the next input to be decoded is dec_buffer + input_chunk. 2300 self._snapshot = (dec_flags, dec_buffer + input_chunk) 2301 2302 return not eof 2303 2304 def _pack_cookie(self, position, dec_flags=0, 2305 bytes_to_feed=0, need_eof=False, chars_to_skip=0): 2306 # The meaning of a tell() cookie is: seek to position, set the 2307 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them 2308 # into the decoder with need_eof as the EOF flag, then skip 2309 # chars_to_skip characters of the decoded result. For most simple 2310 # decoders, tell() will often just give a byte offset in the file. 2311 return (position | (dec_flags<<64) | (bytes_to_feed<<128) | 2312 (chars_to_skip<<192) | bool(need_eof)<<256) 2313 2314 def _unpack_cookie(self, bigint): 2315 rest, position = divmod(bigint, 1<<64) 2316 rest, dec_flags = divmod(rest, 1<<64) 2317 rest, bytes_to_feed = divmod(rest, 1<<64) 2318 need_eof, chars_to_skip = divmod(rest, 1<<64) 2319 return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip 2320 2321 def tell(self): 2322 if not self._seekable: 2323 raise UnsupportedOperation("underlying stream is not seekable") 2324 if not self._telling: 2325 raise OSError("telling position disabled by next() call") 2326 self.flush() 2327 position = self.buffer.tell() 2328 decoder = self._decoder 2329 if decoder is None or self._snapshot is None: 2330 if self._decoded_chars: 2331 # This should never happen. 2332 raise AssertionError("pending decoded text") 2333 return position 2334 2335 # Skip backward to the snapshot point (see _read_chunk). 2336 dec_flags, next_input = self._snapshot 2337 position -= len(next_input) 2338 2339 # How many decoded characters have been used up since the snapshot? 2340 chars_to_skip = self._decoded_chars_used 2341 if chars_to_skip == 0: 2342 # We haven't moved from the snapshot point. 2343 return self._pack_cookie(position, dec_flags) 2344 2345 # Starting from the snapshot position, we will walk the decoder 2346 # forward until it gives us enough decoded characters. 2347 saved_state = decoder.getstate() 2348 try: 2349 # Fast search for an acceptable start point, close to our 2350 # current pos. 2351 # Rationale: calling decoder.decode() has a large overhead 2352 # regardless of chunk size; we want the number of such calls to 2353 # be O(1) in most situations (common decoders, sensible input). 2354 # Actually, it will be exactly 1 for fixed-size codecs (all 2355 # 8-bit codecs, also UTF-16 and UTF-32). 2356 skip_bytes = int(self._b2cratio * chars_to_skip) 2357 skip_back = 1 2358 assert skip_bytes <= len(next_input) 2359 while skip_bytes > 0: 2360 decoder.setstate((b'', dec_flags)) 2361 # Decode up to temptative start point 2362 n = len(decoder.decode(next_input[:skip_bytes])) 2363 if n <= chars_to_skip: 2364 b, d = decoder.getstate() 2365 if not b: 2366 # Before pos and no bytes buffered in decoder => OK 2367 dec_flags = d 2368 chars_to_skip -= n 2369 break 2370 # Skip back by buffered amount and reset heuristic 2371 skip_bytes -= len(b) 2372 skip_back = 1 2373 else: 2374 # We're too far ahead, skip back a bit 2375 skip_bytes -= skip_back 2376 skip_back = skip_back * 2 2377 else: 2378 skip_bytes = 0 2379 decoder.setstate((b'', dec_flags)) 2380 2381 # Note our initial start point. 2382 start_pos = position + skip_bytes 2383 start_flags = dec_flags 2384 if chars_to_skip == 0: 2385 # We haven't moved from the start point. 2386 return self._pack_cookie(start_pos, start_flags) 2387 2388 # Feed the decoder one byte at a time. As we go, note the 2389 # nearest "safe start point" before the current location 2390 # (a point where the decoder has nothing buffered, so seek() 2391 # can safely start from there and advance to this location). 2392 bytes_fed = 0 2393 need_eof = False 2394 # Chars decoded since `start_pos` 2395 chars_decoded = 0 2396 for i in range(skip_bytes, len(next_input)): 2397 bytes_fed += 1 2398 chars_decoded += len(decoder.decode(next_input[i:i+1])) 2399 dec_buffer, dec_flags = decoder.getstate() 2400 if not dec_buffer and chars_decoded <= chars_to_skip: 2401 # Decoder buffer is empty, so this is a safe start point. 2402 start_pos += bytes_fed 2403 chars_to_skip -= chars_decoded 2404 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 2405 if chars_decoded >= chars_to_skip: 2406 break 2407 else: 2408 # We didn't get enough decoded data; signal EOF to get more. 2409 chars_decoded += len(decoder.decode(b'', final=True)) 2410 need_eof = True 2411 if chars_decoded < chars_to_skip: 2412 raise OSError("can't reconstruct logical file position") 2413 2414 # The returned cookie corresponds to the last safe start point. 2415 return self._pack_cookie( 2416 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip) 2417 finally: 2418 decoder.setstate(saved_state) 2419 2420 def truncate(self, pos=None): 2421 self.flush() 2422 if pos is None: 2423 pos = self.tell() 2424 return self.buffer.truncate(pos) 2425 2426 def detach(self): 2427 if self.buffer is None: 2428 raise ValueError("buffer is already detached") 2429 self.flush() 2430 buffer = self._buffer 2431 self._buffer = None 2432 return buffer 2433 2434 def seek(self, cookie, whence=0): 2435 def _reset_encoder(position): 2436 """Reset the encoder (merely useful for proper BOM handling)""" 2437 try: 2438 encoder = self._encoder or self._get_encoder() 2439 except LookupError: 2440 # Sometimes the encoder doesn't exist 2441 pass 2442 else: 2443 if position != 0: 2444 encoder.setstate(0) 2445 else: 2446 encoder.reset() 2447 2448 if self.closed: 2449 raise ValueError("tell on closed file") 2450 if not self._seekable: 2451 raise UnsupportedOperation("underlying stream is not seekable") 2452 if whence == SEEK_CUR: 2453 if cookie != 0: 2454 raise UnsupportedOperation("can't do nonzero cur-relative seeks") 2455 # Seeking to the current position should attempt to 2456 # sync the underlying buffer with the current position. 2457 whence = 0 2458 cookie = self.tell() 2459 elif whence == SEEK_END: 2460 if cookie != 0: 2461 raise UnsupportedOperation("can't do nonzero end-relative seeks") 2462 self.flush() 2463 position = self.buffer.seek(0, whence) 2464 self._set_decoded_chars('') 2465 self._snapshot = None 2466 if self._decoder: 2467 self._decoder.reset() 2468 _reset_encoder(position) 2469 return position 2470 if whence != 0: 2471 raise ValueError("unsupported whence (%r)" % (whence,)) 2472 if cookie < 0: 2473 raise ValueError("negative seek position %r" % (cookie,)) 2474 self.flush() 2475 2476 # The strategy of seek() is to go back to the safe start point 2477 # and replay the effect of read(chars_to_skip) from there. 2478 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \ 2479 self._unpack_cookie(cookie) 2480 2481 # Seek back to the safe start point. 2482 self.buffer.seek(start_pos) 2483 self._set_decoded_chars('') 2484 self._snapshot = None 2485 2486 # Restore the decoder to its state from the safe start point. 2487 if cookie == 0 and self._decoder: 2488 self._decoder.reset() 2489 elif self._decoder or dec_flags or chars_to_skip: 2490 self._decoder = self._decoder or self._get_decoder() 2491 self._decoder.setstate((b'', dec_flags)) 2492 self._snapshot = (dec_flags, b'') 2493 2494 if chars_to_skip: 2495 # Just like _read_chunk, feed the decoder and save a snapshot. 2496 input_chunk = self.buffer.read(bytes_to_feed) 2497 self._set_decoded_chars( 2498 self._decoder.decode(input_chunk, need_eof)) 2499 self._snapshot = (dec_flags, input_chunk) 2500 2501 # Skip chars_to_skip of the decoded characters. 2502 if len(self._decoded_chars) < chars_to_skip: 2503 raise OSError("can't restore logical file position") 2504 self._decoded_chars_used = chars_to_skip 2505 2506 _reset_encoder(cookie) 2507 return cookie 2508 2509 def read(self, size=None): 2510 self._checkReadable() 2511 if size is None: 2512 size = -1 2513 else: 2514 try: 2515 size_index = size.__index__ 2516 except AttributeError: 2517 raise TypeError(f"{size!r} is not an integer") 2518 else: 2519 size = size_index() 2520 decoder = self._decoder or self._get_decoder() 2521 if size < 0: 2522 # Read everything. 2523 result = (self._get_decoded_chars() + 2524 decoder.decode(self.buffer.read(), final=True)) 2525 self._set_decoded_chars('') 2526 self._snapshot = None 2527 return result 2528 else: 2529 # Keep reading chunks until we have size characters to return. 2530 eof = False 2531 result = self._get_decoded_chars(size) 2532 while len(result) < size and not eof: 2533 eof = not self._read_chunk() 2534 result += self._get_decoded_chars(size - len(result)) 2535 return result 2536 2537 def __next__(self): 2538 self._telling = False 2539 line = self.readline() 2540 if not line: 2541 self._snapshot = None 2542 self._telling = self._seekable 2543 raise StopIteration 2544 return line 2545 2546 def readline(self, size=None): 2547 if self.closed: 2548 raise ValueError("read from closed file") 2549 if size is None: 2550 size = -1 2551 else: 2552 try: 2553 size_index = size.__index__ 2554 except AttributeError: 2555 raise TypeError(f"{size!r} is not an integer") 2556 else: 2557 size = size_index() 2558 2559 # Grab all the decoded text (we will rewind any extra bits later). 2560 line = self._get_decoded_chars() 2561 2562 start = 0 2563 # Make the decoder if it doesn't already exist. 2564 if not self._decoder: 2565 self._get_decoder() 2566 2567 pos = endpos = None 2568 while True: 2569 if self._readtranslate: 2570 # Newlines are already translated, only search for \n 2571 pos = line.find('\n', start) 2572 if pos >= 0: 2573 endpos = pos + 1 2574 break 2575 else: 2576 start = len(line) 2577 2578 elif self._readuniversal: 2579 # Universal newline search. Find any of \r, \r\n, \n 2580 # The decoder ensures that \r\n are not split in two pieces 2581 2582 # In C we'd look for these in parallel of course. 2583 nlpos = line.find("\n", start) 2584 crpos = line.find("\r", start) 2585 if crpos == -1: 2586 if nlpos == -1: 2587 # Nothing found 2588 start = len(line) 2589 else: 2590 # Found \n 2591 endpos = nlpos + 1 2592 break 2593 elif nlpos == -1: 2594 # Found lone \r 2595 endpos = crpos + 1 2596 break 2597 elif nlpos < crpos: 2598 # Found \n 2599 endpos = nlpos + 1 2600 break 2601 elif nlpos == crpos + 1: 2602 # Found \r\n 2603 endpos = crpos + 2 2604 break 2605 else: 2606 # Found \r 2607 endpos = crpos + 1 2608 break 2609 else: 2610 # non-universal 2611 pos = line.find(self._readnl) 2612 if pos >= 0: 2613 endpos = pos + len(self._readnl) 2614 break 2615 2616 if size >= 0 and len(line) >= size: 2617 endpos = size # reached length size 2618 break 2619 2620 # No line ending seen yet - get more data' 2621 while self._read_chunk(): 2622 if self._decoded_chars: 2623 break 2624 if self._decoded_chars: 2625 line += self._get_decoded_chars() 2626 else: 2627 # end of file 2628 self._set_decoded_chars('') 2629 self._snapshot = None 2630 return line 2631 2632 if size >= 0 and endpos > size: 2633 endpos = size # don't exceed size 2634 2635 # Rewind _decoded_chars to just after the line ending we found. 2636 self._rewind_decoded_chars(len(line) - endpos) 2637 return line[:endpos] 2638 2639 @property 2640 def newlines(self): 2641 return self._decoder.newlines if self._decoder else None 2642 2643 2644class StringIO(TextIOWrapper): 2645 """Text I/O implementation using an in-memory buffer. 2646 2647 The initial_value argument sets the value of object. The newline 2648 argument is like the one of TextIOWrapper's constructor. 2649 """ 2650 2651 def __init__(self, initial_value="", newline="\n"): 2652 super(StringIO, self).__init__(BytesIO(), 2653 encoding="utf-8", 2654 errors="surrogatepass", 2655 newline=newline) 2656 # Issue #5645: make universal newlines semantics the same as in the 2657 # C version, even under Windows. 2658 if newline is None: 2659 self._writetranslate = False 2660 if initial_value is not None: 2661 if not isinstance(initial_value, str): 2662 raise TypeError("initial_value must be str or None, not {0}" 2663 .format(type(initial_value).__name__)) 2664 self.write(initial_value) 2665 self.seek(0) 2666 2667 def getvalue(self): 2668 self.flush() 2669 decoder = self._decoder or self._get_decoder() 2670 old_state = decoder.getstate() 2671 decoder.reset() 2672 try: 2673 return decoder.decode(self.buffer.getvalue(), final=True) 2674 finally: 2675 decoder.setstate(old_state) 2676 2677 def __repr__(self): 2678 # TextIOWrapper tells the encoding in its repr. In StringIO, 2679 # that's an implementation detail. 2680 return object.__repr__(self) 2681 2682 @property 2683 def errors(self): 2684 return None 2685 2686 @property 2687 def encoding(self): 2688 return None 2689 2690 def detach(self): 2691 # This doesn't make sense on StringIO. 2692 self._unsupported("detach") 2693