1 /*
2     An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3 
4     Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5 
6     Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8 
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "structmember.h"
12 #include "_iomodule.h"
13 
14 /*[clinic input]
15 module _io
16 class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
17 class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
18 [clinic start generated code]*/
19 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
20 
21 _Py_IDENTIFIER(close);
22 _Py_IDENTIFIER(_dealloc_warn);
23 _Py_IDENTIFIER(decode);
24 _Py_IDENTIFIER(fileno);
25 _Py_IDENTIFIER(flush);
26 _Py_IDENTIFIER(getpreferredencoding);
27 _Py_IDENTIFIER(isatty);
28 _Py_IDENTIFIER(mode);
29 _Py_IDENTIFIER(name);
30 _Py_IDENTIFIER(raw);
31 _Py_IDENTIFIER(read);
32 _Py_IDENTIFIER(readable);
33 _Py_IDENTIFIER(replace);
34 _Py_IDENTIFIER(reset);
35 _Py_IDENTIFIER(seek);
36 _Py_IDENTIFIER(seekable);
37 _Py_IDENTIFIER(setstate);
38 _Py_IDENTIFIER(strict);
39 _Py_IDENTIFIER(tell);
40 _Py_IDENTIFIER(writable);
41 
42 /* TextIOBase */
43 
/* Docstring of the _io._TextIOBase type; referenced by the tp_doc slot of
   PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
51 
52 static PyObject *
_unsupported(const char * message)53 _unsupported(const char *message)
54 {
55     _PyIO_State *state = IO_STATE();
56     if (state != NULL)
57         PyErr_SetString(state->unsupported_operation, message);
58     return NULL;
59 }
60 
61 PyDoc_STRVAR(textiobase_detach_doc,
62     "Separate the underlying buffer from the TextIOBase and return it.\n"
63     "\n"
64     "After the underlying buffer has been detached, the TextIO is in an\n"
65     "unusable state.\n"
66     );
67 
68 static PyObject *
textiobase_detach(PyObject * self)69 textiobase_detach(PyObject *self)
70 {
71     return _unsupported("detach");
72 }
73 
PyDoc_STRVAR(textiobase_read_doc,
    "Read at most n characters from stream.\n"
    "\n"
    "Read from underlying buffer until we have n characters or we hit EOF.\n"
    "If n is negative or omitted, read until EOF.\n"
    );

/* TextIOBase.read() stub: both arguments are ignored and the call always
   fails through _unsupported("read"), returning NULL. */
static PyObject *
textiobase_read(PyObject *self, PyObject *args)
{
    return _unsupported("read");
}
86 
PyDoc_STRVAR(textiobase_readline_doc,
    "Read until newline or EOF.\n"
    "\n"
    "Returns an empty string if EOF is hit immediately.\n"
    );

/* TextIOBase.readline() stub: both arguments are ignored and the call
   always fails through _unsupported("readline"), returning NULL. */
static PyObject *
textiobase_readline(PyObject *self, PyObject *args)
{
    return _unsupported("readline");
}
98 
PyDoc_STRVAR(textiobase_write_doc,
    "Write string to stream.\n"
    "Returns the number of characters written (which is always equal to\n"
    "the length of the string).\n"
    );

/* TextIOBase.write() stub: both arguments are ignored and the call always
   fails through _unsupported("write"), returning NULL. */
static PyObject *
textiobase_write(PyObject *self, PyObject *args)
{
    return _unsupported("write");
}
110 
PyDoc_STRVAR(textiobase_encoding_doc,
    "Encoding of the text stream.\n"
    "\n"
    "Subclasses should override.\n"
    );

/* Getter for the base-class "encoding" attribute: ignores both arguments
   and always returns None. */
static PyObject *
textiobase_encoding_get(PyObject *self, void *context)
{
    Py_RETURN_NONE;
}
122 
PyDoc_STRVAR(textiobase_newlines_doc,
    "Line endings translated so far.\n"
    "\n"
    "Only line endings translated during reading are considered.\n"
    "\n"
    "Subclasses should override.\n"
    );

/* Getter for the base-class "newlines" attribute: ignores both arguments
   and always returns None. */
static PyObject *
textiobase_newlines_get(PyObject *self, void *context)
{
    Py_RETURN_NONE;
}
136 
PyDoc_STRVAR(textiobase_errors_doc,
    "The error setting of the decoder or encoder.\n"
    "\n"
    "Subclasses should override.\n"
    );

/* Getter for the base-class "errors" attribute: ignores both arguments
   and always returns None. */
static PyObject *
textiobase_errors_get(PyObject *self, void *context)
{
    Py_RETURN_NONE;
}
148 
149 
/* Method table of _TextIOBase (installed through tp_methods of
   PyTextIOBase_Type).  Each entry points at a stub that reports the
   operation as unsupported.  detach takes no arguments; the others are
   declared METH_VARARGS. */
static PyMethodDef textiobase_methods[] = {
    {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
    {NULL, NULL}  /* sentinel */
};
157 
/* Attribute table of _TextIOBase (installed through tp_getset of
   PyTextIOBase_Type).  All three attributes are read-only here (the setter
   slot is NULL) and their getters return None. */
static PyGetSetDef textiobase_getset[] = {
    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
    {NULL}  /* sentinel */
};
164 
/* Type object for _io._TextIOBase.

   Only the flags, docstring, method table, getset table and base type
   (PyIOBase_Type) are filled in; every other slot, including tp_basicsize,
   is left at 0.  The type is subclassable (Py_TPFLAGS_BASETYPE). */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
        | Py_TPFLAGS_HAVE_FINALIZE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};
216 
217 
218 /* IncrementalNewlineDecoder */
219 
/* Instance layout of IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder; Py_None means decode()
                                   input is used as text directly; decode()
                                   treats NULL as "__init__ not called" */
    PyObject *errors;           /* errors argument given to __init__, or the
                                   "strict" string when it was omitted */
    unsigned int pendingcr: 1;  /* a trailing '\r' was held back by the
                                   previous non-final decode() call */
    unsigned int translate: 1;  /* rewrite "\r\n" and "\r" as "\n" */
    unsigned int seennl: 3;     /* bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF */
} nldecoder_object;
228 
229 /*[clinic input]
230 _io.IncrementalNewlineDecoder.__init__
231     decoder: object
232     translate: int
233     errors: object(c_default="NULL") = "strict"
234 
235 Codec used when reading a file in universal newlines mode.
236 
237 It wraps another incremental decoder, translating \r\n and \r into \n.
238 It also records the types of newlines encountered.  When used with
239 translate=False, it ensures that the newline sequence is returned in
240 one piece. When used with decoder=None, it expects unicode strings as
241 decode input and translates newlines without first invoking an external
242 decoder.
243 [clinic start generated code]*/
244 
245 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)246 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
247                                             PyObject *decoder, int translate,
248                                             PyObject *errors)
249 /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
250 {
251     self->decoder = decoder;
252     Py_INCREF(decoder);
253 
254     if (errors == NULL) {
255         self->errors = _PyUnicode_FromId(&PyId_strict);
256         if (self->errors == NULL)
257             return -1;
258     }
259     else {
260         self->errors = errors;
261     }
262     Py_INCREF(self->errors);
263 
264     self->translate = translate ? 1 : 0;
265     self->seennl = 0;
266     self->pendingcr = 0;
267 
268     return 0;
269 }
270 
271 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)272 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
273 {
274     Py_CLEAR(self->decoder);
275     Py_CLEAR(self->errors);
276     Py_TYPE(self)->tp_free((PyObject *)self);
277 }
278 
279 static int
check_decoded(PyObject * decoded)280 check_decoded(PyObject *decoded)
281 {
282     if (decoded == NULL)
283         return -1;
284     if (!PyUnicode_Check(decoded)) {
285         PyErr_Format(PyExc_TypeError,
286                      "decoder should return a string result, not '%.200s'",
287                      Py_TYPE(decoded)->tp_name);
288         Py_DECREF(decoded);
289         return -1;
290     }
291     if (PyUnicode_READY(decoded) < 0) {
292         Py_DECREF(decoded);
293         return -1;
294     }
295     return 0;
296 }
297 
/* Bit flags stored in nldecoder_object.seennl, one per newline style that
   has been encountered; SEEN_ALL is all three combined. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
302 
/* Decode `input` through the wrapped decoder and apply newline handling.

   myself: the decoder object; cast to nldecoder_object without a type
           check.
   input:  handed to the wrapped decoder's decode() method, or used
           directly as the text when the wrapped decoder is None.
   final:  non-zero for the last chunk; a trailing '\r' is then not held
           back for the next call.

   Returns a new reference to a str, or NULL with an exception set.
   As a side effect updates self->pendingcr and self->seennl.

   NOTE(review): the scanning loops below read one position at a time
   without checking the index against `len` first; this appears to rely on
   the string data being terminated by a NUL character -- confirm. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* On failure check_decoded() has already released `output`. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
        /* `kind` is used as the per-character byte width here. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* Nothing to scan; seennl is unchanged. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* With one byte per character a '\n' byte is a '\n'
                   character; for wider kinds the byte hit must be
                   confirmed character by character. */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* Record newline styles only; the text is returned unchanged. */
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            /* Translate into a scratch buffer of the same character width;
               the result can only be shorter than or equal to the input. */
            void *translated;
            int kind = PyUnicode_KIND(output);
            void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that read was at index len. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
503 
504 /*[clinic input]
505 _io.IncrementalNewlineDecoder.decode
506     input: object
507     final: bool(accept={int}) = False
508 [clinic start generated code]*/
509 
/* Python-level decode(input, final=False): forwards unchanged to
   _PyIncrementalNewlineDecoder_decode() and returns its result. */
static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
                                          PyObject *input, int final)
/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
{
    return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
}
517 
518 /*[clinic input]
519 _io.IncrementalNewlineDecoder.getstate
520 [clinic start generated code]*/
521 
522 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)523 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
524 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
525 {
526     PyObject *buffer;
527     unsigned long long flag;
528 
529     if (self->decoder != Py_None) {
530         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
531            _PyIO_str_getstate, NULL);
532         if (state == NULL)
533             return NULL;
534         if (!PyTuple_Check(state)) {
535             PyErr_SetString(PyExc_TypeError,
536                             "illegal decoder state");
537             Py_DECREF(state);
538             return NULL;
539         }
540         if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
541                               &buffer, &flag))
542         {
543             Py_DECREF(state);
544             return NULL;
545         }
546         Py_INCREF(buffer);
547         Py_DECREF(state);
548     }
549     else {
550         buffer = PyBytes_FromString("");
551         flag = 0;
552     }
553     flag <<= 1;
554     if (self->pendingcr)
555         flag |= 1;
556     return Py_BuildValue("NK", buffer, flag);
557 }
558 
559 /*[clinic input]
560 _io.IncrementalNewlineDecoder.setstate
561     state: object
562     /
563 [clinic start generated code]*/
564 
565 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)566 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
567                                        PyObject *state)
568 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
569 {
570     PyObject *buffer;
571     unsigned long long flag;
572 
573     if (!PyTuple_Check(state)) {
574         PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
575         return NULL;
576     }
577     if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
578                           &buffer, &flag))
579     {
580         return NULL;
581     }
582 
583     self->pendingcr = (int) (flag & 1);
584     flag >>= 1;
585 
586     if (self->decoder != Py_None)
587         return _PyObject_CallMethodId(self->decoder,
588                                       &PyId_setstate, "((OK))", buffer, flag);
589     else
590         Py_RETURN_NONE;
591 }
592 
593 /*[clinic input]
594 _io.IncrementalNewlineDecoder.reset
595 [clinic start generated code]*/
596 
597 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)598 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
599 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
600 {
601     self->seennl = 0;
602     self->pendingcr = 0;
603     if (self->decoder != Py_None)
604         return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
605     else
606         Py_RETURN_NONE;
607 }
608 
609 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)610 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
611 {
612     switch (self->seennl) {
613     case SEEN_CR:
614         return PyUnicode_FromString("\r");
615     case SEEN_LF:
616         return PyUnicode_FromString("\n");
617     case SEEN_CRLF:
618         return PyUnicode_FromString("\r\n");
619     case SEEN_CR | SEEN_LF:
620         return Py_BuildValue("ss", "\r", "\n");
621     case SEEN_CR | SEEN_CRLF:
622         return Py_BuildValue("ss", "\r", "\r\n");
623     case SEEN_LF | SEEN_CRLF:
624         return Py_BuildValue("ss", "\n", "\r\n");
625     case SEEN_CR | SEEN_LF | SEEN_CRLF:
626         return Py_BuildValue("sss", "\r", "\n", "\r\n");
627     default:
628         Py_RETURN_NONE;
629    }
630 
631 }
632 
633 /* TextIOWrapper */
634 
/* Signature of a specialized encoding function: takes two objects and
   returns an object (see the encodefunc member of textio and the
   encodefuncs table). */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
637 
/* Instance layout of TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;   /* underlying stream object; its readable() and
                           writable() methods are consulted when the
                           decoder and encoder are set up */
    PyObject *encoding;
    PyObject *encoder;  /* incremental encoder, set by
                           _textiowrapper_set_encoder() */
    PyObject *decoder;  /* incremental decoder, set by
                           _textiowrapper_set_decoder(); wrapped in an
                           IncrementalNewlineDecoder when readuniversal */
    PyObject *readnl;   /* str built from the newline argument by
                           set_newline(), or NULL when it was NULL */
    PyObject *errors;   /* passed through PyUnicode_AsUTF8() by the
                           specialized encoding functions */
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;  /* newline argument was NULL or "" */
    char readtranslate;  /* newline argument was NULL */
    char writetranslate; /* newline argument was NULL or non-empty */
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
696 
/* Forward declaration, so the function can be called before the point in
   the file where it is defined. */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
699 
700 /* A couple of specialized cases in order to bypass the slow incremental
701    encoding methods for the most popular encodings. */
702 
703 static PyObject *
ascii_encode(textio * self,PyObject * text)704 ascii_encode(textio *self, PyObject *text)
705 {
706     return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
707 }
708 
709 static PyObject *
utf16be_encode(textio * self,PyObject * text)710 utf16be_encode(textio *self, PyObject *text)
711 {
712     return _PyUnicode_EncodeUTF16(text,
713                                   PyUnicode_AsUTF8(self->errors), 1);
714 }
715 
716 static PyObject *
utf16le_encode(textio * self,PyObject * text)717 utf16le_encode(textio *self, PyObject *text)
718 {
719     return _PyUnicode_EncodeUTF16(text,
720                                   PyUnicode_AsUTF8(self->errors), -1);
721 }
722 
723 static PyObject *
utf16_encode(textio * self,PyObject * text)724 utf16_encode(textio *self, PyObject *text)
725 {
726     if (!self->encoding_start_of_stream) {
727         /* Skip the BOM and use native byte ordering */
728 #if PY_BIG_ENDIAN
729         return utf16be_encode(self, text);
730 #else
731         return utf16le_encode(self, text);
732 #endif
733     }
734     return _PyUnicode_EncodeUTF16(text,
735                                   PyUnicode_AsUTF8(self->errors), 0);
736 }
737 
738 static PyObject *
utf32be_encode(textio * self,PyObject * text)739 utf32be_encode(textio *self, PyObject *text)
740 {
741     return _PyUnicode_EncodeUTF32(text,
742                                   PyUnicode_AsUTF8(self->errors), 1);
743 }
744 
745 static PyObject *
utf32le_encode(textio * self,PyObject * text)746 utf32le_encode(textio *self, PyObject *text)
747 {
748     return _PyUnicode_EncodeUTF32(text,
749                                   PyUnicode_AsUTF8(self->errors), -1);
750 }
751 
752 static PyObject *
utf32_encode(textio * self,PyObject * text)753 utf32_encode(textio *self, PyObject *text)
754 {
755     if (!self->encoding_start_of_stream) {
756         /* Skip the BOM and use native byte ordering */
757 #if PY_BIG_ENDIAN
758         return utf32be_encode(self, text);
759 #else
760         return utf32le_encode(self, text);
761 #endif
762     }
763     return _PyUnicode_EncodeUTF32(text,
764                                   PyUnicode_AsUTF8(self->errors), 0);
765 }
766 
767 static PyObject *
utf8_encode(textio * self,PyObject * text)768 utf8_encode(textio *self, PyObject *text)
769 {
770     return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
771 }
772 
773 static PyObject *
latin1_encode(textio * self,PyObject * text)774 latin1_encode(textio *self, PyObject *text)
775 {
776     return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
777 }
778 
779 /* Map normalized encoding names onto the specialized encoding funcs */
780 
/* One row of the lookup table below: a codec name and the specialized
   encoding function to use for it. */
typedef struct {
    const char *name;
    encodefunc_t encodefunc;
} encodefuncentry;

/* Lookup table consulted by _textiowrapper_set_encoder(), which compares
   the codec's `name` attribute against each entry in order.  The functions
   take a textio * as first argument and are cast to encodefunc_t here.
   Terminated by a {NULL, NULL} entry. */
static const encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
798 
799 static int
validate_newline(const char * newline)800 validate_newline(const char *newline)
801 {
802     if (newline && newline[0] != '\0'
803         && !(newline[0] == '\n' && newline[1] == '\0')
804         && !(newline[0] == '\r' && newline[1] == '\0')
805         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
806         PyErr_Format(PyExc_ValueError,
807                      "illegal newline value: %s", newline);
808         return -1;
809     }
810     return 0;
811 }
812 
813 static int
set_newline(textio * self,const char * newline)814 set_newline(textio *self, const char *newline)
815 {
816     PyObject *old = self->readnl;
817     if (newline == NULL) {
818         self->readnl = NULL;
819     }
820     else {
821         self->readnl = PyUnicode_FromString(newline);
822         if (self->readnl == NULL) {
823             self->readnl = old;
824             return -1;
825         }
826     }
827     self->readuniversal = (newline == NULL || newline[0] == '\0');
828     self->readtranslate = (newline == NULL);
829     self->writetranslate = (newline == NULL || newline[0] != '\0');
830     if (!self->readuniversal && self->readnl != NULL) {
831         // validate_newline() accepts only ASCII newlines.
832         assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
833         self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
834         if (strcmp(self->writenl, "\n") == 0) {
835             self->writenl = NULL;
836         }
837     }
838     else {
839 #ifdef MS_WINDOWS
840         self->writenl = "\r\n";
841 #else
842         self->writenl = NULL;
843 #endif
844     }
845     Py_XDECREF(old);
846     return 0;
847 }
848 
849 static int
_textiowrapper_set_decoder(textio * self,PyObject * codec_info,const char * errors)850 _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
851                            const char *errors)
852 {
853     PyObject *res;
854     int r;
855 
856     res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
857     if (res == NULL)
858         return -1;
859 
860     r = PyObject_IsTrue(res);
861     Py_DECREF(res);
862     if (r == -1)
863         return -1;
864 
865     if (r != 1)
866         return 0;
867 
868     Py_CLEAR(self->decoder);
869     self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
870     if (self->decoder == NULL)
871         return -1;
872 
873     if (self->readuniversal) {
874         PyObject *incrementalDecoder = PyObject_CallFunction(
875             (PyObject *)&PyIncrementalNewlineDecoder_Type,
876             "Oi", self->decoder, (int)self->readtranslate);
877         if (incrementalDecoder == NULL)
878             return -1;
879         Py_CLEAR(self->decoder);
880         self->decoder = incrementalDecoder;
881     }
882 
883     return 0;
884 }
885 
886 static PyObject*
_textiowrapper_decode(PyObject * decoder,PyObject * bytes,int eof)887 _textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
888 {
889     PyObject *chars;
890 
891     if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
892         chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
893     else
894         chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
895                                            eof ? Py_True : Py_False, NULL);
896 
897     if (check_decoded(chars) < 0)
898         // check_decoded already decreases refcount
899         return NULL;
900 
901     return chars;
902 }
903 
904 static int
_textiowrapper_set_encoder(textio * self,PyObject * codec_info,const char * errors)905 _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
906                            const char *errors)
907 {
908     PyObject *res;
909     int r;
910 
911     res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
912     if (res == NULL)
913         return -1;
914 
915     r = PyObject_IsTrue(res);
916     Py_DECREF(res);
917     if (r == -1)
918         return -1;
919 
920     if (r != 1)
921         return 0;
922 
923     Py_CLEAR(self->encoder);
924     self->encodefunc = NULL;
925     self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
926     if (self->encoder == NULL)
927         return -1;
928 
929     /* Get the normalized named of the codec */
930     if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
931         return -1;
932     }
933     if (res != NULL && PyUnicode_Check(res)) {
934         const encodefuncentry *e = encodefuncs;
935         while (e->name != NULL) {
936             if (_PyUnicode_EqualToASCIIString(res, e->name)) {
937                 self->encodefunc = e->encodefunc;
938                 break;
939             }
940             e++;
941         }
942     }
943     Py_XDECREF(res);
944 
945     return 0;
946 }
947 
948 static int
_textiowrapper_fix_encoder_state(textio * self)949 _textiowrapper_fix_encoder_state(textio *self)
950 {
951     if (!self->seekable || !self->encoder) {
952         return 0;
953     }
954 
955     self->encoding_start_of_stream = 1;
956 
957     PyObject *cookieObj = PyObject_CallMethodObjArgs(
958         self->buffer, _PyIO_str_tell, NULL);
959     if (cookieObj == NULL) {
960         return -1;
961     }
962 
963     int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
964     Py_DECREF(cookieObj);
965     if (cmp < 0) {
966         return -1;
967     }
968 
969     if (cmp == 0) {
970         self->encoding_start_of_stream = 0;
971         PyObject *res = PyObject_CallMethodObjArgs(
972             self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL);
973         if (res == NULL) {
974             return -1;
975         }
976         Py_DECREF(res);
977     }
978 
979     return 0;
980 }
981 
982 /*[clinic input]
983 _io.TextIOWrapper.__init__
984     buffer: object
985     encoding: str(accept={str, NoneType}) = NULL
986     errors: object = None
987     newline: str(accept={str, NoneType}) = NULL
988     line_buffering: bool(accept={int}) = False
989     write_through: bool(accept={int}) = False
990 
991 Character and line based layer over a BufferedIOBase object, buffer.
992 
993 encoding gives the name of the encoding that the stream will be
994 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
995 
996 errors determines the strictness of encoding and decoding (see
997 help(codecs.Codec) or the documentation for codecs.register) and
998 defaults to "strict".
999 
1000 newline controls how line endings are handled. It can be None, '',
1001 '\n', '\r', and '\r\n'.  It works as follows:
1002 
1003 * On input, if newline is None, universal newlines mode is
1004   enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1005   these are translated into '\n' before being returned to the
1006   caller. If it is '', universal newline mode is enabled, but line
1007   endings are returned to the caller untranslated. If it has any of
1008   the other legal values, input lines are only terminated by the given
1009   string, and the line ending is returned to the caller untranslated.
1010 
1011 * On output, if newline is None, any '\n' characters written are
1012   translated to the system default line separator, os.linesep. If
1013   newline is '' or '\n', no translation takes place. If newline is any
1014   of the other legal values, any '\n' characters written are translated
1015   to the given string.
1016 
1017 If line_buffering is True, a call to flush is implied when a call to
1018 write contains a newline character.
1019 [clinic start generated code]*/
1020 
1021 static int
_io_TextIOWrapper___init___impl(textio * self,PyObject * buffer,const char * encoding,PyObject * errors,const char * newline,int line_buffering,int write_through)1022 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1023                                 const char *encoding, PyObject *errors,
1024                                 const char *newline, int line_buffering,
1025                                 int write_through)
1026 /*[clinic end generated code: output=72267c0c01032ed2 input=1c5dd5d78bfcc675]*/
1027 {
1028     PyObject *raw, *codec_info = NULL;
1029     _PyIO_State *state = NULL;
1030     PyObject *res;
1031     int r;
1032 
1033     self->ok = 0;
1034     self->detached = 0;
1035 
1036     if (errors == Py_None) {
1037         errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
1038         if (errors == NULL) {
1039             return -1;
1040         }
1041     }
1042     else if (!PyUnicode_Check(errors)) {
1043         // Check 'errors' argument here because Argument Clinic doesn't support
1044         // 'str(accept={str, NoneType})' converter.
1045         PyErr_Format(
1046             PyExc_TypeError,
1047             "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1048             errors->ob_type->tp_name);
1049         return -1;
1050     }
1051 
1052     if (validate_newline(newline) < 0) {
1053         return -1;
1054     }
1055 
1056     Py_CLEAR(self->buffer);
1057     Py_CLEAR(self->encoding);
1058     Py_CLEAR(self->encoder);
1059     Py_CLEAR(self->decoder);
1060     Py_CLEAR(self->readnl);
1061     Py_CLEAR(self->decoded_chars);
1062     Py_CLEAR(self->pending_bytes);
1063     Py_CLEAR(self->snapshot);
1064     Py_CLEAR(self->errors);
1065     Py_CLEAR(self->raw);
1066     self->decoded_chars_used = 0;
1067     self->pending_bytes_count = 0;
1068     self->encodefunc = NULL;
1069     self->b2cratio = 0.0;
1070 
1071     if (encoding == NULL) {
1072         /* Try os.device_encoding(fileno) */
1073         PyObject *fileno;
1074         state = IO_STATE();
1075         if (state == NULL)
1076             goto error;
1077         fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
1078         /* Ignore only AttributeError and UnsupportedOperation */
1079         if (fileno == NULL) {
1080             if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1081                 PyErr_ExceptionMatches(state->unsupported_operation)) {
1082                 PyErr_Clear();
1083             }
1084             else {
1085                 goto error;
1086             }
1087         }
1088         else {
1089             int fd = _PyLong_AsInt(fileno);
1090             Py_DECREF(fileno);
1091             if (fd == -1 && PyErr_Occurred()) {
1092                 goto error;
1093             }
1094 
1095             self->encoding = _Py_device_encoding(fd);
1096             if (self->encoding == NULL)
1097                 goto error;
1098             else if (!PyUnicode_Check(self->encoding))
1099                 Py_CLEAR(self->encoding);
1100         }
1101     }
1102     if (encoding == NULL && self->encoding == NULL) {
1103         PyObject *locale_module = _PyIO_get_locale_module(state);
1104         if (locale_module == NULL)
1105             goto catch_ImportError;
1106         self->encoding = _PyObject_CallMethodIdObjArgs(
1107             locale_module, &PyId_getpreferredencoding, Py_False, NULL);
1108         Py_DECREF(locale_module);
1109         if (self->encoding == NULL) {
1110           catch_ImportError:
1111             /*
1112              Importing locale can raise an ImportError because of
1113              _functools, and locale.getpreferredencoding can raise an
1114              ImportError if _locale is not available.  These will happen
1115              during module building.
1116             */
1117             if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1118                 PyErr_Clear();
1119                 self->encoding = PyUnicode_FromString("ascii");
1120             }
1121             else
1122                 goto error;
1123         }
1124         else if (!PyUnicode_Check(self->encoding))
1125             Py_CLEAR(self->encoding);
1126     }
1127     if (self->encoding != NULL) {
1128         encoding = PyUnicode_AsUTF8(self->encoding);
1129         if (encoding == NULL)
1130             goto error;
1131     }
1132     else if (encoding != NULL) {
1133         self->encoding = PyUnicode_FromString(encoding);
1134         if (self->encoding == NULL)
1135             goto error;
1136     }
1137     else {
1138         PyErr_SetString(PyExc_OSError,
1139                         "could not determine default encoding");
1140         goto error;
1141     }
1142 
1143     /* Check we have been asked for a real text encoding */
1144     codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1145     if (codec_info == NULL) {
1146         Py_CLEAR(self->encoding);
1147         goto error;
1148     }
1149 
1150     /* XXX: Failures beyond this point have the potential to leak elements
1151      * of the partially constructed object (like self->encoding)
1152      */
1153 
1154     Py_INCREF(errors);
1155     self->errors = errors;
1156     self->chunk_size = 8192;
1157     self->line_buffering = line_buffering;
1158     self->write_through = write_through;
1159     if (set_newline(self, newline) < 0) {
1160         goto error;
1161     }
1162 
1163     self->buffer = buffer;
1164     Py_INCREF(buffer);
1165 
1166     /* Build the decoder object */
1167     if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1168         goto error;
1169 
1170     /* Build the encoder object */
1171     if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1172         goto error;
1173 
1174     /* Finished sorting out the codec details */
1175     Py_CLEAR(codec_info);
1176 
1177     if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1178         Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1179         Py_TYPE(buffer) == &PyBufferedRandom_Type)
1180     {
1181         if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1182             goto error;
1183         /* Cache the raw FileIO object to speed up 'closed' checks */
1184         if (raw != NULL) {
1185             if (Py_TYPE(raw) == &PyFileIO_Type)
1186                 self->raw = raw;
1187             else
1188                 Py_DECREF(raw);
1189         }
1190     }
1191 
1192     res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
1193     if (res == NULL)
1194         goto error;
1195     r = PyObject_IsTrue(res);
1196     Py_DECREF(res);
1197     if (r < 0)
1198         goto error;
1199     self->seekable = self->telling = r;
1200 
1201     r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1202     if (r < 0) {
1203         goto error;
1204     }
1205     Py_XDECREF(res);
1206     self->has_read1 = r;
1207 
1208     self->encoding_start_of_stream = 0;
1209     if (_textiowrapper_fix_encoder_state(self) < 0) {
1210         goto error;
1211     }
1212 
1213     self->ok = 1;
1214     return 0;
1215 
1216   error:
1217     Py_XDECREF(codec_info);
1218     return -1;
1219 }
1220 
1221 /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1222  * -1 on error.
1223  */
1224 static int
convert_optional_bool(PyObject * obj,int default_value)1225 convert_optional_bool(PyObject *obj, int default_value)
1226 {
1227     long v;
1228     if (obj == Py_None) {
1229         v = default_value;
1230     }
1231     else {
1232         v = PyLong_AsLong(obj);
1233         if (v == -1 && PyErr_Occurred())
1234             return -1;
1235     }
1236     return v != 0;
1237 }
1238 
1239 static int
textiowrapper_change_encoding(textio * self,PyObject * encoding,PyObject * errors,int newline_changed)1240 textiowrapper_change_encoding(textio *self, PyObject *encoding,
1241                               PyObject *errors, int newline_changed)
1242 {
1243     /* Use existing settings where new settings are not specified */
1244     if (encoding == Py_None && errors == Py_None && !newline_changed) {
1245         return 0;  // no change
1246     }
1247 
1248     if (encoding == Py_None) {
1249         encoding = self->encoding;
1250         if (errors == Py_None) {
1251             errors = self->errors;
1252         }
1253     }
1254     else if (errors == Py_None) {
1255         errors = _PyUnicode_FromId(&PyId_strict);
1256         if (errors == NULL) {
1257             return -1;
1258         }
1259     }
1260 
1261     const char *c_errors = PyUnicode_AsUTF8(errors);
1262     if (c_errors == NULL) {
1263         return -1;
1264     }
1265 
1266     // Create new encoder & decoder
1267     PyObject *codec_info = _PyCodec_LookupTextEncoding(
1268         PyUnicode_AsUTF8(encoding), "codecs.open()");
1269     if (codec_info == NULL) {
1270         return -1;
1271     }
1272     if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1273             _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1274         Py_DECREF(codec_info);
1275         return -1;
1276     }
1277     Py_DECREF(codec_info);
1278 
1279     Py_INCREF(encoding);
1280     Py_INCREF(errors);
1281     Py_SETREF(self->encoding, encoding);
1282     Py_SETREF(self->errors, errors);
1283 
1284     return _textiowrapper_fix_encoder_state(self);
1285 }
1286 
1287 /*[clinic input]
1288 _io.TextIOWrapper.reconfigure
1289     *
1290     encoding: object = None
1291     errors: object = None
1292     newline as newline_obj: object(c_default="NULL") = None
1293     line_buffering as line_buffering_obj: object = None
1294     write_through as write_through_obj: object = None
1295 
1296 Reconfigure the text stream with new parameters.
1297 
1298 This also does an implicit stream flush.
1299 
1300 [clinic start generated code]*/
1301 
1302 static PyObject *
_io_TextIOWrapper_reconfigure_impl(textio * self,PyObject * encoding,PyObject * errors,PyObject * newline_obj,PyObject * line_buffering_obj,PyObject * write_through_obj)1303 _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1304                                    PyObject *errors, PyObject *newline_obj,
1305                                    PyObject *line_buffering_obj,
1306                                    PyObject *write_through_obj)
1307 /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
1308 {
1309     int line_buffering;
1310     int write_through;
1311     const char *newline = NULL;
1312 
1313     /* Check if something is in the read buffer */
1314     if (self->decoded_chars != NULL) {
1315         if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1316             _unsupported("It is not possible to set the encoding or newline "
1317                          "of stream after the first read");
1318             return NULL;
1319         }
1320     }
1321 
1322     if (newline_obj != NULL && newline_obj != Py_None) {
1323         newline = PyUnicode_AsUTF8(newline_obj);
1324         if (newline == NULL || validate_newline(newline) < 0) {
1325             return NULL;
1326         }
1327     }
1328 
1329     line_buffering = convert_optional_bool(line_buffering_obj,
1330                                            self->line_buffering);
1331     write_through = convert_optional_bool(write_through_obj,
1332                                           self->write_through);
1333     if (line_buffering < 0 || write_through < 0) {
1334         return NULL;
1335     }
1336 
1337     PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1338     if (res == NULL) {
1339         return NULL;
1340     }
1341     Py_DECREF(res);
1342     self->b2cratio = 0;
1343 
1344     if (newline_obj != NULL && set_newline(self, newline) < 0) {
1345         return NULL;
1346     }
1347 
1348     if (textiowrapper_change_encoding(
1349             self, encoding, errors, newline_obj != NULL) < 0) {
1350         return NULL;
1351     }
1352 
1353     self->line_buffering = line_buffering;
1354     self->write_through = write_through;
1355     Py_RETURN_NONE;
1356 }
1357 
/* Drop every object reference held by `self` and mark it uninitialised
 * (self->ok = 0).  Called from textiowrapper_dealloc(); always returns 0.
 */
static int
textiowrapper_clear(textio *self)
{
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);

    Py_CLEAR(self->dict);
    return 0;
}
1376 
/* Deallocator: run the IOBase finalizer, then untrack the object, clear
 * weak references, release all owned references and free the memory.
 */
static void
textiowrapper_dealloc(textio *self)
{
    self->finalizing = 1;
    /* A negative result aborts deallocation: nothing below runs.
     * NOTE(review): presumably this means the object was resurrected by
     * the finalizer -- confirm against _PyIOBase_finalize().
     */
    if (_PyIOBase_finalize((PyObject *) self) < 0)
        return;
    self->ok = 0;
    _PyObject_GC_UNTRACK(self);
    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *)self);
    textiowrapper_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
1390 
/* GC traversal: visit every object reference held by `self`.  The set of
 * fields mirrors the one released in textiowrapper_clear().
 * Py_VISIT relies on the parameters being named `visit` and `arg`.
 */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1408 
/* Forward declaration: the CHECK_CLOSED macro below calls this function. */
static PyObject *
textiowrapper_closed_get(textio *self, void *context);
1411 
/* This macro takes some shortcuts to make the common case faster.
 *
 * It makes the enclosing function return NULL when the stream is closed or
 * when the check itself fails, so it may only be used in functions that
 * return a pointer.
 *
 * For an exact PyTextIOWrapper_Type instance:
 *   - if a raw FileIO object is cached in self->raw, _PyFileIO_closed() is
 *     asked directly;
 *   - otherwise the result of textiowrapper_closed_get() is tested;
 *   - a closed stream raises ValueError("I/O operation on closed file.").
 * Any other type falls back to _PyIOBase_check_closed().
 */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1438 
/* Make the enclosing function return NULL with ValueError when `self` has
 * not been successfully initialised (self->ok <= 0).
 *
 * The expansion is a bare `if` statement, not a do { } while (0) block:
 * use it only as a complete statement, never as the unbraced body of an
 * if/else.
 */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }
1445 
/* Like CHECK_INITIALIZED, and additionally make the enclosing function
 * return NULL with ValueError when the underlying buffer has been detached.
 *
 * The expansion is two consecutive statements: use it only as a complete
 * statement inside a braced block.
 */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }
1453 
/* Variant of CHECK_ATTACHED for functions that return int: the enclosing
 * function returns -1 (instead of NULL) with ValueError set when `self` is
 * uninitialised or detached.
 *
 * The expansion is a bare if/else-if statement: use it only as a complete
 * statement inside a braced block.
 */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1464 
1465 
1466 /*[clinic input]
1467 _io.TextIOWrapper.detach
1468 [clinic start generated code]*/
1469 
1470 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1471 _io_TextIOWrapper_detach_impl(textio *self)
1472 /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1473 {
1474     PyObject *buffer, *res;
1475     CHECK_ATTACHED(self);
1476     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1477     if (res == NULL)
1478         return NULL;
1479     Py_DECREF(res);
1480     buffer = self->buffer;
1481     self->buffer = NULL;
1482     self->detached = 1;
1483     return buffer;
1484 }
1485 
1486 /* Flush the internal write buffer. This doesn't explicitly flush the
1487    underlying buffered object, though. */
1488 static int
_textiowrapper_writeflush(textio * self)1489 _textiowrapper_writeflush(textio *self)
1490 {
1491     PyObject *pending, *b, *ret;
1492 
1493     if (self->pending_bytes == NULL)
1494         return 0;
1495 
1496     pending = self->pending_bytes;
1497     Py_INCREF(pending);
1498     self->pending_bytes_count = 0;
1499     Py_CLEAR(self->pending_bytes);
1500 
1501     b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1502     Py_DECREF(pending);
1503     if (b == NULL)
1504         return -1;
1505     ret = NULL;
1506     do {
1507         ret = PyObject_CallMethodObjArgs(self->buffer,
1508                                          _PyIO_str_write, b, NULL);
1509     } while (ret == NULL && _PyIO_trap_eintr());
1510     Py_DECREF(b);
1511     if (ret == NULL)
1512         return -1;
1513     Py_DECREF(ret);
1514     return 0;
1515 }
1516 
1517 /*[clinic input]
1518 _io.TextIOWrapper.write
1519     text: unicode
1520     /
1521 [clinic start generated code]*/
1522 
static PyObject *
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
{
    /* Encode `text` and append it to the pending write buffer.
     *
     * Steps: optional '\n' -> self->writenl translation, encoding (through
     * the cached encodefunc when there is one, else encoder.encode()),
     * queueing of the resulting bytes, and a flush when the queue exceeds
     * chunk_size, when write_through is set, or when line buffering sees a
     * newline or carriage return.  Any read-ahead state (decoded chars,
     * snapshot, decoder state) is discarded afterwards.
     *
     * Returns the length of the original `text` (before newline
     * translation) as an int object, or NULL with an exception set.
     */
    PyObject *ret;
    PyObject *b;
    Py_ssize_t textlen;
    int haslf = 0;
    int needflush = 0, text_needflush = 0;

    if (PyUnicode_READY(text) == -1)
        return NULL;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (self->encoder == NULL)
        return _unsupported("not writable");

    /* Own a reference to `text`: it may be replaced by a translated copy. */
    Py_INCREF(text);

    textlen = PyUnicode_GET_LENGTH(text);

    /* Only scan for '\n' when the answer matters. */
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
            haslf = 1;

    if (haslf && self->writetranslate && self->writenl != NULL) {
        PyObject *newtext = _PyObject_CallMethodId(
            text, &PyId_replace, "ss", "\n", self->writenl);
        Py_DECREF(text);
        if (newtext == NULL)
            return NULL;
        text = newtext;
    }

    if (self->write_through)
        text_needflush = 1;
    if (self->line_buffering &&
        (haslf ||
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
        needflush = 1;

    /* XXX What if we were just reading? */
    if (self->encodefunc != NULL) {
        b = (*self->encodefunc)((PyObject *) self, text);
        self->encoding_start_of_stream = 0;
    }
    else
        b = PyObject_CallMethodObjArgs(self->encoder,
                                       _PyIO_str_encode, text, NULL);
    Py_DECREF(text);
    if (b == NULL)
        return NULL;
    if (!PyBytes_Check(b)) {
        PyErr_Format(PyExc_TypeError,
                     "encoder should return a bytes object, not '%.200s'",
                     Py_TYPE(b)->tp_name);
        Py_DECREF(b);
        return NULL;
    }

    /* The pending buffer is a list of bytes objects, created lazily. */
    if (self->pending_bytes == NULL) {
        self->pending_bytes = PyList_New(0);
        if (self->pending_bytes == NULL) {
            Py_DECREF(b);
            return NULL;
        }
        self->pending_bytes_count = 0;
    }
    if (PyList_Append(self->pending_bytes, b) < 0) {
        Py_DECREF(b);
        return NULL;
    }
    self->pending_bytes_count += PyBytes_GET_SIZE(b);
    Py_DECREF(b);
    if (self->pending_bytes_count > self->chunk_size || needflush ||
        text_needflush) {
        if (_textiowrapper_writeflush(self) < 0)
            return NULL;
    }

    /* Line buffering also flushes the underlying buffer object itself. */
    if (needflush) {
        ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    /* Drop any read-ahead state left over from earlier reads. */
    textiowrapper_set_decoded_chars(self, NULL);
    Py_CLEAR(self->snapshot);

    if (self->decoder) {
        ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    return PyLong_FromSsize_t(textlen);
}
1624 
1625 /* Steal a reference to chars and store it in the decoded_char buffer;
1626  */
1627 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1628 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1629 {
1630     Py_XSETREF(self->decoded_chars, chars);
1631     self->decoded_chars_used = 0;
1632 }
1633 
1634 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1635 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1636 {
1637     PyObject *chars;
1638     Py_ssize_t avail;
1639 
1640     if (self->decoded_chars == NULL)
1641         return PyUnicode_FromStringAndSize(NULL, 0);
1642 
1643     /* decoded_chars is guaranteed to be "ready". */
1644     avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1645              - self->decoded_chars_used);
1646 
1647     assert(avail >= 0);
1648 
1649     if (n < 0 || n > avail)
1650         n = avail;
1651 
1652     if (self->decoded_chars_used > 0 || n < avail) {
1653         chars = PyUnicode_Substring(self->decoded_chars,
1654                                     self->decoded_chars_used,
1655                                     self->decoded_chars_used + n);
1656         if (chars == NULL)
1657             return NULL;
1658     }
1659     else {
1660         chars = self->decoded_chars;
1661         Py_INCREF(chars);
1662     }
1663 
1664     self->decoded_chars_used += n;
1665     return chars;
1666 }
1667 
/* Read and decode the next chunk of data from the BufferedReader.
 *
 * `size_hint` is a number of characters; when positive it is scaled by
 * max(b2cratio, 1.0) and the read size is the larger of that and
 * self->chunk_size.  The buffer's read1() is used when available, read()
 * otherwise.
 *
 * Returns 1 if data was decoded, 0 at EOF, and -1 with an exception set on
 * error.
 */
static int
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
{
    PyObject *dec_buffer = NULL;
    PyObject *dec_flags = NULL;
    PyObject *input_chunk = NULL;
    Py_buffer input_chunk_buf;
    PyObject *decoded_chars, *chunk_size;
    Py_ssize_t nbytes, nchars;
    int eof;

    /* The return value is True unless EOF was reached.  The decoded string is
     * placed in self._decoded_chars (replacing its previous value).  The
     * entire input chunk is sent to the decoder, though some of it may remain
     * buffered in the decoder, yet to be converted.
     */

    if (self->decoder == NULL) {
        _unsupported("not readable");
        return -1;
    }

    if (self->telling) {
        /* To prepare for tell(), we need to snapshot a point in the file
         * where the decoder's input buffer is empty.
         */

        PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
                                                     _PyIO_str_getstate, NULL);
        if (state == NULL)
            return -1;
        /* Given this, we know there was a valid snapshot point
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
         */
        if (!PyTuple_Check(state)) {
            PyErr_SetString(PyExc_TypeError,
                            "illegal decoder state");
            Py_DECREF(state);
            return -1;
        }
        if (!PyArg_ParseTuple(state,
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
        {
            Py_DECREF(state);
            return -1;
        }

        if (!PyBytes_Check(dec_buffer)) {
            PyErr_Format(PyExc_TypeError,
                         "illegal decoder state: the first item should be a "
                         "bytes object, not '%.200s'",
                         Py_TYPE(dec_buffer)->tp_name);
            Py_DECREF(state);
            return -1;
        }
        /* The parsed items are borrowed from `state`: take our own
         * references before releasing the tuple.
         */
        Py_INCREF(dec_buffer);
        Py_INCREF(dec_flags);
        Py_DECREF(state);
    }

    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
    if (size_hint > 0) {
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
    }
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
    if (chunk_size == NULL)
        goto fail;

    input_chunk = PyObject_CallMethodObjArgs(self->buffer,
        (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
        chunk_size, NULL);
    Py_DECREF(chunk_size);
    if (input_chunk == NULL)
        goto fail;

    /* The buffer view is only used to validate the object and get its size. */
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
        PyErr_Format(PyExc_TypeError,
                     "underlying %s() should have returned a bytes-like object, "
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
                     Py_TYPE(input_chunk)->tp_name);
        goto fail;
    }

    nbytes = input_chunk_buf.len;
    eof = (nbytes == 0);

    decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
    PyBuffer_Release(&input_chunk_buf);
    if (decoded_chars == NULL)
        goto fail;

    /* Ownership of decoded_chars passes to self here. */
    textiowrapper_set_decoded_chars(self, decoded_chars);
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
    if (nchars > 0)
        self->b2cratio = (double) nbytes / nchars;
    else
        self->b2cratio = 0.0;
    /* An empty read that still produced characters is not EOF yet. */
    if (nchars > 0)
        eof = 0;

    if (self->telling) {
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
         * next input to be decoded is dec_buffer + input_chunk.
         */
        PyObject *next_input = dec_buffer;
        PyBytes_Concat(&next_input, input_chunk);
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
        if (next_input == NULL) {
            goto fail;
        }
        /* The "N" format hands both references over to Py_BuildValue; the
         * failure path below treats them as consumed as well.
         */
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
        if (snapshot == NULL) {
            dec_flags = NULL;
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);
    }
    Py_DECREF(input_chunk);

    return (eof == 0);

  fail:
    Py_XDECREF(dec_buffer);
    Py_XDECREF(dec_flags);
    Py_XDECREF(input_chunk);
    return -1;
}
1797 
1798 /*[clinic input]
1799 _io.TextIOWrapper.read
1800     size as n: Py_ssize_t(accept={int, NoneType}) = -1
1801     /
1802 [clinic start generated code]*/
1803 
1804 static PyObject *
_io_TextIOWrapper_read_impl(textio * self,Py_ssize_t n)1805 _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1806 /*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
1807 {
1808     PyObject *result = NULL, *chunks = NULL;
1809 
1810     CHECK_ATTACHED(self);
1811     CHECK_CLOSED(self);
1812 
1813     if (self->decoder == NULL)
1814         return _unsupported("not readable");
1815 
1816     if (_textiowrapper_writeflush(self) < 0)
1817         return NULL;
1818 
1819     if (n < 0) {
1820         /* Read everything */
1821         PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
1822         PyObject *decoded;
1823         if (bytes == NULL)
1824             goto fail;
1825 
1826         if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1827             decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1828                                                           bytes, 1);
1829         else
1830             decoded = PyObject_CallMethodObjArgs(
1831                 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
1832         Py_DECREF(bytes);
1833         if (check_decoded(decoded) < 0)
1834             goto fail;
1835 
1836         result = textiowrapper_get_decoded_chars(self, -1);
1837 
1838         if (result == NULL) {
1839             Py_DECREF(decoded);
1840             return NULL;
1841         }
1842 
1843         PyUnicode_AppendAndDel(&result, decoded);
1844         if (result == NULL)
1845             goto fail;
1846 
1847         textiowrapper_set_decoded_chars(self, NULL);
1848         Py_CLEAR(self->snapshot);
1849         return result;
1850     }
1851     else {
1852         int res = 1;
1853         Py_ssize_t remaining = n;
1854 
1855         result = textiowrapper_get_decoded_chars(self, n);
1856         if (result == NULL)
1857             goto fail;
1858         if (PyUnicode_READY(result) == -1)
1859             goto fail;
1860         remaining -= PyUnicode_GET_LENGTH(result);
1861 
1862         /* Keep reading chunks until we have n characters to return */
1863         while (remaining > 0) {
1864             res = textiowrapper_read_chunk(self, remaining);
1865             if (res < 0) {
1866                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1867                    when EINTR occurs so we needn't do it ourselves. */
1868                 if (_PyIO_trap_eintr()) {
1869                     continue;
1870                 }
1871                 goto fail;
1872             }
1873             if (res == 0)  /* EOF */
1874                 break;
1875             if (chunks == NULL) {
1876                 chunks = PyList_New(0);
1877                 if (chunks == NULL)
1878                     goto fail;
1879             }
1880             if (PyUnicode_GET_LENGTH(result) > 0 &&
1881                 PyList_Append(chunks, result) < 0)
1882                 goto fail;
1883             Py_DECREF(result);
1884             result = textiowrapper_get_decoded_chars(self, remaining);
1885             if (result == NULL)
1886                 goto fail;
1887             remaining -= PyUnicode_GET_LENGTH(result);
1888         }
1889         if (chunks != NULL) {
1890             if (result != NULL && PyList_Append(chunks, result) < 0)
1891                 goto fail;
1892             Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
1893             if (result == NULL)
1894                 goto fail;
1895             Py_CLEAR(chunks);
1896         }
1897         return result;
1898     }
1899   fail:
1900     Py_XDECREF(result);
1901     Py_XDECREF(chunks);
1902     return NULL;
1903 }
1904 
1905 
1906 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
1907    that is to the NUL character. Otherwise the function will produce
1908    incorrect results. */
1909 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)1910 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
1911 {
1912     if (kind == PyUnicode_1BYTE_KIND) {
1913         assert(ch < 256);
1914         return (char *) memchr((void *) s, (char) ch, end - s);
1915     }
1916     for (;;) {
1917         while (PyUnicode_READ(kind, s, 0) > ch)
1918             s += kind;
1919         if (PyUnicode_READ(kind, s, 0) == ch)
1920             return s;
1921         if (s == end)
1922             return NULL;
1923         s += kind;
1924     }
1925 }
1926 
1927 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,int kind,const char * start,const char * end,Py_ssize_t * consumed)1928 _PyIO_find_line_ending(
1929     int translated, int universal, PyObject *readnl,
1930     int kind, const char *start, const char *end, Py_ssize_t *consumed)
1931 {
1932     Py_ssize_t len = ((char*)end - (char*)start)/kind;
1933 
1934     if (translated) {
1935         /* Newlines are already translated, only search for \n */
1936         const char *pos = find_control_char(kind, start, end, '\n');
1937         if (pos != NULL)
1938             return (pos - start)/kind + 1;
1939         else {
1940             *consumed = len;
1941             return -1;
1942         }
1943     }
1944     else if (universal) {
1945         /* Universal newline search. Find any of \r, \r\n, \n
1946          * The decoder ensures that \r\n are not split in two pieces
1947          */
1948         const char *s = start;
1949         for (;;) {
1950             Py_UCS4 ch;
1951             /* Fast path for non-control chars. The loop always ends
1952                since the Unicode string is NUL-terminated. */
1953             while (PyUnicode_READ(kind, s, 0) > '\r')
1954                 s += kind;
1955             if (s >= end) {
1956                 *consumed = len;
1957                 return -1;
1958             }
1959             ch = PyUnicode_READ(kind, s, 0);
1960             s += kind;
1961             if (ch == '\n')
1962                 return (s - start)/kind;
1963             if (ch == '\r') {
1964                 if (PyUnicode_READ(kind, s, 0) == '\n')
1965                     return (s - start)/kind + 1;
1966                 else
1967                     return (s - start)/kind;
1968             }
1969         }
1970     }
1971     else {
1972         /* Non-universal mode. */
1973         Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1974         Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
1975         /* Assume that readnl is an ASCII character. */
1976         assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
1977         if (readnl_len == 1) {
1978             const char *pos = find_control_char(kind, start, end, nl[0]);
1979             if (pos != NULL)
1980                 return (pos - start)/kind + 1;
1981             *consumed = len;
1982             return -1;
1983         }
1984         else {
1985             const char *s = start;
1986             const char *e = end - (readnl_len - 1)*kind;
1987             const char *pos;
1988             if (e < s)
1989                 e = s;
1990             while (s < e) {
1991                 Py_ssize_t i;
1992                 const char *pos = find_control_char(kind, s, end, nl[0]);
1993                 if (pos == NULL || pos >= e)
1994                     break;
1995                 for (i = 1; i < readnl_len; i++) {
1996                     if (PyUnicode_READ(kind, pos, i) != nl[i])
1997                         break;
1998                 }
1999                 if (i == readnl_len)
2000                     return (pos - start)/kind + readnl_len;
2001                 s = pos + kind;
2002             }
2003             pos = find_control_char(kind, e, end, nl[0]);
2004             if (pos == NULL)
2005                 *consumed = len;
2006             else
2007                 *consumed = (pos - start)/kind;
2008             return -1;
2009         }
2010     }
2011 }
2012 
2013 static PyObject *
_textiowrapper_readline(textio * self,Py_ssize_t limit)2014 _textiowrapper_readline(textio *self, Py_ssize_t limit)
2015 {
2016     PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2017     Py_ssize_t start, endpos, chunked, offset_to_buffer;
2018     int res;
2019 
2020     CHECK_CLOSED(self);
2021 
2022     if (_textiowrapper_writeflush(self) < 0)
2023         return NULL;
2024 
2025     chunked = 0;
2026 
2027     while (1) {
2028         char *ptr;
2029         Py_ssize_t line_len;
2030         int kind;
2031         Py_ssize_t consumed = 0;
2032 
2033         /* First, get some data if necessary */
2034         res = 1;
2035         while (!self->decoded_chars ||
2036                !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2037             res = textiowrapper_read_chunk(self, 0);
2038             if (res < 0) {
2039                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2040                    when EINTR occurs so we needn't do it ourselves. */
2041                 if (_PyIO_trap_eintr()) {
2042                     continue;
2043                 }
2044                 goto error;
2045             }
2046             if (res == 0)
2047                 break;
2048         }
2049         if (res == 0) {
2050             /* end of file */
2051             textiowrapper_set_decoded_chars(self, NULL);
2052             Py_CLEAR(self->snapshot);
2053             start = endpos = offset_to_buffer = 0;
2054             break;
2055         }
2056 
2057         if (remaining == NULL) {
2058             line = self->decoded_chars;
2059             start = self->decoded_chars_used;
2060             offset_to_buffer = 0;
2061             Py_INCREF(line);
2062         }
2063         else {
2064             assert(self->decoded_chars_used == 0);
2065             line = PyUnicode_Concat(remaining, self->decoded_chars);
2066             start = 0;
2067             offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2068             Py_CLEAR(remaining);
2069             if (line == NULL)
2070                 goto error;
2071             if (PyUnicode_READY(line) == -1)
2072                 goto error;
2073         }
2074 
2075         ptr = PyUnicode_DATA(line);
2076         line_len = PyUnicode_GET_LENGTH(line);
2077         kind = PyUnicode_KIND(line);
2078 
2079         endpos = _PyIO_find_line_ending(
2080             self->readtranslate, self->readuniversal, self->readnl,
2081             kind,
2082             ptr + kind * start,
2083             ptr + kind * line_len,
2084             &consumed);
2085         if (endpos >= 0) {
2086             endpos += start;
2087             if (limit >= 0 && (endpos - start) + chunked >= limit)
2088                 endpos = start + limit - chunked;
2089             break;
2090         }
2091 
2092         /* We can put aside up to `endpos` */
2093         endpos = consumed + start;
2094         if (limit >= 0 && (endpos - start) + chunked >= limit) {
2095             /* Didn't find line ending, but reached length limit */
2096             endpos = start + limit - chunked;
2097             break;
2098         }
2099 
2100         if (endpos > start) {
2101             /* No line ending seen yet - put aside current data */
2102             PyObject *s;
2103             if (chunks == NULL) {
2104                 chunks = PyList_New(0);
2105                 if (chunks == NULL)
2106                     goto error;
2107             }
2108             s = PyUnicode_Substring(line, start, endpos);
2109             if (s == NULL)
2110                 goto error;
2111             if (PyList_Append(chunks, s) < 0) {
2112                 Py_DECREF(s);
2113                 goto error;
2114             }
2115             chunked += PyUnicode_GET_LENGTH(s);
2116             Py_DECREF(s);
2117         }
2118         /* There may be some remaining bytes we'll have to prepend to the
2119            next chunk of data */
2120         if (endpos < line_len) {
2121             remaining = PyUnicode_Substring(line, endpos, line_len);
2122             if (remaining == NULL)
2123                 goto error;
2124         }
2125         Py_CLEAR(line);
2126         /* We have consumed the buffer */
2127         textiowrapper_set_decoded_chars(self, NULL);
2128     }
2129 
2130     if (line != NULL) {
2131         /* Our line ends in the current buffer */
2132         self->decoded_chars_used = endpos - offset_to_buffer;
2133         if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2134             PyObject *s = PyUnicode_Substring(line, start, endpos);
2135             Py_CLEAR(line);
2136             if (s == NULL)
2137                 goto error;
2138             line = s;
2139         }
2140     }
2141     if (remaining != NULL) {
2142         if (chunks == NULL) {
2143             chunks = PyList_New(0);
2144             if (chunks == NULL)
2145                 goto error;
2146         }
2147         if (PyList_Append(chunks, remaining) < 0)
2148             goto error;
2149         Py_CLEAR(remaining);
2150     }
2151     if (chunks != NULL) {
2152         if (line != NULL) {
2153             if (PyList_Append(chunks, line) < 0)
2154                 goto error;
2155             Py_DECREF(line);
2156         }
2157         line = PyUnicode_Join(_PyIO_empty_str, chunks);
2158         if (line == NULL)
2159             goto error;
2160         Py_CLEAR(chunks);
2161     }
2162     if (line == NULL) {
2163         Py_INCREF(_PyIO_empty_str);
2164         line = _PyIO_empty_str;
2165     }
2166 
2167     return line;
2168 
2169   error:
2170     Py_XDECREF(chunks);
2171     Py_XDECREF(remaining);
2172     Py_XDECREF(line);
2173     return NULL;
2174 }
2175 
2176 /*[clinic input]
2177 _io.TextIOWrapper.readline
2178     size: Py_ssize_t = -1
2179     /
2180 [clinic start generated code]*/
2181 
2182 static PyObject *
_io_TextIOWrapper_readline_impl(textio * self,Py_ssize_t size)2183 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2184 /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2185 {
2186     CHECK_ATTACHED(self);
2187     return _textiowrapper_readline(self, size);
2188 }
2189 
2190 /* Seek and Tell */
2191 
2192 typedef struct {
2193     Py_off_t start_pos;
2194     int dec_flags;
2195     int bytes_to_feed;
2196     int chars_to_skip;
2197     char need_eof;
2198 } cookie_type;
2199 
/*
   Cookies are packed into (and unpacked from) a Python integer by way of a
   temporary byte buffer handed to _PyLong_FromByteArray() (resp.
   _PyLong_AsByteArray()), which is faster than doing the arithmetic on
   Python objects.  The macros below give the size of that buffer and the
   offset at which each cookie_type field is stored in it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* The least significant byte of start_pos must also be the least
   significant byte of the cookie.  On a big-endian machine that byte is the
   last one of the field, so the fields are laid out in reverse order and
   start_pos comes last. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: with start_pos stored first, its least significant
   byte naturally ends up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
2231 
2232 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)2233 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2234 {
2235     unsigned char buffer[COOKIE_BUF_LEN];
2236     PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2237     if (cookieLong == NULL)
2238         return -1;
2239 
2240     if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2241                             PY_LITTLE_ENDIAN, 0) < 0) {
2242         Py_DECREF(cookieLong);
2243         return -1;
2244     }
2245     Py_DECREF(cookieLong);
2246 
2247     memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2248     memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2249     memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2250     memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2251     memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2252 
2253     return 0;
2254 }
2255 
2256 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)2257 textiowrapper_build_cookie(cookie_type *cookie)
2258 {
2259     unsigned char buffer[COOKIE_BUF_LEN];
2260 
2261     memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2262     memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2263     memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2264     memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2265     memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2266 
2267     return _PyLong_FromByteArray(buffer, sizeof(buffer),
2268                                  PY_LITTLE_ENDIAN, 0);
2269 }
2270 
2271 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2272 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2273 {
2274     PyObject *res;
2275     /* When seeking to the start of the stream, we call decoder.reset()
2276        rather than decoder.getstate().
2277        This is for a few decoders such as utf-16 for which the state value
2278        at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2279        utf-16, that we are expecting a BOM).
2280     */
2281     if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2282         res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2283     else
2284         res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2285                                      "((yi))", "", cookie->dec_flags);
2286     if (res == NULL)
2287         return -1;
2288     Py_DECREF(res);
2289     return 0;
2290 }
2291 
2292 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2293 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2294 {
2295     PyObject *res;
2296     if (start_of_stream) {
2297         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2298         self->encoding_start_of_stream = 1;
2299     }
2300     else {
2301         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2302                                          _PyLong_Zero, NULL);
2303         self->encoding_start_of_stream = 0;
2304     }
2305     if (res == NULL)
2306         return -1;
2307     Py_DECREF(res);
2308     return 0;
2309 }
2310 
2311 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2312 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2313 {
2314     /* Same as _textiowrapper_decoder_setstate() above. */
2315     return _textiowrapper_encoder_reset(
2316         self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2317 }
2318 
2319 /*[clinic input]
2320 _io.TextIOWrapper.seek
2321     cookie as cookieObj: object
2322     whence: int = 0
2323     /
2324 [clinic start generated code]*/
2325 
2326 static PyObject *
_io_TextIOWrapper_seek_impl(textio * self,PyObject * cookieObj,int whence)2327 _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2328 /*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
2329 {
2330     PyObject *posobj;
2331     cookie_type cookie;
2332     PyObject *res;
2333     int cmp;
2334     PyObject *snapshot;
2335 
2336     CHECK_ATTACHED(self);
2337     CHECK_CLOSED(self);
2338 
2339     Py_INCREF(cookieObj);
2340 
2341     if (!self->seekable) {
2342         _unsupported("underlying stream is not seekable");
2343         goto fail;
2344     }
2345 
2346     if (whence == 1) {
2347         /* seek relative to current position */
2348         cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2349         if (cmp < 0)
2350             goto fail;
2351 
2352         if (cmp == 0) {
2353             _unsupported("can't do nonzero cur-relative seeks");
2354             goto fail;
2355         }
2356 
2357         /* Seeking to the current position should attempt to
2358          * sync the underlying buffer with the current position.
2359          */
2360         Py_DECREF(cookieObj);
2361         cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
2362         if (cookieObj == NULL)
2363             goto fail;
2364     }
2365     else if (whence == 2) {
2366         /* seek relative to end of file */
2367         cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2368         if (cmp < 0)
2369             goto fail;
2370 
2371         if (cmp == 0) {
2372             _unsupported("can't do nonzero end-relative seeks");
2373             goto fail;
2374         }
2375 
2376         res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2377         if (res == NULL)
2378             goto fail;
2379         Py_DECREF(res);
2380 
2381         textiowrapper_set_decoded_chars(self, NULL);
2382         Py_CLEAR(self->snapshot);
2383         if (self->decoder) {
2384             res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
2385             if (res == NULL)
2386                 goto fail;
2387             Py_DECREF(res);
2388         }
2389 
2390         res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
2391         Py_CLEAR(cookieObj);
2392         if (res == NULL)
2393             goto fail;
2394         if (self->encoder) {
2395             /* If seek() == 0, we are at the start of stream, otherwise not */
2396             cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
2397             if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2398                 Py_DECREF(res);
2399                 goto fail;
2400             }
2401         }
2402         return res;
2403     }
2404     else if (whence != 0) {
2405         PyErr_Format(PyExc_ValueError,
2406                      "invalid whence (%d, should be 0, 1 or 2)", whence);
2407         goto fail;
2408     }
2409 
2410     cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
2411     if (cmp < 0)
2412         goto fail;
2413 
2414     if (cmp == 1) {
2415         PyErr_Format(PyExc_ValueError,
2416                      "negative seek position %R", cookieObj);
2417         goto fail;
2418     }
2419 
2420     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2421     if (res == NULL)
2422         goto fail;
2423     Py_DECREF(res);
2424 
2425     /* The strategy of seek() is to go back to the safe start point
2426      * and replay the effect of read(chars_to_skip) from there.
2427      */
2428     if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2429         goto fail;
2430 
2431     /* Seek back to the safe start point. */
2432     posobj = PyLong_FromOff_t(cookie.start_pos);
2433     if (posobj == NULL)
2434         goto fail;
2435     res = PyObject_CallMethodObjArgs(self->buffer,
2436                                      _PyIO_str_seek, posobj, NULL);
2437     Py_DECREF(posobj);
2438     if (res == NULL)
2439         goto fail;
2440     Py_DECREF(res);
2441 
2442     textiowrapper_set_decoded_chars(self, NULL);
2443     Py_CLEAR(self->snapshot);
2444 
2445     /* Restore the decoder to its state from the safe start point. */
2446     if (self->decoder) {
2447         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2448             goto fail;
2449     }
2450 
2451     if (cookie.chars_to_skip) {
2452         /* Just like _read_chunk, feed the decoder and save a snapshot. */
2453         PyObject *input_chunk = _PyObject_CallMethodId(
2454             self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
2455         PyObject *decoded;
2456 
2457         if (input_chunk == NULL)
2458             goto fail;
2459 
2460         if (!PyBytes_Check(input_chunk)) {
2461             PyErr_Format(PyExc_TypeError,
2462                          "underlying read() should have returned a bytes "
2463                          "object, not '%.200s'",
2464                          Py_TYPE(input_chunk)->tp_name);
2465             Py_DECREF(input_chunk);
2466             goto fail;
2467         }
2468 
2469         snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2470         if (snapshot == NULL) {
2471             goto fail;
2472         }
2473         Py_XSETREF(self->snapshot, snapshot);
2474 
2475         decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2476             "Oi", input_chunk, (int)cookie.need_eof);
2477 
2478         if (check_decoded(decoded) < 0)
2479             goto fail;
2480 
2481         textiowrapper_set_decoded_chars(self, decoded);
2482 
2483         /* Skip chars_to_skip of the decoded characters. */
2484         if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2485             PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2486             goto fail;
2487         }
2488         self->decoded_chars_used = cookie.chars_to_skip;
2489     }
2490     else {
2491         snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2492         if (snapshot == NULL)
2493             goto fail;
2494         Py_XSETREF(self->snapshot, snapshot);
2495     }
2496 
2497     /* Finally, reset the encoder (merely useful for proper BOM handling) */
2498     if (self->encoder) {
2499         if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2500             goto fail;
2501     }
2502     return cookieObj;
2503   fail:
2504     Py_XDECREF(cookieObj);
2505     return NULL;
2506 
2507 }
2508 
2509 /*[clinic input]
2510 _io.TextIOWrapper.tell
2511 [clinic start generated code]*/
2512 
2513 static PyObject *
_io_TextIOWrapper_tell_impl(textio * self)2514 _io_TextIOWrapper_tell_impl(textio *self)
2515 /*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
2516 {
2517     PyObject *res;
2518     PyObject *posobj = NULL;
2519     cookie_type cookie = {0,0,0,0,0};
2520     PyObject *next_input;
2521     Py_ssize_t chars_to_skip, chars_decoded;
2522     Py_ssize_t skip_bytes, skip_back;
2523     PyObject *saved_state = NULL;
2524     char *input, *input_end;
2525     Py_ssize_t dec_buffer_len;
2526     int dec_flags;
2527 
2528     CHECK_ATTACHED(self);
2529     CHECK_CLOSED(self);
2530 
2531     if (!self->seekable) {
2532         _unsupported("underlying stream is not seekable");
2533         goto fail;
2534     }
2535     if (!self->telling) {
2536         PyErr_SetString(PyExc_OSError,
2537                         "telling position disabled by next() call");
2538         goto fail;
2539     }
2540 
2541     if (_textiowrapper_writeflush(self) < 0)
2542         return NULL;
2543     res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2544     if (res == NULL)
2545         goto fail;
2546     Py_DECREF(res);
2547 
2548     posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
2549     if (posobj == NULL)
2550         goto fail;
2551 
2552     if (self->decoder == NULL || self->snapshot == NULL) {
2553         assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2554         return posobj;
2555     }
2556 
2557 #if defined(HAVE_LARGEFILE_SUPPORT)
2558     cookie.start_pos = PyLong_AsLongLong(posobj);
2559 #else
2560     cookie.start_pos = PyLong_AsLong(posobj);
2561 #endif
2562     Py_DECREF(posobj);
2563     if (PyErr_Occurred())
2564         goto fail;
2565 
2566     /* Skip backward to the snapshot point (see _read_chunk). */
2567     assert(PyTuple_Check(self->snapshot));
2568     if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2569         goto fail;
2570 
2571     assert (PyBytes_Check(next_input));
2572 
2573     cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2574 
2575     /* How many decoded characters have been used up since the snapshot? */
2576     if (self->decoded_chars_used == 0)  {
2577         /* We haven't moved from the snapshot point. */
2578         return textiowrapper_build_cookie(&cookie);
2579     }
2580 
2581     chars_to_skip = self->decoded_chars_used;
2582 
2583     /* Decoder state will be restored at the end */
2584     saved_state = PyObject_CallMethodObjArgs(self->decoder,
2585                                              _PyIO_str_getstate, NULL);
2586     if (saved_state == NULL)
2587         goto fail;
2588 
2589 #define DECODER_GETSTATE() do { \
2590         PyObject *dec_buffer; \
2591         PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2592             _PyIO_str_getstate, NULL); \
2593         if (_state == NULL) \
2594             goto fail; \
2595         if (!PyTuple_Check(_state)) { \
2596             PyErr_SetString(PyExc_TypeError, \
2597                             "illegal decoder state"); \
2598             Py_DECREF(_state); \
2599             goto fail; \
2600         } \
2601         if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2602                               &dec_buffer, &dec_flags)) \
2603         { \
2604             Py_DECREF(_state); \
2605             goto fail; \
2606         } \
2607         if (!PyBytes_Check(dec_buffer)) { \
2608             PyErr_Format(PyExc_TypeError, \
2609                          "illegal decoder state: the first item should be a " \
2610                          "bytes object, not '%.200s'", \
2611                          Py_TYPE(dec_buffer)->tp_name); \
2612             Py_DECREF(_state); \
2613             goto fail; \
2614         } \
2615         dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2616         Py_DECREF(_state); \
2617     } while (0)
2618 
2619 #define DECODER_DECODE(start, len, res) do { \
2620         PyObject *_decoded = _PyObject_CallMethodId( \
2621             self->decoder, &PyId_decode, "y#", start, len); \
2622         if (check_decoded(_decoded) < 0) \
2623             goto fail; \
2624         res = PyUnicode_GET_LENGTH(_decoded); \
2625         Py_DECREF(_decoded); \
2626     } while (0)
2627 
2628     /* Fast search for an acceptable start point, close to our
2629        current pos */
2630     skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2631     skip_back = 1;
2632     assert(skip_back <= PyBytes_GET_SIZE(next_input));
2633     input = PyBytes_AS_STRING(next_input);
2634     while (skip_bytes > 0) {
2635         /* Decode up to temptative start point */
2636         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2637             goto fail;
2638         DECODER_DECODE(input, skip_bytes, chars_decoded);
2639         if (chars_decoded <= chars_to_skip) {
2640             DECODER_GETSTATE();
2641             if (dec_buffer_len == 0) {
2642                 /* Before pos and no bytes buffered in decoder => OK */
2643                 cookie.dec_flags = dec_flags;
2644                 chars_to_skip -= chars_decoded;
2645                 break;
2646             }
2647             /* Skip back by buffered amount and reset heuristic */
2648             skip_bytes -= dec_buffer_len;
2649             skip_back = 1;
2650         }
2651         else {
2652             /* We're too far ahead, skip back a bit */
2653             skip_bytes -= skip_back;
2654             skip_back *= 2;
2655         }
2656     }
2657     if (skip_bytes <= 0) {
2658         skip_bytes = 0;
2659         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2660             goto fail;
2661     }
2662 
2663     /* Note our initial start point. */
2664     cookie.start_pos += skip_bytes;
2665     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2666     if (chars_to_skip == 0)
2667         goto finally;
2668 
2669     /* We should be close to the desired position.  Now feed the decoder one
2670      * byte at a time until we reach the `chars_to_skip` target.
2671      * As we go, note the nearest "safe start point" before the current
2672      * location (a point where the decoder has nothing buffered, so seek()
2673      * can safely start from there and advance to this location).
2674      */
2675     chars_decoded = 0;
2676     input = PyBytes_AS_STRING(next_input);
2677     input_end = input + PyBytes_GET_SIZE(next_input);
2678     input += skip_bytes;
2679     while (input < input_end) {
2680         Py_ssize_t n;
2681 
2682         DECODER_DECODE(input, (Py_ssize_t)1, n);
2683         /* We got n chars for 1 byte */
2684         chars_decoded += n;
2685         cookie.bytes_to_feed += 1;
2686         DECODER_GETSTATE();
2687 
2688         if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2689             /* Decoder buffer is empty, so this is a safe start point. */
2690             cookie.start_pos += cookie.bytes_to_feed;
2691             chars_to_skip -= chars_decoded;
2692             cookie.dec_flags = dec_flags;
2693             cookie.bytes_to_feed = 0;
2694             chars_decoded = 0;
2695         }
2696         if (chars_decoded >= chars_to_skip)
2697             break;
2698         input++;
2699     }
2700     if (input == input_end) {
2701         /* We didn't get enough decoded data; signal EOF to get more. */
2702         PyObject *decoded = _PyObject_CallMethodId(
2703             self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
2704         if (check_decoded(decoded) < 0)
2705             goto fail;
2706         chars_decoded += PyUnicode_GET_LENGTH(decoded);
2707         Py_DECREF(decoded);
2708         cookie.need_eof = 1;
2709 
2710         if (chars_decoded < chars_to_skip) {
2711             PyErr_SetString(PyExc_OSError,
2712                             "can't reconstruct logical file position");
2713             goto fail;
2714         }
2715     }
2716 
2717 finally:
2718     res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
2719     Py_DECREF(saved_state);
2720     if (res == NULL)
2721         return NULL;
2722     Py_DECREF(res);
2723 
2724     /* The returned cookie corresponds to the last safe start point. */
2725     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2726     return textiowrapper_build_cookie(&cookie);
2727 
2728 fail:
2729     if (saved_state) {
2730         PyObject *type, *value, *traceback;
2731         PyErr_Fetch(&type, &value, &traceback);
2732         res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
2733         _PyErr_ChainExceptions(type, value, traceback);
2734         Py_DECREF(saved_state);
2735         Py_XDECREF(res);
2736     }
2737     return NULL;
2738 }
2739 
2740 /*[clinic input]
2741 _io.TextIOWrapper.truncate
2742     pos: object = None
2743     /
2744 [clinic start generated code]*/
2745 
2746 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2747 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2748 /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2749 {
2750     PyObject *res;
2751 
2752     CHECK_ATTACHED(self)
2753 
2754     res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2755     if (res == NULL)
2756         return NULL;
2757     Py_DECREF(res);
2758 
2759     return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2760 }
2761 
2762 static PyObject *
textiowrapper_repr(textio * self)2763 textiowrapper_repr(textio *self)
2764 {
2765     PyObject *nameobj, *modeobj, *res, *s;
2766     int status;
2767 
2768     CHECK_INITIALIZED(self);
2769 
2770     res = PyUnicode_FromString("<_io.TextIOWrapper");
2771     if (res == NULL)
2772         return NULL;
2773 
2774     status = Py_ReprEnter((PyObject *)self);
2775     if (status != 0) {
2776         if (status > 0) {
2777             PyErr_Format(PyExc_RuntimeError,
2778                          "reentrant call inside %s.__repr__",
2779                          Py_TYPE(self)->tp_name);
2780         }
2781         goto error;
2782     }
2783     nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
2784     if (nameobj == NULL) {
2785         if (PyErr_ExceptionMatches(PyExc_Exception))
2786             PyErr_Clear();
2787         else
2788             goto error;
2789     }
2790     else {
2791         s = PyUnicode_FromFormat(" name=%R", nameobj);
2792         Py_DECREF(nameobj);
2793         if (s == NULL)
2794             goto error;
2795         PyUnicode_AppendAndDel(&res, s);
2796         if (res == NULL)
2797             goto error;
2798     }
2799     modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
2800     if (modeobj == NULL) {
2801         if (PyErr_ExceptionMatches(PyExc_Exception))
2802             PyErr_Clear();
2803         else
2804             goto error;
2805     }
2806     else {
2807         s = PyUnicode_FromFormat(" mode=%R", modeobj);
2808         Py_DECREF(modeobj);
2809         if (s == NULL)
2810             goto error;
2811         PyUnicode_AppendAndDel(&res, s);
2812         if (res == NULL)
2813             goto error;
2814     }
2815     s = PyUnicode_FromFormat("%U encoding=%R>",
2816                              res, self->encoding);
2817     Py_DECREF(res);
2818     if (status == 0) {
2819         Py_ReprLeave((PyObject *)self);
2820     }
2821     return s;
2822 
2823   error:
2824     Py_XDECREF(res);
2825     if (status == 0) {
2826         Py_ReprLeave((PyObject *)self);
2827     }
2828     return NULL;
2829 }
2830 
2831 
2832 /* Inquiries */
2833 
2834 /*[clinic input]
2835 _io.TextIOWrapper.fileno
2836 [clinic start generated code]*/
2837 
2838 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)2839 _io_TextIOWrapper_fileno_impl(textio *self)
2840 /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2841 {
2842     CHECK_ATTACHED(self);
2843     return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
2844 }
2845 
2846 /*[clinic input]
2847 _io.TextIOWrapper.seekable
2848 [clinic start generated code]*/
2849 
2850 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)2851 _io_TextIOWrapper_seekable_impl(textio *self)
2852 /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2853 {
2854     CHECK_ATTACHED(self);
2855     return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
2856 }
2857 
2858 /*[clinic input]
2859 _io.TextIOWrapper.readable
2860 [clinic start generated code]*/
2861 
2862 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)2863 _io_TextIOWrapper_readable_impl(textio *self)
2864 /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2865 {
2866     CHECK_ATTACHED(self);
2867     return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
2868 }
2869 
2870 /*[clinic input]
2871 _io.TextIOWrapper.writable
2872 [clinic start generated code]*/
2873 
2874 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)2875 _io_TextIOWrapper_writable_impl(textio *self)
2876 /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
2877 {
2878     CHECK_ATTACHED(self);
2879     return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
2880 }
2881 
2882 /*[clinic input]
2883 _io.TextIOWrapper.isatty
2884 [clinic start generated code]*/
2885 
2886 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)2887 _io_TextIOWrapper_isatty_impl(textio *self)
2888 /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
2889 {
2890     CHECK_ATTACHED(self);
2891     return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
2892 }
2893 
2894 static PyObject *
textiowrapper_getstate(textio * self,PyObject * args)2895 textiowrapper_getstate(textio *self, PyObject *args)
2896 {
2897     PyErr_Format(PyExc_TypeError,
2898                  "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2899     return NULL;
2900 }
2901 
2902 /*[clinic input]
2903 _io.TextIOWrapper.flush
2904 [clinic start generated code]*/
2905 
2906 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)2907 _io_TextIOWrapper_flush_impl(textio *self)
2908 /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
2909 {
2910     CHECK_ATTACHED(self);
2911     CHECK_CLOSED(self);
2912     self->telling = self->seekable;
2913     if (_textiowrapper_writeflush(self) < 0)
2914         return NULL;
2915     return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
2916 }
2917 
2918 /*[clinic input]
2919 _io.TextIOWrapper.close
2920 [clinic start generated code]*/
2921 
2922 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)2923 _io_TextIOWrapper_close_impl(textio *self)
2924 /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
2925 {
2926     PyObject *res;
2927     int r;
2928     CHECK_ATTACHED(self);
2929 
2930     res = textiowrapper_closed_get(self, NULL);
2931     if (res == NULL)
2932         return NULL;
2933     r = PyObject_IsTrue(res);
2934     Py_DECREF(res);
2935     if (r < 0)
2936         return NULL;
2937 
2938     if (r > 0) {
2939         Py_RETURN_NONE; /* stream already closed */
2940     }
2941     else {
2942         PyObject *exc = NULL, *val, *tb;
2943         if (self->finalizing) {
2944             res = _PyObject_CallMethodIdObjArgs(self->buffer,
2945                                                 &PyId__dealloc_warn,
2946                                                 self, NULL);
2947             if (res)
2948                 Py_DECREF(res);
2949             else
2950                 PyErr_Clear();
2951         }
2952         res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2953         if (res == NULL)
2954             PyErr_Fetch(&exc, &val, &tb);
2955         else
2956             Py_DECREF(res);
2957 
2958         res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2959         if (exc != NULL) {
2960             _PyErr_ChainExceptions(exc, val, tb);
2961             Py_CLEAR(res);
2962         }
2963         return res;
2964     }
2965 }
2966 
2967 static PyObject *
textiowrapper_iternext(textio * self)2968 textiowrapper_iternext(textio *self)
2969 {
2970     PyObject *line;
2971 
2972     CHECK_ATTACHED(self);
2973 
2974     self->telling = 0;
2975     if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2976         /* Skip method call overhead for speed */
2977         line = _textiowrapper_readline(self, -1);
2978     }
2979     else {
2980         line = PyObject_CallMethodObjArgs((PyObject *)self,
2981                                            _PyIO_str_readline, NULL);
2982         if (line && !PyUnicode_Check(line)) {
2983             PyErr_Format(PyExc_OSError,
2984                          "readline() should have returned a str object, "
2985                          "not '%.200s'", Py_TYPE(line)->tp_name);
2986             Py_DECREF(line);
2987             return NULL;
2988         }
2989     }
2990 
2991     if (line == NULL || PyUnicode_READY(line) == -1)
2992         return NULL;
2993 
2994     if (PyUnicode_GET_LENGTH(line) == 0) {
2995         /* Reached EOF or would have blocked */
2996         Py_DECREF(line);
2997         Py_CLEAR(self->snapshot);
2998         self->telling = self->seekable;
2999         return NULL;
3000     }
3001 
3002     return line;
3003 }
3004 
3005 static PyObject *
textiowrapper_name_get(textio * self,void * context)3006 textiowrapper_name_get(textio *self, void *context)
3007 {
3008     CHECK_ATTACHED(self);
3009     return _PyObject_GetAttrId(self->buffer, &PyId_name);
3010 }
3011 
3012 static PyObject *
textiowrapper_closed_get(textio * self,void * context)3013 textiowrapper_closed_get(textio *self, void *context)
3014 {
3015     CHECK_ATTACHED(self);
3016     return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3017 }
3018 
3019 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)3020 textiowrapper_newlines_get(textio *self, void *context)
3021 {
3022     PyObject *res;
3023     CHECK_ATTACHED(self);
3024     if (self->decoder == NULL ||
3025         _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3026     {
3027         Py_RETURN_NONE;
3028     }
3029     return res;
3030 }
3031 
3032 static PyObject *
textiowrapper_errors_get(textio * self,void * context)3033 textiowrapper_errors_get(textio *self, void *context)
3034 {
3035     CHECK_INITIALIZED(self);
3036     Py_INCREF(self->errors);
3037     return self->errors;
3038 }
3039 
3040 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)3041 textiowrapper_chunk_size_get(textio *self, void *context)
3042 {
3043     CHECK_ATTACHED(self);
3044     return PyLong_FromSsize_t(self->chunk_size);
3045 }
3046 
3047 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)3048 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
3049 {
3050     Py_ssize_t n;
3051     CHECK_ATTACHED_INT(self);
3052     if (arg == NULL) {
3053         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3054         return -1;
3055     }
3056     n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
3057     if (n == -1 && PyErr_Occurred())
3058         return -1;
3059     if (n <= 0) {
3060         PyErr_SetString(PyExc_ValueError,
3061                         "a strictly positive integer is required");
3062         return -1;
3063     }
3064     self->chunk_size = n;
3065     return 0;
3066 }
3067 
3068 #include "clinic/textio.c.h"
3069 
/* Method table for _io.IncrementalNewlineDecoder: decode, getstate,
   setstate and reset.  Each entry is a *_METHODDEF macro; these presumably
   come from the "clinic/textio.c.h" header included just before this
   table -- confirm there. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}  /* sentinel */
};
3077 
/* Computed attributes of _io.IncrementalNewlineDecoder.  "newlines" has a
   getter only (its setter slot is NULL). */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}  /* sentinel */
};
3082 
/* Type object for _io.IncrementalNewlineDecoder.
 *
 * The initializer is positional, so the order of the entries must follow
 * the PyTypeObject field order; the trailing comment on each line names the
 * slot being filled.  Only the deallocator, flags, docstring, method table,
 * getset table, tp_init and tp_new are set here; every other slot is 0.
 * The type allows subclassing (Py_TPFLAGS_BASETYPE).
 */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    _io_IncrementalNewlineDecoder___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
3123 
3124 
/* Method table for _io.TextIOWrapper.  The *_METHODDEF entries are macros
   (presumably from the "clinic/textio.c.h" header included earlier in this
   file -- confirm there); "__getstate__" is the only hand-written entry. */
static PyMethodDef textiowrapper_methods[] = {
    /* Core stream operations */
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Inquiries */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
    /* textiowrapper_getstate() unconditionally sets TypeError */
    {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
3146 
/* Struct fields of `textio` exposed directly as attributes.  All are
   READONLY except "_finalizing", whose flags field is 0. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
    {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
    {NULL}  /* sentinel */
};
3155 
/* Computed attributes of _io.TextIOWrapper.  "name", "closed", "newlines"
   and "errors" have getters only; "_CHUNK_SIZE" has both a getter and a
   setter.  The "mode" entry below is commented out, so no "mode" getter is
   registered by this table. */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}  /* sentinel */
};
3167 
/* Type object for _io.TextIOWrapper.
 *
 * The initializer is positional, so the order of the entries must follow
 * the PyTypeObject field order; the trailing comment on each line names the
 * slot being filled.  The type is subclassable, GC-tracked (tp_traverse and
 * tp_clear are provided), supports weak references and an instance dict via
 * the `weakreflist` and `dict` fields of `textio`, and implements
 * tp_iternext.
 *
 * NOTE(review): tp_base, tp_iter and tp_finalize are 0 here even though
 * Py_TPFLAGS_HAVE_FINALIZE is set; presumably the base type is assigned
 * elsewhere and those slots are inherited from it -- confirm.
 */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
        | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
    _io_TextIOWrapper___init____doc__, /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    _io_TextIOWrapper___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};
3219