1 /*
2     An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3 
4     Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5 
6     Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8 
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "pycore_interp.h"        // PyInterpreterState.fs_codec
12 #include "pycore_object.h"
13 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
14 #include "structmember.h"         // PyMemberDef
15 #include "_iomodule.h"
16 
17 /*[clinic input]
18 module _io
19 class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
20 class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
21 [clinic start generated code]*/
22 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
23 
/* Identifier handles for the attribute and method names used in this file.
   Each _Py_IDENTIFIER(x) makes a PyId_x handle available; the code below
   passes such handles to _PyUnicode_FromId(), _PyObject_CallMethodId*() and
   _PyObject_LookupAttrId(). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
44 
45 /* TextIOBase */
46 
/* Class docstring; installed as tp_doc of PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
54 
55 static PyObject *
_unsupported(const char * message)56 _unsupported(const char *message)
57 {
58     _PyIO_State *state = IO_STATE();
59     if (state != NULL)
60         PyErr_SetString(state->unsupported_operation, message);
61     return NULL;
62 }
63 
64 PyDoc_STRVAR(textiobase_detach_doc,
65     "Separate the underlying buffer from the TextIOBase and return it.\n"
66     "\n"
67     "After the underlying buffer has been detached, the TextIO is in an\n"
68     "unusable state.\n"
69     );
70 
71 static PyObject *
textiobase_detach(PyObject * self,PyObject * Py_UNUSED (ignored))72 textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
73 {
74     return _unsupported("detach");
75 }
76 
77 PyDoc_STRVAR(textiobase_read_doc,
78     "Read at most n characters from stream.\n"
79     "\n"
80     "Read from underlying buffer until we have n characters or we hit EOF.\n"
81     "If n is negative or omitted, read until EOF.\n"
82     );
83 
84 static PyObject *
textiobase_read(PyObject * self,PyObject * args)85 textiobase_read(PyObject *self, PyObject *args)
86 {
87     return _unsupported("read");
88 }
89 
90 PyDoc_STRVAR(textiobase_readline_doc,
91     "Read until newline or EOF.\n"
92     "\n"
93     "Returns an empty string if EOF is hit immediately.\n"
94     );
95 
96 static PyObject *
textiobase_readline(PyObject * self,PyObject * args)97 textiobase_readline(PyObject *self, PyObject *args)
98 {
99     return _unsupported("readline");
100 }
101 
102 PyDoc_STRVAR(textiobase_write_doc,
103     "Write string to stream.\n"
104     "Returns the number of characters written (which is always equal to\n"
105     "the length of the string).\n"
106     );
107 
108 static PyObject *
textiobase_write(PyObject * self,PyObject * args)109 textiobase_write(PyObject *self, PyObject *args)
110 {
111     return _unsupported("write");
112 }
113 
114 PyDoc_STRVAR(textiobase_encoding_doc,
115     "Encoding of the text stream.\n"
116     "\n"
117     "Subclasses should override.\n"
118     );
119 
120 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)121 textiobase_encoding_get(PyObject *self, void *context)
122 {
123     Py_RETURN_NONE;
124 }
125 
126 PyDoc_STRVAR(textiobase_newlines_doc,
127     "Line endings translated so far.\n"
128     "\n"
129     "Only line endings translated during reading are considered.\n"
130     "\n"
131     "Subclasses should override.\n"
132     );
133 
134 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)135 textiobase_newlines_get(PyObject *self, void *context)
136 {
137     Py_RETURN_NONE;
138 }
139 
140 PyDoc_STRVAR(textiobase_errors_doc,
141     "The error setting of the decoder or encoder.\n"
142     "\n"
143     "Subclasses should override.\n"
144     );
145 
146 static PyObject *
textiobase_errors_get(PyObject * self,void * context)147 textiobase_errors_get(PyObject *self, void *context)
148 {
149     Py_RETURN_NONE;
150 }
151 
152 
153 static PyMethodDef textiobase_methods[] = {
154     {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
155     {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
156     {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
157     {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
158     {NULL, NULL}
159 };
160 
161 static PyGetSetDef textiobase_getset[] = {
162     {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
163     {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
164     {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
165     {NULL}
166 };
167 
168 PyTypeObject PyTextIOBase_Type = {
169     PyVarObject_HEAD_INIT(NULL, 0)
170     "_io._TextIOBase",          /*tp_name*/
171     0,                          /*tp_basicsize*/
172     0,                          /*tp_itemsize*/
173     0,                          /*tp_dealloc*/
174     0,                          /*tp_vectorcall_offset*/
175     0,                          /*tp_getattr*/
176     0,                          /*tp_setattr*/
177     0,                          /*tp_as_async*/
178     0,                          /*tp_repr*/
179     0,                          /*tp_as_number*/
180     0,                          /*tp_as_sequence*/
181     0,                          /*tp_as_mapping*/
182     0,                          /*tp_hash */
183     0,                          /*tp_call*/
184     0,                          /*tp_str*/
185     0,                          /*tp_getattro*/
186     0,                          /*tp_setattro*/
187     0,                          /*tp_as_buffer*/
188     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
189     textiobase_doc,             /* tp_doc */
190     0,                          /* tp_traverse */
191     0,                          /* tp_clear */
192     0,                          /* tp_richcompare */
193     0,                          /* tp_weaklistoffset */
194     0,                          /* tp_iter */
195     0,                          /* tp_iternext */
196     textiobase_methods,         /* tp_methods */
197     0,                          /* tp_members */
198     textiobase_getset,          /* tp_getset */
199     &PyIOBase_Type,             /* tp_base */
200     0,                          /* tp_dict */
201     0,                          /* tp_descr_get */
202     0,                          /* tp_descr_set */
203     0,                          /* tp_dictoffset */
204     0,                          /* tp_init */
205     0,                          /* tp_alloc */
206     0,                          /* tp_new */
207     0,                          /* tp_free */
208     0,                          /* tp_is_gc */
209     0,                          /* tp_bases */
210     0,                          /* tp_mro */
211     0,                          /* tp_cache */
212     0,                          /* tp_subclasses */
213     0,                          /* tp_weaklist */
214     0,                          /* tp_del */
215     0,                          /* tp_version_tag */
216     0,                          /* tp_finalize */
217 };
218 
219 
220 /* IncrementalNewlineDecoder */
221 
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder, or None to take str input
                                   directly; decode() treats NULL as
                                   "__init__ not called" */
    PyObject *errors;           /* errors argument given to __init__, or the
                                   "strict" string when it was omitted */
    unsigned int pendingcr: 1;  /* a trailing '\r' was held back by the last
                                   non-final decode() */
    unsigned int translate: 1;  /* translate "\r\n" and "\r" into "\n" */
    unsigned int seennl: 3;     /* OR of SEEN_CR / SEEN_LF / SEEN_CRLF */
} nldecoder_object;
230 
231 /*[clinic input]
232 _io.IncrementalNewlineDecoder.__init__
233     decoder: object
234     translate: int
235     errors: object(c_default="NULL") = "strict"
236 
237 Codec used when reading a file in universal newlines mode.
238 
239 It wraps another incremental decoder, translating \r\n and \r into \n.
240 It also records the types of newlines encountered.  When used with
241 translate=False, it ensures that the newline sequence is returned in
242 one piece. When used with decoder=None, it expects unicode strings as
243 decode input and translates newlines without first invoking an external
244 decoder.
245 [clinic start generated code]*/
246 
247 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)248 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
249                                             PyObject *decoder, int translate,
250                                             PyObject *errors)
251 /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
252 {
253     self->decoder = decoder;
254     Py_INCREF(decoder);
255 
256     if (errors == NULL) {
257         self->errors = _PyUnicode_FromId(&PyId_strict);
258         if (self->errors == NULL)
259             return -1;
260     }
261     else {
262         self->errors = errors;
263     }
264     Py_INCREF(self->errors);
265 
266     self->translate = translate ? 1 : 0;
267     self->seennl = 0;
268     self->pendingcr = 0;
269 
270     return 0;
271 }
272 
273 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)274 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
275 {
276     Py_CLEAR(self->decoder);
277     Py_CLEAR(self->errors);
278     Py_TYPE(self)->tp_free((PyObject *)self);
279 }
280 
281 static int
check_decoded(PyObject * decoded)282 check_decoded(PyObject *decoded)
283 {
284     if (decoded == NULL)
285         return -1;
286     if (!PyUnicode_Check(decoded)) {
287         PyErr_Format(PyExc_TypeError,
288                      "decoder should return a string result, not '%.200s'",
289                      Py_TYPE(decoded)->tp_name);
290         Py_DECREF(decoded);
291         return -1;
292     }
293     if (PyUnicode_READY(decoded) < 0) {
294         Py_DECREF(decoded);
295         return -1;
296     }
297     return 0;
298 }
299 
/* Bit flags for the newline conventions observed so far; they are OR-ed
   together into nldecoder_object.seennl. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
304 
/* Decode `input` and apply newline bookkeeping/translation.
 *
 * myself: an nldecoder_object (cast without a type check).
 * input:  passed to the wrapped decoder's decode(), or used directly as the
 *         text when the wrapped decoder is None.
 * final:  non-zero when this is the last chunk; a trailing '\r' is then
 *         not held back.
 *
 * Steps: (1) obtain the decoded text, (2) prepend a '\r' held back by a
 * previous call, (3) when not final, hold back a trailing '\r' in
 * self->pendingcr, (4) record the newline kinds in self->seennl and, if
 * self->translate is set, rewrite "\r\n" and "\r" as "\n".
 *
 * Returns a new reference to the resulting str, or NULL with an exception
 * set.
 */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* On failure check_decoded() has already released `output`. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, out, 0, '\r');
        /* `kind` doubles as the per-character byte width here. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        const void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* Nothing to scan; self->seennl is left as it was. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        /* NOTE(review): the inner "fast loop" scans in the branches below
           have no explicit bound on the index; they appear to rely on the
           string data being followed by a NUL character - confirm. */
        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* For 1-byte strings a '\n' byte is a '\n' character; wider
                   kinds are rescanned character by character. */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            void *translated;
            int kind = PyUnicode_KIND(output);
            const void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that character lay beyond the text. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* The old output was released above, so return directly rather
               than going through the error label. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
505 
506 /*[clinic input]
507 _io.IncrementalNewlineDecoder.decode
508     input: object
509     final: bool(accept={int}) = False
510 [clinic start generated code]*/
511 
512 static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object * self,PyObject * input,int final)513 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
514                                           PyObject *input, int final)
515 /*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
516 {
517     return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
518 }
519 
520 /*[clinic input]
521 _io.IncrementalNewlineDecoder.getstate
522 [clinic start generated code]*/
523 
524 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)525 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
526 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
527 {
528     PyObject *buffer;
529     unsigned long long flag;
530 
531     if (self->decoder != Py_None) {
532         PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
533            _PyIO_str_getstate);
534         if (state == NULL)
535             return NULL;
536         if (!PyTuple_Check(state)) {
537             PyErr_SetString(PyExc_TypeError,
538                             "illegal decoder state");
539             Py_DECREF(state);
540             return NULL;
541         }
542         if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
543                               &buffer, &flag))
544         {
545             Py_DECREF(state);
546             return NULL;
547         }
548         Py_INCREF(buffer);
549         Py_DECREF(state);
550     }
551     else {
552         buffer = PyBytes_FromString("");
553         flag = 0;
554     }
555     flag <<= 1;
556     if (self->pendingcr)
557         flag |= 1;
558     return Py_BuildValue("NK", buffer, flag);
559 }
560 
561 /*[clinic input]
562 _io.IncrementalNewlineDecoder.setstate
563     state: object
564     /
565 [clinic start generated code]*/
566 
567 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)568 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
569                                        PyObject *state)
570 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
571 {
572     PyObject *buffer;
573     unsigned long long flag;
574 
575     if (!PyTuple_Check(state)) {
576         PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
577         return NULL;
578     }
579     if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
580                           &buffer, &flag))
581     {
582         return NULL;
583     }
584 
585     self->pendingcr = (int) (flag & 1);
586     flag >>= 1;
587 
588     if (self->decoder != Py_None)
589         return _PyObject_CallMethodId(self->decoder,
590                                       &PyId_setstate, "((OK))", buffer, flag);
591     else
592         Py_RETURN_NONE;
593 }
594 
595 /*[clinic input]
596 _io.IncrementalNewlineDecoder.reset
597 [clinic start generated code]*/
598 
599 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)600 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
601 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
602 {
603     self->seennl = 0;
604     self->pendingcr = 0;
605     if (self->decoder != Py_None)
606         return PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
607     else
608         Py_RETURN_NONE;
609 }
610 
611 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)612 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
613 {
614     switch (self->seennl) {
615     case SEEN_CR:
616         return PyUnicode_FromString("\r");
617     case SEEN_LF:
618         return PyUnicode_FromString("\n");
619     case SEEN_CRLF:
620         return PyUnicode_FromString("\r\n");
621     case SEEN_CR | SEEN_LF:
622         return Py_BuildValue("ss", "\r", "\n");
623     case SEEN_CR | SEEN_CRLF:
624         return Py_BuildValue("ss", "\r", "\r\n");
625     case SEEN_LF | SEEN_CRLF:
626         return Py_BuildValue("ss", "\n", "\r\n");
627     case SEEN_CR | SEEN_LF | SEEN_CRLF:
628         return Py_BuildValue("sss", "\r", "\n", "\r\n");
629     default:
630         Py_RETURN_NONE;
631    }
632 
633 }
634 
635 /* TextIOWrapper */
636 
/* Pointer type used to store the specialized encoders.  The encoders in
   this file are declared with a `textio *` first parameter and are cast
   to this type when stored or compared. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
639 
/* Instance layout of _io.TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
698 
699 static void
700 textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
701 
702 /* A couple of specialized cases in order to bypass the slow incremental
703    encoding methods for the most popular encodings. */
704 
705 static PyObject *
ascii_encode(textio * self,PyObject * text)706 ascii_encode(textio *self, PyObject *text)
707 {
708     return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
709 }
710 
711 static PyObject *
utf16be_encode(textio * self,PyObject * text)712 utf16be_encode(textio *self, PyObject *text)
713 {
714     return _PyUnicode_EncodeUTF16(text,
715                                   PyUnicode_AsUTF8(self->errors), 1);
716 }
717 
718 static PyObject *
utf16le_encode(textio * self,PyObject * text)719 utf16le_encode(textio *self, PyObject *text)
720 {
721     return _PyUnicode_EncodeUTF16(text,
722                                   PyUnicode_AsUTF8(self->errors), -1);
723 }
724 
725 static PyObject *
utf16_encode(textio * self,PyObject * text)726 utf16_encode(textio *self, PyObject *text)
727 {
728     if (!self->encoding_start_of_stream) {
729         /* Skip the BOM and use native byte ordering */
730 #if PY_BIG_ENDIAN
731         return utf16be_encode(self, text);
732 #else
733         return utf16le_encode(self, text);
734 #endif
735     }
736     return _PyUnicode_EncodeUTF16(text,
737                                   PyUnicode_AsUTF8(self->errors), 0);
738 }
739 
740 static PyObject *
utf32be_encode(textio * self,PyObject * text)741 utf32be_encode(textio *self, PyObject *text)
742 {
743     return _PyUnicode_EncodeUTF32(text,
744                                   PyUnicode_AsUTF8(self->errors), 1);
745 }
746 
747 static PyObject *
utf32le_encode(textio * self,PyObject * text)748 utf32le_encode(textio *self, PyObject *text)
749 {
750     return _PyUnicode_EncodeUTF32(text,
751                                   PyUnicode_AsUTF8(self->errors), -1);
752 }
753 
754 static PyObject *
utf32_encode(textio * self,PyObject * text)755 utf32_encode(textio *self, PyObject *text)
756 {
757     if (!self->encoding_start_of_stream) {
758         /* Skip the BOM and use native byte ordering */
759 #if PY_BIG_ENDIAN
760         return utf32be_encode(self, text);
761 #else
762         return utf32le_encode(self, text);
763 #endif
764     }
765     return _PyUnicode_EncodeUTF32(text,
766                                   PyUnicode_AsUTF8(self->errors), 0);
767 }
768 
769 static PyObject *
utf8_encode(textio * self,PyObject * text)770 utf8_encode(textio *self, PyObject *text)
771 {
772     return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
773 }
774 
775 static PyObject *
latin1_encode(textio * self,PyObject * text)776 latin1_encode(textio *self, PyObject *text)
777 {
778     return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
779 }
780 
781 // Return true when encoding can be skipped when text is ascii.
782 static inline int
is_asciicompat_encoding(encodefunc_t f)783 is_asciicompat_encoding(encodefunc_t f)
784 {
785     return f == (encodefunc_t) ascii_encode
786         || f == (encodefunc_t) latin1_encode
787         || f == (encodefunc_t) utf8_encode;
788 }
789 
790 /* Map normalized encoding names onto the specialized encoding funcs */
791 
/* One row of the encodefuncs table: a codec name and the specialized
   encoder to use for it. */
typedef struct {
    const char *name;
    encodefunc_t encodefunc;
} encodefuncentry;
796 
/* Codec name -> specialized encoder.  The table ends with a row whose
   name is NULL. */
static const encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
809 
810 static int
validate_newline(const char * newline)811 validate_newline(const char *newline)
812 {
813     if (newline && newline[0] != '\0'
814         && !(newline[0] == '\n' && newline[1] == '\0')
815         && !(newline[0] == '\r' && newline[1] == '\0')
816         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
817         PyErr_Format(PyExc_ValueError,
818                      "illegal newline value: %s", newline);
819         return -1;
820     }
821     return 0;
822 }
823 
824 static int
set_newline(textio * self,const char * newline)825 set_newline(textio *self, const char *newline)
826 {
827     PyObject *old = self->readnl;
828     if (newline == NULL) {
829         self->readnl = NULL;
830     }
831     else {
832         self->readnl = PyUnicode_FromString(newline);
833         if (self->readnl == NULL) {
834             self->readnl = old;
835             return -1;
836         }
837     }
838     self->readuniversal = (newline == NULL || newline[0] == '\0');
839     self->readtranslate = (newline == NULL);
840     self->writetranslate = (newline == NULL || newline[0] != '\0');
841     if (!self->readuniversal && self->readnl != NULL) {
842         // validate_newline() accepts only ASCII newlines.
843         assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
844         self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
845         if (strcmp(self->writenl, "\n") == 0) {
846             self->writenl = NULL;
847         }
848     }
849     else {
850 #ifdef MS_WINDOWS
851         self->writenl = "\r\n";
852 #else
853         self->writenl = NULL;
854 #endif
855     }
856     Py_XDECREF(old);
857     return 0;
858 }
859 
860 static int
_textiowrapper_set_decoder(textio * self,PyObject * codec_info,const char * errors)861 _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
862                            const char *errors)
863 {
864     PyObject *res;
865     int r;
866 
867     res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
868     if (res == NULL)
869         return -1;
870 
871     r = PyObject_IsTrue(res);
872     Py_DECREF(res);
873     if (r == -1)
874         return -1;
875 
876     if (r != 1)
877         return 0;
878 
879     Py_CLEAR(self->decoder);
880     self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
881     if (self->decoder == NULL)
882         return -1;
883 
884     if (self->readuniversal) {
885         PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
886             (PyObject *)&PyIncrementalNewlineDecoder_Type,
887             self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
888         if (incrementalDecoder == NULL)
889             return -1;
890         Py_CLEAR(self->decoder);
891         self->decoder = incrementalDecoder;
892     }
893 
894     return 0;
895 }
896 
897 static PyObject*
_textiowrapper_decode(PyObject * decoder,PyObject * bytes,int eof)898 _textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
899 {
900     PyObject *chars;
901 
902     if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type))
903         chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
904     else
905         chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
906                                            eof ? Py_True : Py_False, NULL);
907 
908     if (check_decoded(chars) < 0)
909         // check_decoded already decreases refcount
910         return NULL;
911 
912     return chars;
913 }
914 
915 static int
_textiowrapper_set_encoder(textio * self,PyObject * codec_info,const char * errors)916 _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
917                            const char *errors)
918 {
919     PyObject *res;
920     int r;
921 
922     res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
923     if (res == NULL)
924         return -1;
925 
926     r = PyObject_IsTrue(res);
927     Py_DECREF(res);
928     if (r == -1)
929         return -1;
930 
931     if (r != 1)
932         return 0;
933 
934     Py_CLEAR(self->encoder);
935     self->encodefunc = NULL;
936     self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
937     if (self->encoder == NULL)
938         return -1;
939 
940     /* Get the normalized named of the codec */
941     if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
942         return -1;
943     }
944     if (res != NULL && PyUnicode_Check(res)) {
945         const encodefuncentry *e = encodefuncs;
946         while (e->name != NULL) {
947             if (_PyUnicode_EqualToASCIIString(res, e->name)) {
948                 self->encodefunc = e->encodefunc;
949                 break;
950             }
951             e++;
952         }
953     }
954     Py_XDECREF(res);
955 
956     return 0;
957 }
958 
959 static int
_textiowrapper_fix_encoder_state(textio * self)960 _textiowrapper_fix_encoder_state(textio *self)
961 {
962     if (!self->seekable || !self->encoder) {
963         return 0;
964     }
965 
966     self->encoding_start_of_stream = 1;
967 
968     PyObject *cookieObj = PyObject_CallMethodNoArgs(
969         self->buffer, _PyIO_str_tell);
970     if (cookieObj == NULL) {
971         return -1;
972     }
973 
974     int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
975     Py_DECREF(cookieObj);
976     if (cmp < 0) {
977         return -1;
978     }
979 
980     if (cmp == 0) {
981         self->encoding_start_of_stream = 0;
982         PyObject *res = PyObject_CallMethodOneArg(
983             self->encoder, _PyIO_str_setstate, _PyLong_Zero);
984         if (res == NULL) {
985             return -1;
986         }
987         Py_DECREF(res);
988     }
989 
990     return 0;
991 }
992 
993 static int
io_check_errors(PyObject * errors)994 io_check_errors(PyObject *errors)
995 {
996     assert(errors != NULL && errors != Py_None);
997 
998     PyInterpreterState *interp = _PyInterpreterState_GET();
999 #ifndef Py_DEBUG
1000     /* In release mode, only check in development mode (-X dev) */
1001     if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
1002         return 0;
1003     }
1004 #else
1005     /* Always check in debug mode */
1006 #endif
1007 
1008     /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1009        before_PyUnicode_InitEncodings() is called. */
1010     if (!interp->unicode.fs_codec.encoding) {
1011         return 0;
1012     }
1013 
1014     Py_ssize_t name_length;
1015     const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1016     if (name == NULL) {
1017         return -1;
1018     }
1019     if (strlen(name) != (size_t)name_length) {
1020         PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1021         return -1;
1022     }
1023     PyObject *handler = PyCodec_LookupError(name);
1024     if (handler != NULL) {
1025         Py_DECREF(handler);
1026         return 0;
1027     }
1028     return -1;
1029 }
1030 
1031 
1032 
1033 /*[clinic input]
1034 _io.TextIOWrapper.__init__
1035     buffer: object
1036     encoding: str(accept={str, NoneType}) = None
1037     errors: object = None
1038     newline: str(accept={str, NoneType}) = None
1039     line_buffering: bool(accept={int}) = False
1040     write_through: bool(accept={int}) = False
1041 
1042 Character and line based layer over a BufferedIOBase object, buffer.
1043 
1044 encoding gives the name of the encoding that the stream will be
1045 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1046 
1047 errors determines the strictness of encoding and decoding (see
1048 help(codecs.Codec) or the documentation for codecs.register) and
1049 defaults to "strict".
1050 
1051 newline controls how line endings are handled. It can be None, '',
1052 '\n', '\r', and '\r\n'.  It works as follows:
1053 
1054 * On input, if newline is None, universal newlines mode is
1055   enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1056   these are translated into '\n' before being returned to the
1057   caller. If it is '', universal newline mode is enabled, but line
1058   endings are returned to the caller untranslated. If it has any of
1059   the other legal values, input lines are only terminated by the given
1060   string, and the line ending is returned to the caller untranslated.
1061 
1062 * On output, if newline is None, any '\n' characters written are
1063   translated to the system default line separator, os.linesep. If
1064   newline is '' or '\n', no translation takes place. If newline is any
1065   of the other legal values, any '\n' characters written are translated
1066   to the given string.
1067 
1068 If line_buffering is True, a call to flush is implied when a call to
1069 write contains a newline character.
1070 [clinic start generated code]*/
1071 
1072 static int
_io_TextIOWrapper___init___impl(textio * self,PyObject * buffer,const char * encoding,PyObject * errors,const char * newline,int line_buffering,int write_through)1073 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1074                                 const char *encoding, PyObject *errors,
1075                                 const char *newline, int line_buffering,
1076                                 int write_through)
1077 /*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
1078 {
1079     PyObject *raw, *codec_info = NULL;
1080     _PyIO_State *state = NULL;
1081     PyObject *res;
1082     int r;
1083 
1084     self->ok = 0;
1085     self->detached = 0;
1086 
1087     if (errors == Py_None) {
1088         errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
1089         if (errors == NULL) {
1090             return -1;
1091         }
1092     }
1093     else if (!PyUnicode_Check(errors)) {
1094         // Check 'errors' argument here because Argument Clinic doesn't support
1095         // 'str(accept={str, NoneType})' converter.
1096         PyErr_Format(
1097             PyExc_TypeError,
1098             "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1099             Py_TYPE(errors)->tp_name);
1100         return -1;
1101     }
1102     else if (io_check_errors(errors)) {
1103         return -1;
1104     }
1105 
1106     if (validate_newline(newline) < 0) {
1107         return -1;
1108     }
1109 
1110     Py_CLEAR(self->buffer);
1111     Py_CLEAR(self->encoding);
1112     Py_CLEAR(self->encoder);
1113     Py_CLEAR(self->decoder);
1114     Py_CLEAR(self->readnl);
1115     Py_CLEAR(self->decoded_chars);
1116     Py_CLEAR(self->pending_bytes);
1117     Py_CLEAR(self->snapshot);
1118     Py_CLEAR(self->errors);
1119     Py_CLEAR(self->raw);
1120     self->decoded_chars_used = 0;
1121     self->pending_bytes_count = 0;
1122     self->encodefunc = NULL;
1123     self->b2cratio = 0.0;
1124 
1125     if (encoding == NULL) {
1126         /* Try os.device_encoding(fileno) */
1127         PyObject *fileno;
1128         state = IO_STATE();
1129         if (state == NULL)
1130             goto error;
1131         fileno = _PyObject_CallMethodIdNoArgs(buffer, &PyId_fileno);
1132         /* Ignore only AttributeError and UnsupportedOperation */
1133         if (fileno == NULL) {
1134             if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1135                 PyErr_ExceptionMatches(state->unsupported_operation)) {
1136                 PyErr_Clear();
1137             }
1138             else {
1139                 goto error;
1140             }
1141         }
1142         else {
1143             int fd = _PyLong_AsInt(fileno);
1144             Py_DECREF(fileno);
1145             if (fd == -1 && PyErr_Occurred()) {
1146                 goto error;
1147             }
1148 
1149             self->encoding = _Py_device_encoding(fd);
1150             if (self->encoding == NULL)
1151                 goto error;
1152             else if (!PyUnicode_Check(self->encoding))
1153                 Py_CLEAR(self->encoding);
1154         }
1155     }
1156     if (encoding == NULL && self->encoding == NULL) {
1157         PyObject *locale_module = _PyIO_get_locale_module(state);
1158         if (locale_module == NULL)
1159             goto catch_ImportError;
1160         self->encoding = _PyObject_CallMethodIdOneArg(
1161             locale_module, &PyId_getpreferredencoding, Py_False);
1162         Py_DECREF(locale_module);
1163         if (self->encoding == NULL) {
1164           catch_ImportError:
1165             /*
1166              Importing locale can raise an ImportError because of
1167              _functools, and locale.getpreferredencoding can raise an
1168              ImportError if _locale is not available.  These will happen
1169              during module building.
1170             */
1171             if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1172                 PyErr_Clear();
1173                 self->encoding = PyUnicode_FromString("ascii");
1174             }
1175             else
1176                 goto error;
1177         }
1178         else if (!PyUnicode_Check(self->encoding))
1179             Py_CLEAR(self->encoding);
1180     }
1181     if (self->encoding != NULL) {
1182         encoding = PyUnicode_AsUTF8(self->encoding);
1183         if (encoding == NULL)
1184             goto error;
1185     }
1186     else if (encoding != NULL) {
1187         self->encoding = PyUnicode_FromString(encoding);
1188         if (self->encoding == NULL)
1189             goto error;
1190     }
1191     else {
1192         PyErr_SetString(PyExc_OSError,
1193                         "could not determine default encoding");
1194         goto error;
1195     }
1196 
1197     /* Check we have been asked for a real text encoding */
1198     codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1199     if (codec_info == NULL) {
1200         Py_CLEAR(self->encoding);
1201         goto error;
1202     }
1203 
1204     /* XXX: Failures beyond this point have the potential to leak elements
1205      * of the partially constructed object (like self->encoding)
1206      */
1207 
1208     Py_INCREF(errors);
1209     self->errors = errors;
1210     self->chunk_size = 8192;
1211     self->line_buffering = line_buffering;
1212     self->write_through = write_through;
1213     if (set_newline(self, newline) < 0) {
1214         goto error;
1215     }
1216 
1217     self->buffer = buffer;
1218     Py_INCREF(buffer);
1219 
1220     /* Build the decoder object */
1221     if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1222         goto error;
1223 
1224     /* Build the encoder object */
1225     if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1226         goto error;
1227 
1228     /* Finished sorting out the codec details */
1229     Py_CLEAR(codec_info);
1230 
1231     if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) ||
1232         Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
1233         Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
1234     {
1235         if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1236             goto error;
1237         /* Cache the raw FileIO object to speed up 'closed' checks */
1238         if (raw != NULL) {
1239             if (Py_IS_TYPE(raw, &PyFileIO_Type))
1240                 self->raw = raw;
1241             else
1242                 Py_DECREF(raw);
1243         }
1244     }
1245 
1246     res = _PyObject_CallMethodIdNoArgs(buffer, &PyId_seekable);
1247     if (res == NULL)
1248         goto error;
1249     r = PyObject_IsTrue(res);
1250     Py_DECREF(res);
1251     if (r < 0)
1252         goto error;
1253     self->seekable = self->telling = r;
1254 
1255     r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1256     if (r < 0) {
1257         goto error;
1258     }
1259     Py_XDECREF(res);
1260     self->has_read1 = r;
1261 
1262     self->encoding_start_of_stream = 0;
1263     if (_textiowrapper_fix_encoder_state(self) < 0) {
1264         goto error;
1265     }
1266 
1267     self->ok = 1;
1268     return 0;
1269 
1270   error:
1271     Py_XDECREF(codec_info);
1272     return -1;
1273 }
1274 
1275 /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1276  * -1 on error.
1277  */
1278 static int
convert_optional_bool(PyObject * obj,int default_value)1279 convert_optional_bool(PyObject *obj, int default_value)
1280 {
1281     long v;
1282     if (obj == Py_None) {
1283         v = default_value;
1284     }
1285     else {
1286         v = PyLong_AsLong(obj);
1287         if (v == -1 && PyErr_Occurred())
1288             return -1;
1289     }
1290     return v != 0;
1291 }
1292 
1293 static int
textiowrapper_change_encoding(textio * self,PyObject * encoding,PyObject * errors,int newline_changed)1294 textiowrapper_change_encoding(textio *self, PyObject *encoding,
1295                               PyObject *errors, int newline_changed)
1296 {
1297     /* Use existing settings where new settings are not specified */
1298     if (encoding == Py_None && errors == Py_None && !newline_changed) {
1299         return 0;  // no change
1300     }
1301 
1302     if (encoding == Py_None) {
1303         encoding = self->encoding;
1304         if (errors == Py_None) {
1305             errors = self->errors;
1306         }
1307     }
1308     else if (errors == Py_None) {
1309         errors = _PyUnicode_FromId(&PyId_strict);
1310         if (errors == NULL) {
1311             return -1;
1312         }
1313     }
1314 
1315     const char *c_errors = PyUnicode_AsUTF8(errors);
1316     if (c_errors == NULL) {
1317         return -1;
1318     }
1319 
1320     // Create new encoder & decoder
1321     PyObject *codec_info = _PyCodec_LookupTextEncoding(
1322         PyUnicode_AsUTF8(encoding), "codecs.open()");
1323     if (codec_info == NULL) {
1324         return -1;
1325     }
1326     if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1327             _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1328         Py_DECREF(codec_info);
1329         return -1;
1330     }
1331     Py_DECREF(codec_info);
1332 
1333     Py_INCREF(encoding);
1334     Py_INCREF(errors);
1335     Py_SETREF(self->encoding, encoding);
1336     Py_SETREF(self->errors, errors);
1337 
1338     return _textiowrapper_fix_encoder_state(self);
1339 }
1340 
1341 /*[clinic input]
1342 _io.TextIOWrapper.reconfigure
1343     *
1344     encoding: object = None
1345     errors: object = None
1346     newline as newline_obj: object(c_default="NULL") = None
1347     line_buffering as line_buffering_obj: object = None
1348     write_through as write_through_obj: object = None
1349 
1350 Reconfigure the text stream with new parameters.
1351 
1352 This also does an implicit stream flush.
1353 
1354 [clinic start generated code]*/
1355 
1356 static PyObject *
_io_TextIOWrapper_reconfigure_impl(textio * self,PyObject * encoding,PyObject * errors,PyObject * newline_obj,PyObject * line_buffering_obj,PyObject * write_through_obj)1357 _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1358                                    PyObject *errors, PyObject *newline_obj,
1359                                    PyObject *line_buffering_obj,
1360                                    PyObject *write_through_obj)
1361 /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
1362 {
1363     int line_buffering;
1364     int write_through;
1365     const char *newline = NULL;
1366 
1367     /* Check if something is in the read buffer */
1368     if (self->decoded_chars != NULL) {
1369         if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1370             _unsupported("It is not possible to set the encoding or newline "
1371                          "of stream after the first read");
1372             return NULL;
1373         }
1374     }
1375 
1376     if (newline_obj != NULL && newline_obj != Py_None) {
1377         newline = PyUnicode_AsUTF8(newline_obj);
1378         if (newline == NULL || validate_newline(newline) < 0) {
1379             return NULL;
1380         }
1381     }
1382 
1383     line_buffering = convert_optional_bool(line_buffering_obj,
1384                                            self->line_buffering);
1385     write_through = convert_optional_bool(write_through_obj,
1386                                           self->write_through);
1387     if (line_buffering < 0 || write_through < 0) {
1388         return NULL;
1389     }
1390 
1391     PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
1392     if (res == NULL) {
1393         return NULL;
1394     }
1395     Py_DECREF(res);
1396     self->b2cratio = 0;
1397 
1398     if (newline_obj != NULL && set_newline(self, newline) < 0) {
1399         return NULL;
1400     }
1401 
1402     if (textiowrapper_change_encoding(
1403             self, encoding, errors, newline_obj != NULL) < 0) {
1404         return NULL;
1405     }
1406 
1407     self->line_buffering = line_buffering;
1408     self->write_through = write_through;
1409     Py_RETURN_NONE;
1410 }
1411 
1412 static int
textiowrapper_clear(textio * self)1413 textiowrapper_clear(textio *self)
1414 {
1415     self->ok = 0;
1416     Py_CLEAR(self->buffer);
1417     Py_CLEAR(self->encoding);
1418     Py_CLEAR(self->encoder);
1419     Py_CLEAR(self->decoder);
1420     Py_CLEAR(self->readnl);
1421     Py_CLEAR(self->decoded_chars);
1422     Py_CLEAR(self->pending_bytes);
1423     Py_CLEAR(self->snapshot);
1424     Py_CLEAR(self->errors);
1425     Py_CLEAR(self->raw);
1426 
1427     Py_CLEAR(self->dict);
1428     return 0;
1429 }
1430 
1431 static void
textiowrapper_dealloc(textio * self)1432 textiowrapper_dealloc(textio *self)
1433 {
1434     self->finalizing = 1;
1435     if (_PyIOBase_finalize((PyObject *) self) < 0)
1436         return;
1437     self->ok = 0;
1438     _PyObject_GC_UNTRACK(self);
1439     if (self->weakreflist != NULL)
1440         PyObject_ClearWeakRefs((PyObject *)self);
1441     textiowrapper_clear(self);
1442     Py_TYPE(self)->tp_free((PyObject *)self);
1443 }
1444 
1445 static int
textiowrapper_traverse(textio * self,visitproc visit,void * arg)1446 textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1447 {
1448     Py_VISIT(self->buffer);
1449     Py_VISIT(self->encoding);
1450     Py_VISIT(self->encoder);
1451     Py_VISIT(self->decoder);
1452     Py_VISIT(self->readnl);
1453     Py_VISIT(self->decoded_chars);
1454     Py_VISIT(self->pending_bytes);
1455     Py_VISIT(self->snapshot);
1456     Py_VISIT(self->errors);
1457     Py_VISIT(self->raw);
1458 
1459     Py_VISIT(self->dict);
1460     return 0;
1461 }
1462 
1463 static PyObject *
1464 textiowrapper_closed_get(textio *self, void *context);
1465 
1466 /* This macro takes some shortcuts to make the common case faster. */
1467 #define CHECK_CLOSED(self) \
1468     do { \
1469         int r; \
1470         PyObject *_res; \
1471         if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { \
1472             if (self->raw != NULL) \
1473                 r = _PyFileIO_closed(self->raw); \
1474             else { \
1475                 _res = textiowrapper_closed_get(self, NULL); \
1476                 if (_res == NULL) \
1477                     return NULL; \
1478                 r = PyObject_IsTrue(_res); \
1479                 Py_DECREF(_res); \
1480                 if (r < 0) \
1481                     return NULL; \
1482             } \
1483             if (r > 0) { \
1484                 PyErr_SetString(PyExc_ValueError, \
1485                                 "I/O operation on closed file."); \
1486                 return NULL; \
1487             } \
1488         } \
1489         else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1490             return NULL; \
1491     } while (0)
1492 
/* Make the enclosing function raise ValueError and return NULL unless the
 * object has been successfully initialized (ok > 0).
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and cannot capture a following "else"; invoke it with a
 * trailing semicolon.
 */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
1499 
/* Make the enclosing function raise ValueError and return NULL unless the
 * object is initialized and its underlying buffer has not been detached.
 *
 * Wrapped in do { } while (0) so that both checks form a single statement
 * (safe as the body of an unbraced "if"); invoke it with a trailing
 * semicolon.
 */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)
1507 
/* Same checks as CHECK_ATTACHED, for functions returning int: raises
 * ValueError and returns -1 when the object is uninitialized or detached.
 *
 * Wrapped in do { } while (0) so that the if/else chain cannot pair up with
 * an "else" at the call site; invoke it with a trailing semicolon.
 */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return -1; \
        } \
        else if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
1518 
1519 
1520 /*[clinic input]
1521 _io.TextIOWrapper.detach
1522 [clinic start generated code]*/
1523 
1524 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1525 _io_TextIOWrapper_detach_impl(textio *self)
1526 /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1527 {
1528     PyObject *buffer, *res;
1529     CHECK_ATTACHED(self);
1530     res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
1531     if (res == NULL)
1532         return NULL;
1533     Py_DECREF(res);
1534     buffer = self->buffer;
1535     self->buffer = NULL;
1536     self->detached = 1;
1537     return buffer;
1538 }
1539 
1540 /* Flush the internal write buffer. This doesn't explicitly flush the
1541    underlying buffered object, though. */
1542 static int
_textiowrapper_writeflush(textio * self)1543 _textiowrapper_writeflush(textio *self)
1544 {
1545     if (self->pending_bytes == NULL)
1546         return 0;
1547 
1548     PyObject *pending = self->pending_bytes;
1549     PyObject *b;
1550 
1551     if (PyBytes_Check(pending)) {
1552         b = pending;
1553         Py_INCREF(b);
1554     }
1555     else if (PyUnicode_Check(pending)) {
1556         assert(PyUnicode_IS_ASCII(pending));
1557         assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1558         b = PyBytes_FromStringAndSize(
1559                 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1560         if (b == NULL) {
1561             return -1;
1562         }
1563     }
1564     else {
1565         assert(PyList_Check(pending));
1566         b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1567         if (b == NULL) {
1568             return -1;
1569         }
1570 
1571         char *buf = PyBytes_AsString(b);
1572         Py_ssize_t pos = 0;
1573 
1574         for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1575             PyObject *obj = PyList_GET_ITEM(pending, i);
1576             char *src;
1577             Py_ssize_t len;
1578             if (PyUnicode_Check(obj)) {
1579                 assert(PyUnicode_IS_ASCII(obj));
1580                 src = PyUnicode_DATA(obj);
1581                 len = PyUnicode_GET_LENGTH(obj);
1582             }
1583             else {
1584                 assert(PyBytes_Check(obj));
1585                 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1586                     Py_DECREF(b);
1587                     return -1;
1588                 }
1589             }
1590             memcpy(buf + pos, src, len);
1591             pos += len;
1592         }
1593         assert(pos == self->pending_bytes_count);
1594     }
1595 
1596     self->pending_bytes_count = 0;
1597     self->pending_bytes = NULL;
1598     Py_DECREF(pending);
1599 
1600     PyObject *ret;
1601     do {
1602         ret = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
1603     } while (ret == NULL && _PyIO_trap_eintr());
1604     Py_DECREF(b);
1605     if (ret == NULL)
1606         return -1;
1607     Py_DECREF(ret);
1608     return 0;
1609 }
1610 
1611 /*[clinic input]
1612 _io.TextIOWrapper.write
1613     text: unicode
1614     /
1615 [clinic start generated code]*/
1616 
1617 static PyObject *
_io_TextIOWrapper_write_impl(textio * self,PyObject * text)1618 _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1619 /*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
1620 {
1621     PyObject *ret;
1622     PyObject *b;
1623     Py_ssize_t textlen;
1624     int haslf = 0;
1625     int needflush = 0, text_needflush = 0;
1626 
1627     if (PyUnicode_READY(text) == -1)
1628         return NULL;
1629 
1630     CHECK_ATTACHED(self);
1631     CHECK_CLOSED(self);
1632 
1633     if (self->encoder == NULL)
1634         return _unsupported("not writable");
1635 
1636     Py_INCREF(text);
1637 
1638     textlen = PyUnicode_GET_LENGTH(text);
1639 
1640     if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1641         if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1642             haslf = 1;
1643 
1644     if (haslf && self->writetranslate && self->writenl != NULL) {
1645         PyObject *newtext = _PyObject_CallMethodId(
1646             text, &PyId_replace, "ss", "\n", self->writenl);
1647         Py_DECREF(text);
1648         if (newtext == NULL)
1649             return NULL;
1650         text = newtext;
1651     }
1652 
1653     if (self->write_through)
1654         text_needflush = 1;
1655     if (self->line_buffering &&
1656         (haslf ||
1657          PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1658         needflush = 1;
1659 
1660     /* XXX What if we were just reading? */
1661     if (self->encodefunc != NULL) {
1662         if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
1663             b = text;
1664             Py_INCREF(b);
1665         }
1666         else {
1667             b = (*self->encodefunc)((PyObject *) self, text);
1668         }
1669         self->encoding_start_of_stream = 0;
1670     }
1671     else
1672         b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
1673 
1674     Py_DECREF(text);
1675     if (b == NULL)
1676         return NULL;
1677     if (b != text && !PyBytes_Check(b)) {
1678         PyErr_Format(PyExc_TypeError,
1679                      "encoder should return a bytes object, not '%.200s'",
1680                      Py_TYPE(b)->tp_name);
1681         Py_DECREF(b);
1682         return NULL;
1683     }
1684 
1685     Py_ssize_t bytes_len;
1686     if (b == text) {
1687         bytes_len = PyUnicode_GET_LENGTH(b);
1688     }
1689     else {
1690         bytes_len = PyBytes_GET_SIZE(b);
1691     }
1692 
1693     if (self->pending_bytes == NULL) {
1694         self->pending_bytes_count = 0;
1695         self->pending_bytes = b;
1696     }
1697     else if (!PyList_CheckExact(self->pending_bytes)) {
1698         PyObject *list = PyList_New(2);
1699         if (list == NULL) {
1700             Py_DECREF(b);
1701             return NULL;
1702         }
1703         PyList_SET_ITEM(list, 0, self->pending_bytes);
1704         PyList_SET_ITEM(list, 1, b);
1705         self->pending_bytes = list;
1706     }
1707     else {
1708         if (PyList_Append(self->pending_bytes, b) < 0) {
1709             Py_DECREF(b);
1710             return NULL;
1711         }
1712         Py_DECREF(b);
1713     }
1714 
1715     self->pending_bytes_count += bytes_len;
1716     if (self->pending_bytes_count > self->chunk_size || needflush ||
1717         text_needflush) {
1718         if (_textiowrapper_writeflush(self) < 0)
1719             return NULL;
1720     }
1721 
1722     if (needflush) {
1723         ret = PyObject_CallMethodNoArgs(self->buffer, _PyIO_str_flush);
1724         if (ret == NULL)
1725             return NULL;
1726         Py_DECREF(ret);
1727     }
1728 
1729     textiowrapper_set_decoded_chars(self, NULL);
1730     Py_CLEAR(self->snapshot);
1731 
1732     if (self->decoder) {
1733         ret = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
1734         if (ret == NULL)
1735             return NULL;
1736         Py_DECREF(ret);
1737     }
1738 
1739     return PyLong_FromSsize_t(textlen);
1740 }
1741 
1742 /* Steal a reference to chars and store it in the decoded_char buffer;
1743  */
1744 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1745 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1746 {
1747     Py_XSETREF(self->decoded_chars, chars);
1748     self->decoded_chars_used = 0;
1749 }
1750 
1751 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1752 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1753 {
1754     PyObject *chars;
1755     Py_ssize_t avail;
1756 
1757     if (self->decoded_chars == NULL)
1758         return PyUnicode_FromStringAndSize(NULL, 0);
1759 
1760     /* decoded_chars is guaranteed to be "ready". */
1761     avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1762              - self->decoded_chars_used);
1763 
1764     assert(avail >= 0);
1765 
1766     if (n < 0 || n > avail)
1767         n = avail;
1768 
1769     if (self->decoded_chars_used > 0 || n < avail) {
1770         chars = PyUnicode_Substring(self->decoded_chars,
1771                                     self->decoded_chars_used,
1772                                     self->decoded_chars_used + n);
1773         if (chars == NULL)
1774             return NULL;
1775     }
1776     else {
1777         chars = self->decoded_chars;
1778         Py_INCREF(chars);
1779     }
1780 
1781     self->decoded_chars_used += n;
1782     return chars;
1783 }
1784 
1785 /* Read and decode the next chunk of data from the BufferedReader.
1786  */
1787 static int
textiowrapper_read_chunk(textio * self,Py_ssize_t size_hint)1788 textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1789 {
1790     PyObject *dec_buffer = NULL;
1791     PyObject *dec_flags = NULL;
1792     PyObject *input_chunk = NULL;
1793     Py_buffer input_chunk_buf;
1794     PyObject *decoded_chars, *chunk_size;
1795     Py_ssize_t nbytes, nchars;
1796     int eof;
1797 
1798     /* The return value is True unless EOF was reached.  The decoded string is
1799      * placed in self._decoded_chars (replacing its previous value).  The
1800      * entire input chunk is sent to the decoder, though some of it may remain
1801      * buffered in the decoder, yet to be converted.
1802      */
1803 
1804     if (self->decoder == NULL) {
1805         _unsupported("not readable");
1806         return -1;
1807     }
1808 
1809     if (self->telling) {
1810         /* To prepare for tell(), we need to snapshot a point in the file
1811          * where the decoder's input buffer is empty.
1812          */
1813         PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
1814                                                      _PyIO_str_getstate);
1815         if (state == NULL)
1816             return -1;
1817         /* Given this, we know there was a valid snapshot point
1818          * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1819          */
1820         if (!PyTuple_Check(state)) {
1821             PyErr_SetString(PyExc_TypeError,
1822                             "illegal decoder state");
1823             Py_DECREF(state);
1824             return -1;
1825         }
1826         if (!PyArg_ParseTuple(state,
1827                               "OO;illegal decoder state", &dec_buffer, &dec_flags))
1828         {
1829             Py_DECREF(state);
1830             return -1;
1831         }
1832 
1833         if (!PyBytes_Check(dec_buffer)) {
1834             PyErr_Format(PyExc_TypeError,
1835                          "illegal decoder state: the first item should be a "
1836                          "bytes object, not '%.200s'",
1837                          Py_TYPE(dec_buffer)->tp_name);
1838             Py_DECREF(state);
1839             return -1;
1840         }
1841         Py_INCREF(dec_buffer);
1842         Py_INCREF(dec_flags);
1843         Py_DECREF(state);
1844     }
1845 
1846     /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1847     if (size_hint > 0) {
1848         size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1849     }
1850     chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1851     if (chunk_size == NULL)
1852         goto fail;
1853 
1854     input_chunk = PyObject_CallMethodOneArg(self->buffer,
1855         (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1856         chunk_size);
1857     Py_DECREF(chunk_size);
1858     if (input_chunk == NULL)
1859         goto fail;
1860 
1861     if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1862         PyErr_Format(PyExc_TypeError,
1863                      "underlying %s() should have returned a bytes-like object, "
1864                      "not '%.200s'", (self->has_read1 ? "read1": "read"),
1865                      Py_TYPE(input_chunk)->tp_name);
1866         goto fail;
1867     }
1868 
1869     nbytes = input_chunk_buf.len;
1870     eof = (nbytes == 0);
1871 
1872     decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1873     PyBuffer_Release(&input_chunk_buf);
1874     if (decoded_chars == NULL)
1875         goto fail;
1876 
1877     textiowrapper_set_decoded_chars(self, decoded_chars);
1878     nchars = PyUnicode_GET_LENGTH(decoded_chars);
1879     if (nchars > 0)
1880         self->b2cratio = (double) nbytes / nchars;
1881     else
1882         self->b2cratio = 0.0;
1883     if (nchars > 0)
1884         eof = 0;
1885 
1886     if (self->telling) {
1887         /* At the snapshot point, len(dec_buffer) bytes before the read, the
1888          * next input to be decoded is dec_buffer + input_chunk.
1889          */
1890         PyObject *next_input = dec_buffer;
1891         PyBytes_Concat(&next_input, input_chunk);
1892         dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1893         if (next_input == NULL) {
1894             goto fail;
1895         }
1896         PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1897         if (snapshot == NULL) {
1898             dec_flags = NULL;
1899             goto fail;
1900         }
1901         Py_XSETREF(self->snapshot, snapshot);
1902     }
1903     Py_DECREF(input_chunk);
1904 
1905     return (eof == 0);
1906 
1907   fail:
1908     Py_XDECREF(dec_buffer);
1909     Py_XDECREF(dec_flags);
1910     Py_XDECREF(input_chunk);
1911     return -1;
1912 }
1913 
1914 /*[clinic input]
1915 _io.TextIOWrapper.read
1916     size as n: Py_ssize_t(accept={int, NoneType}) = -1
1917     /
1918 [clinic start generated code]*/
1919 
1920 static PyObject *
_io_TextIOWrapper_read_impl(textio * self,Py_ssize_t n)1921 _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1922 /*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
1923 {
1924     PyObject *result = NULL, *chunks = NULL;
1925 
1926     CHECK_ATTACHED(self);
1927     CHECK_CLOSED(self);
1928 
1929     if (self->decoder == NULL)
1930         return _unsupported("not readable");
1931 
1932     if (_textiowrapper_writeflush(self) < 0)
1933         return NULL;
1934 
1935     if (n < 0) {
1936         /* Read everything */
1937         PyObject *bytes = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_read);
1938         PyObject *decoded;
1939         if (bytes == NULL)
1940             goto fail;
1941 
1942         if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type))
1943             decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1944                                                           bytes, 1);
1945         else
1946             decoded = PyObject_CallMethodObjArgs(
1947                 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
1948         Py_DECREF(bytes);
1949         if (check_decoded(decoded) < 0)
1950             goto fail;
1951 
1952         result = textiowrapper_get_decoded_chars(self, -1);
1953 
1954         if (result == NULL) {
1955             Py_DECREF(decoded);
1956             return NULL;
1957         }
1958 
1959         PyUnicode_AppendAndDel(&result, decoded);
1960         if (result == NULL)
1961             goto fail;
1962 
1963         textiowrapper_set_decoded_chars(self, NULL);
1964         Py_CLEAR(self->snapshot);
1965         return result;
1966     }
1967     else {
1968         int res = 1;
1969         Py_ssize_t remaining = n;
1970 
1971         result = textiowrapper_get_decoded_chars(self, n);
1972         if (result == NULL)
1973             goto fail;
1974         if (PyUnicode_READY(result) == -1)
1975             goto fail;
1976         remaining -= PyUnicode_GET_LENGTH(result);
1977 
1978         /* Keep reading chunks until we have n characters to return */
1979         while (remaining > 0) {
1980             res = textiowrapper_read_chunk(self, remaining);
1981             if (res < 0) {
1982                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1983                    when EINTR occurs so we needn't do it ourselves. */
1984                 if (_PyIO_trap_eintr()) {
1985                     continue;
1986                 }
1987                 goto fail;
1988             }
1989             if (res == 0)  /* EOF */
1990                 break;
1991             if (chunks == NULL) {
1992                 chunks = PyList_New(0);
1993                 if (chunks == NULL)
1994                     goto fail;
1995             }
1996             if (PyUnicode_GET_LENGTH(result) > 0 &&
1997                 PyList_Append(chunks, result) < 0)
1998                 goto fail;
1999             Py_DECREF(result);
2000             result = textiowrapper_get_decoded_chars(self, remaining);
2001             if (result == NULL)
2002                 goto fail;
2003             remaining -= PyUnicode_GET_LENGTH(result);
2004         }
2005         if (chunks != NULL) {
2006             if (result != NULL && PyList_Append(chunks, result) < 0)
2007                 goto fail;
2008             Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
2009             if (result == NULL)
2010                 goto fail;
2011             Py_CLEAR(chunks);
2012         }
2013         return result;
2014     }
2015   fail:
2016     Py_XDECREF(result);
2017     Py_XDECREF(chunks);
2018     return NULL;
2019 }
2020 
2021 
2022 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2023    that is to the NUL character. Otherwise the function will produce
2024    incorrect results. */
2025 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)2026 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2027 {
2028     if (kind == PyUnicode_1BYTE_KIND) {
2029         assert(ch < 256);
2030         return (char *) memchr((const void *) s, (char) ch, end - s);
2031     }
2032     for (;;) {
2033         while (PyUnicode_READ(kind, s, 0) > ch)
2034             s += kind;
2035         if (PyUnicode_READ(kind, s, 0) == ch)
2036             return s;
2037         if (s == end)
2038             return NULL;
2039         s += kind;
2040     }
2041 }
2042 
2043 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,int kind,const char * start,const char * end,Py_ssize_t * consumed)2044 _PyIO_find_line_ending(
2045     int translated, int universal, PyObject *readnl,
2046     int kind, const char *start, const char *end, Py_ssize_t *consumed)
2047 {
2048     Py_ssize_t len = (end - start)/kind;
2049 
2050     if (translated) {
2051         /* Newlines are already translated, only search for \n */
2052         const char *pos = find_control_char(kind, start, end, '\n');
2053         if (pos != NULL)
2054             return (pos - start)/kind + 1;
2055         else {
2056             *consumed = len;
2057             return -1;
2058         }
2059     }
2060     else if (universal) {
2061         /* Universal newline search. Find any of \r, \r\n, \n
2062          * The decoder ensures that \r\n are not split in two pieces
2063          */
2064         const char *s = start;
2065         for (;;) {
2066             Py_UCS4 ch;
2067             /* Fast path for non-control chars. The loop always ends
2068                since the Unicode string is NUL-terminated. */
2069             while (PyUnicode_READ(kind, s, 0) > '\r')
2070                 s += kind;
2071             if (s >= end) {
2072                 *consumed = len;
2073                 return -1;
2074             }
2075             ch = PyUnicode_READ(kind, s, 0);
2076             s += kind;
2077             if (ch == '\n')
2078                 return (s - start)/kind;
2079             if (ch == '\r') {
2080                 if (PyUnicode_READ(kind, s, 0) == '\n')
2081                     return (s - start)/kind + 1;
2082                 else
2083                     return (s - start)/kind;
2084             }
2085         }
2086     }
2087     else {
2088         /* Non-universal mode. */
2089         Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2090         const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2091         /* Assume that readnl is an ASCII character. */
2092         assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2093         if (readnl_len == 1) {
2094             const char *pos = find_control_char(kind, start, end, nl[0]);
2095             if (pos != NULL)
2096                 return (pos - start)/kind + 1;
2097             *consumed = len;
2098             return -1;
2099         }
2100         else {
2101             const char *s = start;
2102             const char *e = end - (readnl_len - 1)*kind;
2103             const char *pos;
2104             if (e < s)
2105                 e = s;
2106             while (s < e) {
2107                 Py_ssize_t i;
2108                 const char *pos = find_control_char(kind, s, end, nl[0]);
2109                 if (pos == NULL || pos >= e)
2110                     break;
2111                 for (i = 1; i < readnl_len; i++) {
2112                     if (PyUnicode_READ(kind, pos, i) != nl[i])
2113                         break;
2114                 }
2115                 if (i == readnl_len)
2116                     return (pos - start)/kind + readnl_len;
2117                 s = pos + kind;
2118             }
2119             pos = find_control_char(kind, e, end, nl[0]);
2120             if (pos == NULL)
2121                 *consumed = len;
2122             else
2123                 *consumed = (pos - start)/kind;
2124             return -1;
2125         }
2126     }
2127 }
2128 
2129 static PyObject *
_textiowrapper_readline(textio * self,Py_ssize_t limit)2130 _textiowrapper_readline(textio *self, Py_ssize_t limit)
2131 {
2132     PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2133     Py_ssize_t start, endpos, chunked, offset_to_buffer;
2134     int res;
2135 
2136     CHECK_CLOSED(self);
2137 
2138     if (_textiowrapper_writeflush(self) < 0)
2139         return NULL;
2140 
2141     chunked = 0;
2142 
2143     while (1) {
2144         const char *ptr;
2145         Py_ssize_t line_len;
2146         int kind;
2147         Py_ssize_t consumed = 0;
2148 
2149         /* First, get some data if necessary */
2150         res = 1;
2151         while (!self->decoded_chars ||
2152                !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2153             res = textiowrapper_read_chunk(self, 0);
2154             if (res < 0) {
2155                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2156                    when EINTR occurs so we needn't do it ourselves. */
2157                 if (_PyIO_trap_eintr()) {
2158                     continue;
2159                 }
2160                 goto error;
2161             }
2162             if (res == 0)
2163                 break;
2164         }
2165         if (res == 0) {
2166             /* end of file */
2167             textiowrapper_set_decoded_chars(self, NULL);
2168             Py_CLEAR(self->snapshot);
2169             start = endpos = offset_to_buffer = 0;
2170             break;
2171         }
2172 
2173         if (remaining == NULL) {
2174             line = self->decoded_chars;
2175             start = self->decoded_chars_used;
2176             offset_to_buffer = 0;
2177             Py_INCREF(line);
2178         }
2179         else {
2180             assert(self->decoded_chars_used == 0);
2181             line = PyUnicode_Concat(remaining, self->decoded_chars);
2182             start = 0;
2183             offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2184             Py_CLEAR(remaining);
2185             if (line == NULL)
2186                 goto error;
2187             if (PyUnicode_READY(line) == -1)
2188                 goto error;
2189         }
2190 
2191         ptr = PyUnicode_DATA(line);
2192         line_len = PyUnicode_GET_LENGTH(line);
2193         kind = PyUnicode_KIND(line);
2194 
2195         endpos = _PyIO_find_line_ending(
2196             self->readtranslate, self->readuniversal, self->readnl,
2197             kind,
2198             ptr + kind * start,
2199             ptr + kind * line_len,
2200             &consumed);
2201         if (endpos >= 0) {
2202             endpos += start;
2203             if (limit >= 0 && (endpos - start) + chunked >= limit)
2204                 endpos = start + limit - chunked;
2205             break;
2206         }
2207 
2208         /* We can put aside up to `endpos` */
2209         endpos = consumed + start;
2210         if (limit >= 0 && (endpos - start) + chunked >= limit) {
2211             /* Didn't find line ending, but reached length limit */
2212             endpos = start + limit - chunked;
2213             break;
2214         }
2215 
2216         if (endpos > start) {
2217             /* No line ending seen yet - put aside current data */
2218             PyObject *s;
2219             if (chunks == NULL) {
2220                 chunks = PyList_New(0);
2221                 if (chunks == NULL)
2222                     goto error;
2223             }
2224             s = PyUnicode_Substring(line, start, endpos);
2225             if (s == NULL)
2226                 goto error;
2227             if (PyList_Append(chunks, s) < 0) {
2228                 Py_DECREF(s);
2229                 goto error;
2230             }
2231             chunked += PyUnicode_GET_LENGTH(s);
2232             Py_DECREF(s);
2233         }
2234         /* There may be some remaining bytes we'll have to prepend to the
2235            next chunk of data */
2236         if (endpos < line_len) {
2237             remaining = PyUnicode_Substring(line, endpos, line_len);
2238             if (remaining == NULL)
2239                 goto error;
2240         }
2241         Py_CLEAR(line);
2242         /* We have consumed the buffer */
2243         textiowrapper_set_decoded_chars(self, NULL);
2244     }
2245 
2246     if (line != NULL) {
2247         /* Our line ends in the current buffer */
2248         self->decoded_chars_used = endpos - offset_to_buffer;
2249         if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2250             PyObject *s = PyUnicode_Substring(line, start, endpos);
2251             Py_CLEAR(line);
2252             if (s == NULL)
2253                 goto error;
2254             line = s;
2255         }
2256     }
2257     if (remaining != NULL) {
2258         if (chunks == NULL) {
2259             chunks = PyList_New(0);
2260             if (chunks == NULL)
2261                 goto error;
2262         }
2263         if (PyList_Append(chunks, remaining) < 0)
2264             goto error;
2265         Py_CLEAR(remaining);
2266     }
2267     if (chunks != NULL) {
2268         if (line != NULL) {
2269             if (PyList_Append(chunks, line) < 0)
2270                 goto error;
2271             Py_DECREF(line);
2272         }
2273         line = PyUnicode_Join(_PyIO_empty_str, chunks);
2274         if (line == NULL)
2275             goto error;
2276         Py_CLEAR(chunks);
2277     }
2278     if (line == NULL) {
2279         Py_INCREF(_PyIO_empty_str);
2280         line = _PyIO_empty_str;
2281     }
2282 
2283     return line;
2284 
2285   error:
2286     Py_XDECREF(chunks);
2287     Py_XDECREF(remaining);
2288     Py_XDECREF(line);
2289     return NULL;
2290 }
2291 
2292 /*[clinic input]
2293 _io.TextIOWrapper.readline
2294     size: Py_ssize_t = -1
2295     /
2296 [clinic start generated code]*/
2297 
static PyObject *
_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
{
    /* Method entry point for readline(size): run the attached-state check,
       then delegate to _textiowrapper_readline() with `size` as the
       character limit. */
    CHECK_ATTACHED(self);
    return _textiowrapper_readline(self, size);
}
2305 
2306 /* Seek and Tell */
2307 
/* Unpacked form of the integer cookie exchanged by tell() and seek(). */
typedef struct {
    Py_off_t start_pos;     /* position in the underlying buffer that seek()
                               goes back to (the "safe start point") */
    int dec_flags;          /* flags passed to decoder.setstate() there */
    int bytes_to_feed;      /* bytes seek() reads from that point and feeds
                               to the decoder */
    int chars_to_skip;      /* decoded characters seek() then marks as used */
    char need_eof;          /* non-zero: seek() decodes with final=True */
} cookie_type;
2315 
/*
   To speed up cookie packing/unpacking, the cookie_type fields are copied
   through a temporary byte string that is converted with
   _PyLong_FromByteArray() (packing) or _PyLong_AsByteArray() (unpacking).
   The macros below give the offset of each field inside that byte string;
   every offset is expressed relative to the field stored just before it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
2347 
2348 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)2349 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2350 {
2351     unsigned char buffer[COOKIE_BUF_LEN];
2352     PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2353     if (cookieLong == NULL)
2354         return -1;
2355 
2356     if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2357                             PY_LITTLE_ENDIAN, 0) < 0) {
2358         Py_DECREF(cookieLong);
2359         return -1;
2360     }
2361     Py_DECREF(cookieLong);
2362 
2363     memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2364     memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2365     memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2366     memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2367     memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2368 
2369     return 0;
2370 }
2371 
2372 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)2373 textiowrapper_build_cookie(cookie_type *cookie)
2374 {
2375     unsigned char buffer[COOKIE_BUF_LEN];
2376 
2377     memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2378     memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2379     memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2380     memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2381     memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2382 
2383     return _PyLong_FromByteArray(buffer, sizeof(buffer),
2384                                  PY_LITTLE_ENDIAN, 0);
2385 }
2386 
2387 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2388 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2389 {
2390     PyObject *res;
2391     /* When seeking to the start of the stream, we call decoder.reset()
2392        rather than decoder.getstate().
2393        This is for a few decoders such as utf-16 for which the state value
2394        at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2395        utf-16, that we are expecting a BOM).
2396     */
2397     if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2398         res = PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
2399     else
2400         res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2401                                      "((yi))", "", cookie->dec_flags);
2402     if (res == NULL)
2403         return -1;
2404     Py_DECREF(res);
2405     return 0;
2406 }
2407 
2408 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2409 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2410 {
2411     PyObject *res;
2412     if (start_of_stream) {
2413         res = PyObject_CallMethodNoArgs(self->encoder, _PyIO_str_reset);
2414         self->encoding_start_of_stream = 1;
2415     }
2416     else {
2417         res = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_setstate,
2418                                         _PyLong_Zero);
2419         self->encoding_start_of_stream = 0;
2420     }
2421     if (res == NULL)
2422         return -1;
2423     Py_DECREF(res);
2424     return 0;
2425 }
2426 
2427 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2428 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2429 {
2430     /* Same as _textiowrapper_decoder_setstate() above. */
2431     return _textiowrapper_encoder_reset(
2432         self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2433 }
2434 
2435 /*[clinic input]
2436 _io.TextIOWrapper.seek
2437     cookie as cookieObj: object
2438     whence: int = 0
2439     /
2440 [clinic start generated code]*/
2441 
static PyObject *
_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
{
    /* Implements seek(cookie, whence).
     *
     * SEEK_SET: `cookieObj` is a cookie that is unpacked with
     *   textiowrapper_parse_cookie() and replayed; the cookie object is
     *   returned.
     * SEEK_CUR: only a zero offset is accepted; the cookie is replaced by
     *   the result of self.tell() and then handled like SEEK_SET.
     * SEEK_END: only a zero offset is accepted; the result of
     *   buffer.seek(0, 2) is returned.
     *
     * Returns a new reference, or NULL with an exception set.
     */
    PyObject *posobj;
    cookie_type cookie;
    PyObject *res;
    int cmp;
    PyObject *snapshot;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    /* From here on this function owns a reference to cookieObj: it is
     * either returned to the caller or released at the `fail` label. */
    Py_INCREF(cookieObj);

    if (!self->seekable) {
        _unsupported("underlying stream is not seekable");
        goto fail;
    }

    switch (whence) {
    case SEEK_CUR:
        /* seek relative to current position */
        cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
        if (cmp < 0)
            goto fail;

        if (cmp == 0) {
            _unsupported("can't do nonzero cur-relative seeks");
            goto fail;
        }

        /* Seeking to the current position should attempt to
         * sync the underlying buffer with the current position.
         */
        Py_DECREF(cookieObj);
        cookieObj = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_tell);
        if (cookieObj == NULL)
            goto fail;
        break;

    case SEEK_END:
        /* seek relative to end of file */
        cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
        if (cmp < 0)
            goto fail;

        if (cmp == 0) {
            _unsupported("can't do nonzero end-relative seeks");
            goto fail;
        }

        res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
        if (res == NULL)
            goto fail;
        Py_DECREF(res);

        /* Drop everything decoded so far before moving the buffer. */
        textiowrapper_set_decoded_chars(self, NULL);
        Py_CLEAR(self->snapshot);
        if (self->decoder) {
            res = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
            if (res == NULL)
                goto fail;
            Py_DECREF(res);
        }

        res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
        /* The cookie is not returned on this path; clearing it here makes
         * the Py_XDECREF at `fail` a no-op. */
        Py_CLEAR(cookieObj);
        if (res == NULL)
            goto fail;
        if (self->encoder) {
            /* If seek() == 0, we are at the start of stream, otherwise not */
            cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
            if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
                Py_DECREF(res);
                goto fail;
            }
        }
        return res;

    case SEEK_SET:
        break;

    default:
        PyErr_Format(PyExc_ValueError,
                     "invalid whence (%d, should be %d, %d or %d)", whence,
                     SEEK_SET, SEEK_CUR, SEEK_END);
        goto fail;
    }

    /* Absolute seek to the position described by cookieObj. */
    cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
    if (cmp < 0)
        goto fail;

    if (cmp == 1) {
        PyErr_Format(PyExc_ValueError,
                     "negative seek position %R", cookieObj);
        goto fail;
    }

    res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    /* The strategy of seek() is to go back to the safe start point
     * and replay the effect of read(chars_to_skip) from there.
     */
    if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
        goto fail;

    /* Seek back to the safe start point. */
    posobj = PyLong_FromOff_t(cookie.start_pos);
    if (posobj == NULL)
        goto fail;
    res = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_seek, posobj);
    Py_DECREF(posobj);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    textiowrapper_set_decoded_chars(self, NULL);
    Py_CLEAR(self->snapshot);

    /* Restore the decoder to its state from the safe start point. */
    if (self->decoder) {
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
    }

    if (cookie.chars_to_skip) {
        /* Just like _read_chunk, feed the decoder and save a snapshot. */
        PyObject *input_chunk = _PyObject_CallMethodId(
            self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
        PyObject *decoded;

        if (input_chunk == NULL)
            goto fail;

        if (!PyBytes_Check(input_chunk)) {
            PyErr_Format(PyExc_TypeError,
                         "underlying read() should have returned a bytes "
                         "object, not '%.200s'",
                         Py_TYPE(input_chunk)->tp_name);
            Py_DECREF(input_chunk);
            goto fail;
        }

        /* "N" hands our reference to input_chunk over to the tuple, so the
         * pointer used below is kept alive by self->snapshot. */
        snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
        if (snapshot == NULL) {
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);

        decoded = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_decode,
            input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);

        if (check_decoded(decoded) < 0)
            goto fail;

        textiowrapper_set_decoded_chars(self, decoded);

        /* Skip chars_to_skip of the decoded characters. */
        if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
            PyErr_SetString(PyExc_OSError, "can't restore logical file position");
            goto fail;
        }
        self->decoded_chars_used = cookie.chars_to_skip;
    }
    else {
        /* Nothing to replay: the snapshot is (dec_flags, b""). */
        snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
        if (snapshot == NULL)
            goto fail;
        Py_XSETREF(self->snapshot, snapshot);
    }

    /* Finally, reset the encoder (merely useful for proper BOM handling) */
    if (self->encoder) {
        if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
            goto fail;
    }
    return cookieObj;
  fail:
    Py_XDECREF(cookieObj);
    return NULL;

}
2629 
2630 /*[clinic input]
2631 _io.TextIOWrapper.tell
2632 [clinic start generated code]*/
2633 
static PyObject *
_io_TextIOWrapper_tell_impl(textio *self)
/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
{
    /* Compute the value returned by tell().
     *
     * Pending writes are flushed first.  If there is no decoder or no
     * snapshot, the underlying buffer's tell() result is returned as is.
     * Otherwise a cookie is assembled from start_pos, dec_flags,
     * bytes_to_feed, chars_to_skip and need_eof, and encoded with
     * textiowrapper_build_cookie().  To find those values the decoder is
     * re-run over the input bytes stored in the snapshot; the decoder's
     * state on entry is captured in `saved_state` and handed back to
     * setstate() on both the `finally` and the `fail` exit.
     *
     * Returns a new reference, or NULL with an exception set.
     */
    PyObject *res;
    PyObject *posobj = NULL;
    cookie_type cookie = {0,0,0,0,0};
    PyObject *next_input;
    Py_ssize_t chars_to_skip, chars_decoded;
    Py_ssize_t skip_bytes, skip_back;
    PyObject *saved_state = NULL;
    const char *input, *input_end;
    Py_ssize_t dec_buffer_len;
    int dec_flags;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (!self->seekable) {
        _unsupported("underlying stream is not seekable");
        goto fail;
    }
    if (!self->telling) {
        PyErr_SetString(PyExc_OSError,
                        "telling position disabled by next() call");
        goto fail;
    }

    /* saved_state is still NULL here, so a plain return is equivalent to
       `goto fail`. */
    if (_textiowrapper_writeflush(self) < 0)
        return NULL;
    res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    posobj = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_tell);
    if (posobj == NULL)
        goto fail;

    /* Without a decoder or a snapshot there is nothing to reconstruct:
       the buffer's own position is the answer. */
    if (self->decoder == NULL || self->snapshot == NULL) {
        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
        return posobj;
    }

#if defined(HAVE_LARGEFILE_SUPPORT)
    cookie.start_pos = PyLong_AsLongLong(posobj);
#else
    cookie.start_pos = PyLong_AsLong(posobj);
#endif
    Py_DECREF(posobj);
    /* Conversion failure is detected through the error indicator. */
    if (PyErr_Occurred())
        goto fail;

    /* Skip backward to the snapshot point (see _read_chunk). */
    assert(PyTuple_Check(self->snapshot));
    /* next_input is a borrowed reference into the snapshot tuple. */
    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
        goto fail;

    assert (PyBytes_Check(next_input));

    /* Back up by the length of the snapshot's input bytes. */
    cookie.start_pos -= PyBytes_GET_SIZE(next_input);

    /* How many decoded characters have been used up since the snapshot? */
    if (self->decoded_chars_used == 0)  {
        /* We haven't moved from the snapshot point. */
        return textiowrapper_build_cookie(&cookie);
    }

    chars_to_skip = self->decoded_chars_used;

    /* Decoder state will be restored at the end */
    saved_state = PyObject_CallMethodNoArgs(self->decoder,
                                             _PyIO_str_getstate);
    if (saved_state == NULL)
        goto fail;

/* Call decoder.getstate(), validate that it is a (bytes, int) tuple, and
   store the length of the bytes item in dec_buffer_len and the int in
   dec_flags.  Jumps to `fail` on any error. */
#define DECODER_GETSTATE() do { \
        PyObject *dec_buffer; \
        PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
            _PyIO_str_getstate); \
        if (_state == NULL) \
            goto fail; \
        if (!PyTuple_Check(_state)) { \
            PyErr_SetString(PyExc_TypeError, \
                            "illegal decoder state"); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
                              &dec_buffer, &dec_flags)) \
        { \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyBytes_Check(dec_buffer)) { \
            PyErr_Format(PyExc_TypeError, \
                         "illegal decoder state: the first item should be a " \
                         "bytes object, not '%.200s'", \
                         Py_TYPE(dec_buffer)->tp_name); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
        Py_DECREF(_state); \
    } while (0)

/* Feed `len` bytes starting at `start` to decoder.decode() and store the
   number of characters produced in `res`.  Jumps to `fail` when
   check_decoded() rejects the result. */
#define DECODER_DECODE(start, len, res) do { \
        PyObject *_decoded = _PyObject_CallMethodId( \
            self->decoder, &PyId_decode, "y#", start, len); \
        if (check_decoded(_decoded) < 0) \
            goto fail; \
        res = PyUnicode_GET_LENGTH(_decoded); \
        Py_DECREF(_decoded); \
    } while (0)

    /* Fast search for an acceptable start point, close to our
       current pos */
    /* Initial guess: scale the character count by b2cratio (presumably a
       bytes-per-character estimate; it is maintained outside this
       function). */
    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
    skip_back = 1;
    assert(skip_back <= PyBytes_GET_SIZE(next_input));
    input = PyBytes_AS_STRING(next_input);
    while (skip_bytes > 0) {
        /* Decode up to tentative start point */
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
        DECODER_DECODE(input, skip_bytes, chars_decoded);
        if (chars_decoded <= chars_to_skip) {
            DECODER_GETSTATE();
            if (dec_buffer_len == 0) {
                /* Before pos and no bytes buffered in decoder => OK */
                cookie.dec_flags = dec_flags;
                chars_to_skip -= chars_decoded;
                break;
            }
            /* Skip back by buffered amount and reset heuristic */
            skip_bytes -= dec_buffer_len;
            skip_back = 1;
        }
        else {
            /* We're too far ahead, skip back a bit */
            skip_bytes -= skip_back;
            skip_back *= 2;
        }
    }
    /* The search backed all the way up without finding a usable point:
       start from the snapshot itself and put the decoder back into the
       state described by the cookie. */
    if (skip_bytes <= 0) {
        skip_bytes = 0;
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
    }

    /* Note our initial start point. */
    cookie.start_pos += skip_bytes;
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    if (chars_to_skip == 0)
        goto finally;

    /* We should be close to the desired position.  Now feed the decoder one
     * byte at a time until we reach the `chars_to_skip` target.
     * As we go, note the nearest "safe start point" before the current
     * location (a point where the decoder has nothing buffered, so seek()
     * can safely start from there and advance to this location).
     */
    chars_decoded = 0;
    input = PyBytes_AS_STRING(next_input);
    input_end = input + PyBytes_GET_SIZE(next_input);
    input += skip_bytes;
    while (input < input_end) {
        Py_ssize_t n;

        DECODER_DECODE(input, (Py_ssize_t)1, n);
        /* We got n chars for 1 byte */
        chars_decoded += n;
        cookie.bytes_to_feed += 1;
        DECODER_GETSTATE();

        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
            /* Decoder buffer is empty, so this is a safe start point. */
            cookie.start_pos += cookie.bytes_to_feed;
            chars_to_skip -= chars_decoded;
            cookie.dec_flags = dec_flags;
            cookie.bytes_to_feed = 0;
            chars_decoded = 0;
        }
        /* Leaving via break keeps `input` short of input_end, so the EOF
           handling below only runs when every byte was consumed. */
        if (chars_decoded >= chars_to_skip)
            break;
        input++;
    }
    if (input == input_end) {
        /* We didn't get enough decoded data; signal EOF to get more. */
        PyObject *decoded = _PyObject_CallMethodId(
            self->decoder, &PyId_decode, "yO", "", /* final = */ Py_True);
        if (check_decoded(decoded) < 0)
            goto fail;
        chars_decoded += PyUnicode_GET_LENGTH(decoded);
        Py_DECREF(decoded);
        cookie.need_eof = 1;

        if (chars_decoded < chars_to_skip) {
            PyErr_SetString(PyExc_OSError,
                            "can't reconstruct logical file position");
            goto fail;
        }
    }

finally:
    /* Success path: hand the state captured on entry back to the decoder
       before building the result. */
    res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
    Py_DECREF(saved_state);
    if (res == NULL)
        return NULL;
    Py_DECREF(res);

    /* The returned cookie corresponds to the last safe start point. */
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    return textiowrapper_build_cookie(&cookie);

fail:
    /* Error path: if the decoder state was captured, try to put it back.
       The pending exception is fetched before the setstate() call and
       passed to _PyErr_ChainExceptions() afterwards, so the original
       failure is not silently discarded by the restore attempt. */
    if (saved_state) {
        PyObject *type, *value, *traceback;
        PyErr_Fetch(&type, &value, &traceback);
        res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
        _PyErr_ChainExceptions(type, value, traceback);
        Py_DECREF(saved_state);
        Py_XDECREF(res);
    }
    return NULL;
}
2860 
2861 /*[clinic input]
2862 _io.TextIOWrapper.truncate
2863     pos: object = None
2864     /
2865 [clinic start generated code]*/
2866 
2867 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2868 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2869 /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2870 {
2871     PyObject *res;
2872 
2873     CHECK_ATTACHED(self)
2874 
2875     res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
2876     if (res == NULL)
2877         return NULL;
2878     Py_DECREF(res);
2879 
2880     return PyObject_CallMethodOneArg(self->buffer, _PyIO_str_truncate, pos);
2881 }
2882 
2883 static PyObject *
textiowrapper_repr(textio * self)2884 textiowrapper_repr(textio *self)
2885 {
2886     PyObject *nameobj, *modeobj, *res, *s;
2887     int status;
2888 
2889     CHECK_INITIALIZED(self);
2890 
2891     res = PyUnicode_FromString("<_io.TextIOWrapper");
2892     if (res == NULL)
2893         return NULL;
2894 
2895     status = Py_ReprEnter((PyObject *)self);
2896     if (status != 0) {
2897         if (status > 0) {
2898             PyErr_Format(PyExc_RuntimeError,
2899                          "reentrant call inside %s.__repr__",
2900                          Py_TYPE(self)->tp_name);
2901         }
2902         goto error;
2903     }
2904     if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2905         if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
2906             goto error;
2907         }
2908         /* Ignore ValueError raised if the underlying stream was detached */
2909         PyErr_Clear();
2910     }
2911     if (nameobj != NULL) {
2912         s = PyUnicode_FromFormat(" name=%R", nameobj);
2913         Py_DECREF(nameobj);
2914         if (s == NULL)
2915             goto error;
2916         PyUnicode_AppendAndDel(&res, s);
2917         if (res == NULL)
2918             goto error;
2919     }
2920     if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2921         goto error;
2922     }
2923     if (modeobj != NULL) {
2924         s = PyUnicode_FromFormat(" mode=%R", modeobj);
2925         Py_DECREF(modeobj);
2926         if (s == NULL)
2927             goto error;
2928         PyUnicode_AppendAndDel(&res, s);
2929         if (res == NULL)
2930             goto error;
2931     }
2932     s = PyUnicode_FromFormat("%U encoding=%R>",
2933                              res, self->encoding);
2934     Py_DECREF(res);
2935     if (status == 0) {
2936         Py_ReprLeave((PyObject *)self);
2937     }
2938     return s;
2939 
2940   error:
2941     Py_XDECREF(res);
2942     if (status == 0) {
2943         Py_ReprLeave((PyObject *)self);
2944     }
2945     return NULL;
2946 }
2947 
2948 
2949 /* Inquiries */
2950 
2951 /*[clinic input]
2952 _io.TextIOWrapper.fileno
2953 [clinic start generated code]*/
2954 
2955 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)2956 _io_TextIOWrapper_fileno_impl(textio *self)
2957 /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2958 {
2959     CHECK_ATTACHED(self);
2960     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_fileno);
2961 }
2962 
2963 /*[clinic input]
2964 _io.TextIOWrapper.seekable
2965 [clinic start generated code]*/
2966 
2967 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)2968 _io_TextIOWrapper_seekable_impl(textio *self)
2969 /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2970 {
2971     CHECK_ATTACHED(self);
2972     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_seekable);
2973 }
2974 
2975 /*[clinic input]
2976 _io.TextIOWrapper.readable
2977 [clinic start generated code]*/
2978 
2979 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)2980 _io_TextIOWrapper_readable_impl(textio *self)
2981 /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2982 {
2983     CHECK_ATTACHED(self);
2984     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
2985 }
2986 
2987 /*[clinic input]
2988 _io.TextIOWrapper.writable
2989 [clinic start generated code]*/
2990 
2991 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)2992 _io_TextIOWrapper_writable_impl(textio *self)
2993 /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
2994 {
2995     CHECK_ATTACHED(self);
2996     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
2997 }
2998 
2999 /*[clinic input]
3000 _io.TextIOWrapper.isatty
3001 [clinic start generated code]*/
3002 
3003 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)3004 _io_TextIOWrapper_isatty_impl(textio *self)
3005 /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
3006 {
3007     CHECK_ATTACHED(self);
3008     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_isatty);
3009 }
3010 
3011 /*[clinic input]
3012 _io.TextIOWrapper.flush
3013 [clinic start generated code]*/
3014 
3015 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)3016 _io_TextIOWrapper_flush_impl(textio *self)
3017 /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
3018 {
3019     CHECK_ATTACHED(self);
3020     CHECK_CLOSED(self);
3021     self->telling = self->seekable;
3022     if (_textiowrapper_writeflush(self) < 0)
3023         return NULL;
3024     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_flush);
3025 }
3026 
3027 /*[clinic input]
3028 _io.TextIOWrapper.close
3029 [clinic start generated code]*/
3030 
3031 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)3032 _io_TextIOWrapper_close_impl(textio *self)
3033 /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
3034 {
3035     PyObject *res;
3036     int r;
3037     CHECK_ATTACHED(self);
3038 
3039     res = textiowrapper_closed_get(self, NULL);
3040     if (res == NULL)
3041         return NULL;
3042     r = PyObject_IsTrue(res);
3043     Py_DECREF(res);
3044     if (r < 0)
3045         return NULL;
3046 
3047     if (r > 0) {
3048         Py_RETURN_NONE; /* stream already closed */
3049     }
3050     else {
3051         PyObject *exc = NULL, *val, *tb;
3052         if (self->finalizing) {
3053             res = _PyObject_CallMethodIdOneArg(self->buffer,
3054                                               &PyId__dealloc_warn,
3055                                               (PyObject *)self);
3056             if (res)
3057                 Py_DECREF(res);
3058             else
3059                 PyErr_Clear();
3060         }
3061         res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
3062         if (res == NULL)
3063             PyErr_Fetch(&exc, &val, &tb);
3064         else
3065             Py_DECREF(res);
3066 
3067         res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_close);
3068         if (exc != NULL) {
3069             _PyErr_ChainExceptions(exc, val, tb);
3070             Py_CLEAR(res);
3071         }
3072         return res;
3073     }
3074 }
3075 
3076 static PyObject *
textiowrapper_iternext(textio * self)3077 textiowrapper_iternext(textio *self)
3078 {
3079     PyObject *line;
3080 
3081     CHECK_ATTACHED(self);
3082 
3083     self->telling = 0;
3084     if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) {
3085         /* Skip method call overhead for speed */
3086         line = _textiowrapper_readline(self, -1);
3087     }
3088     else {
3089         line = PyObject_CallMethodNoArgs((PyObject *)self,
3090                                           _PyIO_str_readline);
3091         if (line && !PyUnicode_Check(line)) {
3092             PyErr_Format(PyExc_OSError,
3093                          "readline() should have returned a str object, "
3094                          "not '%.200s'", Py_TYPE(line)->tp_name);
3095             Py_DECREF(line);
3096             return NULL;
3097         }
3098     }
3099 
3100     if (line == NULL || PyUnicode_READY(line) == -1)
3101         return NULL;
3102 
3103     if (PyUnicode_GET_LENGTH(line) == 0) {
3104         /* Reached EOF or would have blocked */
3105         Py_DECREF(line);
3106         Py_CLEAR(self->snapshot);
3107         self->telling = self->seekable;
3108         return NULL;
3109     }
3110 
3111     return line;
3112 }
3113 
3114 static PyObject *
textiowrapper_name_get(textio * self,void * context)3115 textiowrapper_name_get(textio *self, void *context)
3116 {
3117     CHECK_ATTACHED(self);
3118     return _PyObject_GetAttrId(self->buffer, &PyId_name);
3119 }
3120 
3121 static PyObject *
textiowrapper_closed_get(textio * self,void * context)3122 textiowrapper_closed_get(textio *self, void *context)
3123 {
3124     CHECK_ATTACHED(self);
3125     return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3126 }
3127 
3128 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)3129 textiowrapper_newlines_get(textio *self, void *context)
3130 {
3131     PyObject *res;
3132     CHECK_ATTACHED(self);
3133     if (self->decoder == NULL ||
3134         _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3135     {
3136         Py_RETURN_NONE;
3137     }
3138     return res;
3139 }
3140 
3141 static PyObject *
textiowrapper_errors_get(textio * self,void * context)3142 textiowrapper_errors_get(textio *self, void *context)
3143 {
3144     CHECK_INITIALIZED(self);
3145     Py_INCREF(self->errors);
3146     return self->errors;
3147 }
3148 
3149 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)3150 textiowrapper_chunk_size_get(textio *self, void *context)
3151 {
3152     CHECK_ATTACHED(self);
3153     return PyLong_FromSsize_t(self->chunk_size);
3154 }
3155 
3156 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)3157 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
3158 {
3159     Py_ssize_t n;
3160     CHECK_ATTACHED_INT(self);
3161     if (arg == NULL) {
3162         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3163         return -1;
3164     }
3165     n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
3166     if (n == -1 && PyErr_Occurred())
3167         return -1;
3168     if (n <= 0) {
3169         PyErr_SetString(PyExc_ValueError,
3170                         "a strictly positive integer is required");
3171         return -1;
3172     }
3173     self->chunk_size = n;
3174     return 0;
3175 }
3176 
3177 #include "clinic/textio.c.h"
3178 
/* Method table for IncrementalNewlineDecoder (referenced by tp_methods of
   PyIncrementalNewlineDecoder_Type).  The *_METHODDEF names are macros,
   presumably supplied by the Argument Clinic header "clinic/textio.c.h"
   included just above; their expansions are not visible here. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}  /* sentinel */
};
3186 
3187 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3188     {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3189     {NULL}
3190 };
3191 
3192 PyTypeObject PyIncrementalNewlineDecoder_Type = {
3193     PyVarObject_HEAD_INIT(NULL, 0)
3194     "_io.IncrementalNewlineDecoder", /*tp_name*/
3195     sizeof(nldecoder_object), /*tp_basicsize*/
3196     0,                          /*tp_itemsize*/
3197     (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
3198     0,                          /*tp_vectorcall_offset*/
3199     0,                          /*tp_getattr*/
3200     0,                          /*tp_setattr*/
3201     0,                          /*tp_as_async*/
3202     0,                          /*tp_repr*/
3203     0,                          /*tp_as_number*/
3204     0,                          /*tp_as_sequence*/
3205     0,                          /*tp_as_mapping*/
3206     0,                          /*tp_hash */
3207     0,                          /*tp_call*/
3208     0,                          /*tp_str*/
3209     0,                          /*tp_getattro*/
3210     0,                          /*tp_setattro*/
3211     0,                          /*tp_as_buffer*/
3212     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
3213     _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3214     0,                          /* tp_traverse */
3215     0,                          /* tp_clear */
3216     0,                          /* tp_richcompare */
3217     0,                          /*tp_weaklistoffset*/
3218     0,                          /* tp_iter */
3219     0,                          /* tp_iternext */
3220     incrementalnewlinedecoder_methods, /* tp_methods */
3221     0,                          /* tp_members */
3222     incrementalnewlinedecoder_getset, /* tp_getset */
3223     0,                          /* tp_base */
3224     0,                          /* tp_dict */
3225     0,                          /* tp_descr_get */
3226     0,                          /* tp_descr_set */
3227     0,                          /* tp_dictoffset */
3228     _io_IncrementalNewlineDecoder___init__, /* tp_init */
3229     0,                          /* tp_alloc */
3230     PyType_GenericNew,          /* tp_new */
3231 };
3232 
3233 
/* Method table for TextIOWrapper (referenced by tp_methods of
   PyTextIOWrapper_Type).  The *_METHODDEF names are macros, presumably
   supplied by the Argument Clinic header "clinic/textio.c.h"; their
   expansions are not visible here. */
static PyMethodDef textiowrapper_methods[] = {
    /* Lifecycle, configuration and data transfer */
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Inquiries forwarded to the underlying buffer */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
3254 
3255 static PyMemberDef textiowrapper_members[] = {
3256     {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3257     {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3258     {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
3259     {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
3260     {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
3261     {NULL}
3262 };
3263 
3264 static PyGetSetDef textiowrapper_getset[] = {
3265     {"name", (getter)textiowrapper_name_get, NULL, NULL},
3266     {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
3267 /*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3268 */
3269     {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3270     {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3271     {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3272                     (setter)textiowrapper_chunk_size_set, NULL},
3273     {NULL}
3274 };
3275 
3276 PyTypeObject PyTextIOWrapper_Type = {
3277     PyVarObject_HEAD_INIT(NULL, 0)
3278     "_io.TextIOWrapper",        /*tp_name*/
3279     sizeof(textio), /*tp_basicsize*/
3280     0,                          /*tp_itemsize*/
3281     (destructor)textiowrapper_dealloc, /*tp_dealloc*/
3282     0,                          /*tp_vectorcall_offset*/
3283     0,                          /*tp_getattr*/
3284     0,                          /*tps_etattr*/
3285     0,                          /*tp_as_async*/
3286     (reprfunc)textiowrapper_repr,/*tp_repr*/
3287     0,                          /*tp_as_number*/
3288     0,                          /*tp_as_sequence*/
3289     0,                          /*tp_as_mapping*/
3290     0,                          /*tp_hash */
3291     0,                          /*tp_call*/
3292     0,                          /*tp_str*/
3293     0,                          /*tp_getattro*/
3294     0,                          /*tp_setattro*/
3295     0,                          /*tp_as_buffer*/
3296     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
3297         | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
3298     _io_TextIOWrapper___init____doc__, /* tp_doc */
3299     (traverseproc)textiowrapper_traverse, /* tp_traverse */
3300     (inquiry)textiowrapper_clear, /* tp_clear */
3301     0,                          /* tp_richcompare */
3302     offsetof(textio, weakreflist), /*tp_weaklistoffset*/
3303     0,                          /* tp_iter */
3304     (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3305     textiowrapper_methods,      /* tp_methods */
3306     textiowrapper_members,      /* tp_members */
3307     textiowrapper_getset,       /* tp_getset */
3308     0,                          /* tp_base */
3309     0,                          /* tp_dict */
3310     0,                          /* tp_descr_get */
3311     0,                          /* tp_descr_set */
3312     offsetof(textio, dict), /*tp_dictoffset*/
3313     _io_TextIOWrapper___init__, /* tp_init */
3314     0,                          /* tp_alloc */
3315     PyType_GenericNew,          /* tp_new */
3316     0,                          /* tp_free */
3317     0,                          /* tp_is_gc */
3318     0,                          /* tp_bases */
3319     0,                          /* tp_mro */
3320     0,                          /* tp_cache */
3321     0,                          /* tp_subclasses */
3322     0,                          /* tp_weaklist */
3323     0,                          /* tp_del */
3324     0,                          /* tp_version_tag */
3325     0,                          /* tp_finalize */
3326 };
3327