1 /*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "structmember.h"
12 #include "_iomodule.h"
13
14 /*[clinic input]
15 module _io
16 class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
17 class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
18 [clinic start generated code]*/
19 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
20
/* Static identifier declarations for the attribute and method names this
   file looks up by name.  Each _Py_IDENTIFIER(x) is referenced further down
   as &PyId_x (for example PyId_strict, PyId_readable, PyId_writable). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
41
42 /* TextIOBase */
43
/* Class docstring; installed as tp_doc of PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
51
52 static PyObject *
_unsupported(const char * message)53 _unsupported(const char *message)
54 {
55 _PyIO_State *state = IO_STATE();
56 if (state != NULL)
57 PyErr_SetString(state->unsupported_operation, message);
58 return NULL;
59 }
60
61 PyDoc_STRVAR(textiobase_detach_doc,
62 "Separate the underlying buffer from the TextIOBase and return it.\n"
63 "\n"
64 "After the underlying buffer has been detached, the TextIO is in an\n"
65 "unusable state.\n"
66 );
67
68 static PyObject *
textiobase_detach(PyObject * self)69 textiobase_detach(PyObject *self)
70 {
71 return _unsupported("detach");
72 }
73
74 PyDoc_STRVAR(textiobase_read_doc,
75 "Read at most n characters from stream.\n"
76 "\n"
77 "Read from underlying buffer until we have n characters or we hit EOF.\n"
78 "If n is negative or omitted, read until EOF.\n"
79 );
80
81 static PyObject *
textiobase_read(PyObject * self,PyObject * args)82 textiobase_read(PyObject *self, PyObject *args)
83 {
84 return _unsupported("read");
85 }
86
87 PyDoc_STRVAR(textiobase_readline_doc,
88 "Read until newline or EOF.\n"
89 "\n"
90 "Returns an empty string if EOF is hit immediately.\n"
91 );
92
93 static PyObject *
textiobase_readline(PyObject * self,PyObject * args)94 textiobase_readline(PyObject *self, PyObject *args)
95 {
96 return _unsupported("readline");
97 }
98
99 PyDoc_STRVAR(textiobase_write_doc,
100 "Write string to stream.\n"
101 "Returns the number of characters written (which is always equal to\n"
102 "the length of the string).\n"
103 );
104
105 static PyObject *
textiobase_write(PyObject * self,PyObject * args)106 textiobase_write(PyObject *self, PyObject *args)
107 {
108 return _unsupported("write");
109 }
110
111 PyDoc_STRVAR(textiobase_encoding_doc,
112 "Encoding of the text stream.\n"
113 "\n"
114 "Subclasses should override.\n"
115 );
116
117 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)118 textiobase_encoding_get(PyObject *self, void *context)
119 {
120 Py_RETURN_NONE;
121 }
122
123 PyDoc_STRVAR(textiobase_newlines_doc,
124 "Line endings translated so far.\n"
125 "\n"
126 "Only line endings translated during reading are considered.\n"
127 "\n"
128 "Subclasses should override.\n"
129 );
130
131 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)132 textiobase_newlines_get(PyObject *self, void *context)
133 {
134 Py_RETURN_NONE;
135 }
136
137 PyDoc_STRVAR(textiobase_errors_doc,
138 "The error setting of the decoder or encoder.\n"
139 "\n"
140 "Subclasses should override.\n"
141 );
142
143 static PyObject *
textiobase_errors_get(PyObject * self,void * context)144 textiobase_errors_get(PyObject *self, void *context)
145 {
146 Py_RETURN_NONE;
147 }
148
149
150 static PyMethodDef textiobase_methods[] = {
151 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
152 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
153 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
154 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
155 {NULL, NULL}
156 };
157
158 static PyGetSetDef textiobase_getset[] = {
159 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
160 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
161 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
162 {NULL}
163 };
164
/* Type object for _io._TextIOBase.

   Only the docstring, the method table, the getset table and the base type
   (PyIOBase_Type) are filled in; every other slot is left at 0.  The type is
   subclassable (Py_TPFLAGS_BASETYPE). */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
        | Py_TPFLAGS_HAVE_FINALIZE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};
216
217
218 /* IncrementalNewlineDecoder */
219
/* Instance layout of IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped incremental decoder; Py_None means
                                   the input is already str; NULL until
                                   __init__ has run */
    PyObject *errors;           /* error handler name given to __init__ */
    unsigned int pendingcr: 1;  /* a trailing '\r' was held back from the
                                   previous decode() call */
    unsigned int translate: 1;  /* rewrite "\r\n" and "\r" as "\n" */
    unsigned int seennl: 3;     /* bitwise OR of the SEEN_* flags for the
                                   newline kinds encountered so far */
} nldecoder_object;
228
229 /*[clinic input]
230 _io.IncrementalNewlineDecoder.__init__
231 decoder: object
232 translate: int
233 errors: object(c_default="NULL") = "strict"
234
235 Codec used when reading a file in universal newlines mode.
236
237 It wraps another incremental decoder, translating \r\n and \r into \n.
238 It also records the types of newlines encountered. When used with
239 translate=False, it ensures that the newline sequence is returned in
240 one piece. When used with decoder=None, it expects unicode strings as
241 decode input and translates newlines without first invoking an external
242 decoder.
243 [clinic start generated code]*/
244
245 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)246 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
247 PyObject *decoder, int translate,
248 PyObject *errors)
249 /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
250 {
251 self->decoder = decoder;
252 Py_INCREF(decoder);
253
254 if (errors == NULL) {
255 self->errors = _PyUnicode_FromId(&PyId_strict);
256 if (self->errors == NULL)
257 return -1;
258 }
259 else {
260 self->errors = errors;
261 }
262 Py_INCREF(self->errors);
263
264 self->translate = translate ? 1 : 0;
265 self->seennl = 0;
266 self->pendingcr = 0;
267
268 return 0;
269 }
270
271 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)272 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
273 {
274 Py_CLEAR(self->decoder);
275 Py_CLEAR(self->errors);
276 Py_TYPE(self)->tp_free((PyObject *)self);
277 }
278
279 static int
check_decoded(PyObject * decoded)280 check_decoded(PyObject *decoded)
281 {
282 if (decoded == NULL)
283 return -1;
284 if (!PyUnicode_Check(decoded)) {
285 PyErr_Format(PyExc_TypeError,
286 "decoder should return a string result, not '%.200s'",
287 Py_TYPE(decoded)->tp_name);
288 Py_DECREF(decoded);
289 return -1;
290 }
291 if (PyUnicode_READY(decoded) < 0) {
292 Py_DECREF(decoded);
293 return -1;
294 }
295 return 0;
296 }
297
/* Bit flags stored in nldecoder_object.seennl, one per newline convention
   encountered while decoding. */
#define SEEN_CR   1     /* a lone "\r" */
#define SEEN_LF   2     /* a lone "\n" */
#define SEEN_CRLF 4     /* the pair "\r\n" */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
302
303 PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject * myself,PyObject * input,int final)304 _PyIncrementalNewlineDecoder_decode(PyObject *myself,
305 PyObject *input, int final)
306 {
307 PyObject *output;
308 Py_ssize_t output_len;
309 nldecoder_object *self = (nldecoder_object *) myself;
310
311 if (self->decoder == NULL) {
312 PyErr_SetString(PyExc_ValueError,
313 "IncrementalNewlineDecoder.__init__ not called");
314 return NULL;
315 }
316
317 /* decode input (with the eventual \r from a previous pass) */
318 if (self->decoder != Py_None) {
319 output = PyObject_CallMethodObjArgs(self->decoder,
320 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
321 }
322 else {
323 output = input;
324 Py_INCREF(output);
325 }
326
327 if (check_decoded(output) < 0)
328 return NULL;
329
330 output_len = PyUnicode_GET_LENGTH(output);
331 if (self->pendingcr && (final || output_len > 0)) {
332 /* Prefix output with CR */
333 int kind;
334 PyObject *modified;
335 char *out;
336
337 modified = PyUnicode_New(output_len + 1,
338 PyUnicode_MAX_CHAR_VALUE(output));
339 if (modified == NULL)
340 goto error;
341 kind = PyUnicode_KIND(modified);
342 out = PyUnicode_DATA(modified);
343 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
344 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
345 Py_DECREF(output);
346 output = modified; /* output remains ready */
347 self->pendingcr = 0;
348 output_len++;
349 }
350
351 /* retain last \r even when not translating data:
352 * then readline() is sure to get \r\n in one pass
353 */
354 if (!final) {
355 if (output_len > 0
356 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
357 {
358 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
359 if (modified == NULL)
360 goto error;
361 Py_DECREF(output);
362 output = modified;
363 self->pendingcr = 1;
364 }
365 }
366
367 /* Record which newlines are read and do newline translation if desired,
368 all in one pass. */
369 {
370 void *in_str;
371 Py_ssize_t len;
372 int seennl = self->seennl;
373 int only_lf = 0;
374 int kind;
375
376 in_str = PyUnicode_DATA(output);
377 len = PyUnicode_GET_LENGTH(output);
378 kind = PyUnicode_KIND(output);
379
380 if (len == 0)
381 return output;
382
383 /* If, up to now, newlines are consistently \n, do a quick check
384 for the \r *byte* with the libc's optimized memchr.
385 */
386 if (seennl == SEEN_LF || seennl == 0) {
387 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
388 }
389
390 if (only_lf) {
391 /* If not already seen, quick scan for a possible "\n" character.
392 (there's nothing else to be done, even when in translation mode)
393 */
394 if (seennl == 0 &&
395 memchr(in_str, '\n', kind * len) != NULL) {
396 if (kind == PyUnicode_1BYTE_KIND)
397 seennl |= SEEN_LF;
398 else {
399 Py_ssize_t i = 0;
400 for (;;) {
401 Py_UCS4 c;
402 /* Fast loop for non-control characters */
403 while (PyUnicode_READ(kind, in_str, i) > '\n')
404 i++;
405 c = PyUnicode_READ(kind, in_str, i++);
406 if (c == '\n') {
407 seennl |= SEEN_LF;
408 break;
409 }
410 if (i >= len)
411 break;
412 }
413 }
414 }
415 /* Finished: we have scanned for newlines, and none of them
416 need translating */
417 }
418 else if (!self->translate) {
419 Py_ssize_t i = 0;
420 /* We have already seen all newline types, no need to scan again */
421 if (seennl == SEEN_ALL)
422 goto endscan;
423 for (;;) {
424 Py_UCS4 c;
425 /* Fast loop for non-control characters */
426 while (PyUnicode_READ(kind, in_str, i) > '\r')
427 i++;
428 c = PyUnicode_READ(kind, in_str, i++);
429 if (c == '\n')
430 seennl |= SEEN_LF;
431 else if (c == '\r') {
432 if (PyUnicode_READ(kind, in_str, i) == '\n') {
433 seennl |= SEEN_CRLF;
434 i++;
435 }
436 else
437 seennl |= SEEN_CR;
438 }
439 if (i >= len)
440 break;
441 if (seennl == SEEN_ALL)
442 break;
443 }
444 endscan:
445 ;
446 }
447 else {
448 void *translated;
449 int kind = PyUnicode_KIND(output);
450 void *in_str = PyUnicode_DATA(output);
451 Py_ssize_t in, out;
452 /* XXX: Previous in-place translation here is disabled as
453 resizing is not possible anymore */
454 /* We could try to optimize this so that we only do a copy
455 when there is something to translate. On the other hand,
456 we already know there is a \r byte, so chances are high
457 that something needs to be done. */
458 translated = PyMem_Malloc(kind * len);
459 if (translated == NULL) {
460 PyErr_NoMemory();
461 goto error;
462 }
463 in = out = 0;
464 for (;;) {
465 Py_UCS4 c;
466 /* Fast loop for non-control characters */
467 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
468 PyUnicode_WRITE(kind, translated, out++, c);
469 if (c == '\n') {
470 PyUnicode_WRITE(kind, translated, out++, c);
471 seennl |= SEEN_LF;
472 continue;
473 }
474 if (c == '\r') {
475 if (PyUnicode_READ(kind, in_str, in) == '\n') {
476 in++;
477 seennl |= SEEN_CRLF;
478 }
479 else
480 seennl |= SEEN_CR;
481 PyUnicode_WRITE(kind, translated, out++, '\n');
482 continue;
483 }
484 if (in > len)
485 break;
486 PyUnicode_WRITE(kind, translated, out++, c);
487 }
488 Py_DECREF(output);
489 output = PyUnicode_FromKindAndData(kind, translated, out);
490 PyMem_Free(translated);
491 if (!output)
492 return NULL;
493 }
494 self->seennl |= seennl;
495 }
496
497 return output;
498
499 error:
500 Py_DECREF(output);
501 return NULL;
502 }
503
504 /*[clinic input]
505 _io.IncrementalNewlineDecoder.decode
506 input: object
507 final: bool(accept={int}) = False
508 [clinic start generated code]*/
509
510 static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object * self,PyObject * input,int final)511 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
512 PyObject *input, int final)
513 /*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
514 {
515 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
516 }
517
518 /*[clinic input]
519 _io.IncrementalNewlineDecoder.getstate
520 [clinic start generated code]*/
521
522 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)523 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
524 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
525 {
526 PyObject *buffer;
527 unsigned long long flag;
528
529 if (self->decoder != Py_None) {
530 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
531 _PyIO_str_getstate, NULL);
532 if (state == NULL)
533 return NULL;
534 if (!PyTuple_Check(state)) {
535 PyErr_SetString(PyExc_TypeError,
536 "illegal decoder state");
537 Py_DECREF(state);
538 return NULL;
539 }
540 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
541 &buffer, &flag))
542 {
543 Py_DECREF(state);
544 return NULL;
545 }
546 Py_INCREF(buffer);
547 Py_DECREF(state);
548 }
549 else {
550 buffer = PyBytes_FromString("");
551 flag = 0;
552 }
553 flag <<= 1;
554 if (self->pendingcr)
555 flag |= 1;
556 return Py_BuildValue("NK", buffer, flag);
557 }
558
559 /*[clinic input]
560 _io.IncrementalNewlineDecoder.setstate
561 state: object
562 /
563 [clinic start generated code]*/
564
565 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)566 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
567 PyObject *state)
568 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
569 {
570 PyObject *buffer;
571 unsigned long long flag;
572
573 if (!PyTuple_Check(state)) {
574 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
575 return NULL;
576 }
577 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
578 &buffer, &flag))
579 {
580 return NULL;
581 }
582
583 self->pendingcr = (int) (flag & 1);
584 flag >>= 1;
585
586 if (self->decoder != Py_None)
587 return _PyObject_CallMethodId(self->decoder,
588 &PyId_setstate, "((OK))", buffer, flag);
589 else
590 Py_RETURN_NONE;
591 }
592
593 /*[clinic input]
594 _io.IncrementalNewlineDecoder.reset
595 [clinic start generated code]*/
596
597 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)598 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
599 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
600 {
601 self->seennl = 0;
602 self->pendingcr = 0;
603 if (self->decoder != Py_None)
604 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
605 else
606 Py_RETURN_NONE;
607 }
608
609 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)610 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
611 {
612 switch (self->seennl) {
613 case SEEN_CR:
614 return PyUnicode_FromString("\r");
615 case SEEN_LF:
616 return PyUnicode_FromString("\n");
617 case SEEN_CRLF:
618 return PyUnicode_FromString("\r\n");
619 case SEEN_CR | SEEN_LF:
620 return Py_BuildValue("ss", "\r", "\n");
621 case SEEN_CR | SEEN_CRLF:
622 return Py_BuildValue("ss", "\r", "\r\n");
623 case SEEN_LF | SEEN_CRLF:
624 return Py_BuildValue("ss", "\n", "\r\n");
625 case SEEN_CR | SEEN_LF | SEEN_CRLF:
626 return Py_BuildValue("sss", "\r", "\n", "\r\n");
627 default:
628 Py_RETURN_NONE;
629 }
630
631 }
632
633 /* TextIOWrapper */
634
/* Signature shared by the specialized encoders below.  The first argument
   is the TextIOWrapper (the helpers are cast from functions taking a
   `textio *`), the second the text to encode. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);

/* Instance layout of TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
696
697 static void
698 textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
699
700 /* A couple of specialized cases in order to bypass the slow incremental
701 encoding methods for the most popular encodings. */
702
703 static PyObject *
ascii_encode(textio * self,PyObject * text)704 ascii_encode(textio *self, PyObject *text)
705 {
706 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
707 }
708
709 static PyObject *
utf16be_encode(textio * self,PyObject * text)710 utf16be_encode(textio *self, PyObject *text)
711 {
712 return _PyUnicode_EncodeUTF16(text,
713 PyUnicode_AsUTF8(self->errors), 1);
714 }
715
716 static PyObject *
utf16le_encode(textio * self,PyObject * text)717 utf16le_encode(textio *self, PyObject *text)
718 {
719 return _PyUnicode_EncodeUTF16(text,
720 PyUnicode_AsUTF8(self->errors), -1);
721 }
722
723 static PyObject *
utf16_encode(textio * self,PyObject * text)724 utf16_encode(textio *self, PyObject *text)
725 {
726 if (!self->encoding_start_of_stream) {
727 /* Skip the BOM and use native byte ordering */
728 #if PY_BIG_ENDIAN
729 return utf16be_encode(self, text);
730 #else
731 return utf16le_encode(self, text);
732 #endif
733 }
734 return _PyUnicode_EncodeUTF16(text,
735 PyUnicode_AsUTF8(self->errors), 0);
736 }
737
738 static PyObject *
utf32be_encode(textio * self,PyObject * text)739 utf32be_encode(textio *self, PyObject *text)
740 {
741 return _PyUnicode_EncodeUTF32(text,
742 PyUnicode_AsUTF8(self->errors), 1);
743 }
744
745 static PyObject *
utf32le_encode(textio * self,PyObject * text)746 utf32le_encode(textio *self, PyObject *text)
747 {
748 return _PyUnicode_EncodeUTF32(text,
749 PyUnicode_AsUTF8(self->errors), -1);
750 }
751
752 static PyObject *
utf32_encode(textio * self,PyObject * text)753 utf32_encode(textio *self, PyObject *text)
754 {
755 if (!self->encoding_start_of_stream) {
756 /* Skip the BOM and use native byte ordering */
757 #if PY_BIG_ENDIAN
758 return utf32be_encode(self, text);
759 #else
760 return utf32le_encode(self, text);
761 #endif
762 }
763 return _PyUnicode_EncodeUTF32(text,
764 PyUnicode_AsUTF8(self->errors), 0);
765 }
766
767 static PyObject *
utf8_encode(textio * self,PyObject * text)768 utf8_encode(textio *self, PyObject *text)
769 {
770 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
771 }
772
773 static PyObject *
latin1_encode(textio * self,PyObject * text)774 latin1_encode(textio *self, PyObject *text)
775 {
776 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
777 }
778
779 /* Map normalized encoding names onto the specialized encoding funcs */
780
/* One row of the lookup table: a normalized codec name and the specialized
   encoder to use for it. */
typedef struct {
    const char *name;
    encodefunc_t encodefunc;
} encodefuncentry;

/* Table searched by _textiowrapper_set_encoder(), which compares the
   codec's `name` attribute against each entry in order.  The list ends at
   the entry whose name is NULL. */
static const encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
798
799 static int
validate_newline(const char * newline)800 validate_newline(const char *newline)
801 {
802 if (newline && newline[0] != '\0'
803 && !(newline[0] == '\n' && newline[1] == '\0')
804 && !(newline[0] == '\r' && newline[1] == '\0')
805 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
806 PyErr_Format(PyExc_ValueError,
807 "illegal newline value: %s", newline);
808 return -1;
809 }
810 return 0;
811 }
812
813 static int
set_newline(textio * self,const char * newline)814 set_newline(textio *self, const char *newline)
815 {
816 PyObject *old = self->readnl;
817 if (newline == NULL) {
818 self->readnl = NULL;
819 }
820 else {
821 self->readnl = PyUnicode_FromString(newline);
822 if (self->readnl == NULL) {
823 self->readnl = old;
824 return -1;
825 }
826 }
827 self->readuniversal = (newline == NULL || newline[0] == '\0');
828 self->readtranslate = (newline == NULL);
829 self->writetranslate = (newline == NULL || newline[0] != '\0');
830 if (!self->readuniversal && self->readnl != NULL) {
831 // validate_newline() accepts only ASCII newlines.
832 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
833 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
834 if (strcmp(self->writenl, "\n") == 0) {
835 self->writenl = NULL;
836 }
837 }
838 else {
839 #ifdef MS_WINDOWS
840 self->writenl = "\r\n";
841 #else
842 self->writenl = NULL;
843 #endif
844 }
845 Py_XDECREF(old);
846 return 0;
847 }
848
849 static int
_textiowrapper_set_decoder(textio * self,PyObject * codec_info,const char * errors)850 _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
851 const char *errors)
852 {
853 PyObject *res;
854 int r;
855
856 res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
857 if (res == NULL)
858 return -1;
859
860 r = PyObject_IsTrue(res);
861 Py_DECREF(res);
862 if (r == -1)
863 return -1;
864
865 if (r != 1)
866 return 0;
867
868 Py_CLEAR(self->decoder);
869 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
870 if (self->decoder == NULL)
871 return -1;
872
873 if (self->readuniversal) {
874 PyObject *incrementalDecoder = PyObject_CallFunction(
875 (PyObject *)&PyIncrementalNewlineDecoder_Type,
876 "Oi", self->decoder, (int)self->readtranslate);
877 if (incrementalDecoder == NULL)
878 return -1;
879 Py_CLEAR(self->decoder);
880 self->decoder = incrementalDecoder;
881 }
882
883 return 0;
884 }
885
886 static PyObject*
_textiowrapper_decode(PyObject * decoder,PyObject * bytes,int eof)887 _textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
888 {
889 PyObject *chars;
890
891 if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
892 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
893 else
894 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
895 eof ? Py_True : Py_False, NULL);
896
897 if (check_decoded(chars) < 0)
898 // check_decoded already decreases refcount
899 return NULL;
900
901 return chars;
902 }
903
904 static int
_textiowrapper_set_encoder(textio * self,PyObject * codec_info,const char * errors)905 _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
906 const char *errors)
907 {
908 PyObject *res;
909 int r;
910
911 res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
912 if (res == NULL)
913 return -1;
914
915 r = PyObject_IsTrue(res);
916 Py_DECREF(res);
917 if (r == -1)
918 return -1;
919
920 if (r != 1)
921 return 0;
922
923 Py_CLEAR(self->encoder);
924 self->encodefunc = NULL;
925 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
926 if (self->encoder == NULL)
927 return -1;
928
929 /* Get the normalized named of the codec */
930 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
931 return -1;
932 }
933 if (res != NULL && PyUnicode_Check(res)) {
934 const encodefuncentry *e = encodefuncs;
935 while (e->name != NULL) {
936 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
937 self->encodefunc = e->encodefunc;
938 break;
939 }
940 e++;
941 }
942 }
943 Py_XDECREF(res);
944
945 return 0;
946 }
947
948 static int
_textiowrapper_fix_encoder_state(textio * self)949 _textiowrapper_fix_encoder_state(textio *self)
950 {
951 if (!self->seekable || !self->encoder) {
952 return 0;
953 }
954
955 self->encoding_start_of_stream = 1;
956
957 PyObject *cookieObj = PyObject_CallMethodObjArgs(
958 self->buffer, _PyIO_str_tell, NULL);
959 if (cookieObj == NULL) {
960 return -1;
961 }
962
963 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
964 Py_DECREF(cookieObj);
965 if (cmp < 0) {
966 return -1;
967 }
968
969 if (cmp == 0) {
970 self->encoding_start_of_stream = 0;
971 PyObject *res = PyObject_CallMethodObjArgs(
972 self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL);
973 if (res == NULL) {
974 return -1;
975 }
976 Py_DECREF(res);
977 }
978
979 return 0;
980 }
981
982 /*[clinic input]
983 _io.TextIOWrapper.__init__
984 buffer: object
985 encoding: str(accept={str, NoneType}) = NULL
986 errors: object = None
987 newline: str(accept={str, NoneType}) = NULL
988 line_buffering: bool(accept={int}) = False
989 write_through: bool(accept={int}) = False
990
991 Character and line based layer over a BufferedIOBase object, buffer.
992
993 encoding gives the name of the encoding that the stream will be
994 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
995
996 errors determines the strictness of encoding and decoding (see
997 help(codecs.Codec) or the documentation for codecs.register) and
998 defaults to "strict".
999
1000 newline controls how line endings are handled. It can be None, '',
1001 '\n', '\r', and '\r\n'. It works as follows:
1002
1003 * On input, if newline is None, universal newlines mode is
1004 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1005 these are translated into '\n' before being returned to the
1006 caller. If it is '', universal newline mode is enabled, but line
1007 endings are returned to the caller untranslated. If it has any of
1008 the other legal values, input lines are only terminated by the given
1009 string, and the line ending is returned to the caller untranslated.
1010
1011 * On output, if newline is None, any '\n' characters written are
1012 translated to the system default line separator, os.linesep. If
1013 newline is '' or '\n', no translation takes place. If newline is any
1014 of the other legal values, any '\n' characters written are translated
1015 to the given string.
1016
1017 If line_buffering is True, a call to flush is implied when a call to
1018 write contains a newline character.
1019 [clinic start generated code]*/
1020
1021 static int
_io_TextIOWrapper___init___impl(textio * self,PyObject * buffer,const char * encoding,PyObject * errors,const char * newline,int line_buffering,int write_through)1022 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1023 const char *encoding, PyObject *errors,
1024 const char *newline, int line_buffering,
1025 int write_through)
1026 /*[clinic end generated code: output=72267c0c01032ed2 input=1c5dd5d78bfcc675]*/
1027 {
1028 PyObject *raw, *codec_info = NULL;
1029 _PyIO_State *state = NULL;
1030 PyObject *res;
1031 int r;
1032
1033 self->ok = 0;
1034 self->detached = 0;
1035
1036 if (errors == Py_None) {
1037 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
1038 if (errors == NULL) {
1039 return -1;
1040 }
1041 }
1042 else if (!PyUnicode_Check(errors)) {
1043 // Check 'errors' argument here because Argument Clinic doesn't support
1044 // 'str(accept={str, NoneType})' converter.
1045 PyErr_Format(
1046 PyExc_TypeError,
1047 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1048 errors->ob_type->tp_name);
1049 return -1;
1050 }
1051
1052 if (validate_newline(newline) < 0) {
1053 return -1;
1054 }
1055
1056 Py_CLEAR(self->buffer);
1057 Py_CLEAR(self->encoding);
1058 Py_CLEAR(self->encoder);
1059 Py_CLEAR(self->decoder);
1060 Py_CLEAR(self->readnl);
1061 Py_CLEAR(self->decoded_chars);
1062 Py_CLEAR(self->pending_bytes);
1063 Py_CLEAR(self->snapshot);
1064 Py_CLEAR(self->errors);
1065 Py_CLEAR(self->raw);
1066 self->decoded_chars_used = 0;
1067 self->pending_bytes_count = 0;
1068 self->encodefunc = NULL;
1069 self->b2cratio = 0.0;
1070
1071 if (encoding == NULL) {
1072 /* Try os.device_encoding(fileno) */
1073 PyObject *fileno;
1074 state = IO_STATE();
1075 if (state == NULL)
1076 goto error;
1077 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
1078 /* Ignore only AttributeError and UnsupportedOperation */
1079 if (fileno == NULL) {
1080 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1081 PyErr_ExceptionMatches(state->unsupported_operation)) {
1082 PyErr_Clear();
1083 }
1084 else {
1085 goto error;
1086 }
1087 }
1088 else {
1089 int fd = _PyLong_AsInt(fileno);
1090 Py_DECREF(fileno);
1091 if (fd == -1 && PyErr_Occurred()) {
1092 goto error;
1093 }
1094
1095 self->encoding = _Py_device_encoding(fd);
1096 if (self->encoding == NULL)
1097 goto error;
1098 else if (!PyUnicode_Check(self->encoding))
1099 Py_CLEAR(self->encoding);
1100 }
1101 }
1102 if (encoding == NULL && self->encoding == NULL) {
1103 PyObject *locale_module = _PyIO_get_locale_module(state);
1104 if (locale_module == NULL)
1105 goto catch_ImportError;
1106 self->encoding = _PyObject_CallMethodIdObjArgs(
1107 locale_module, &PyId_getpreferredencoding, Py_False, NULL);
1108 Py_DECREF(locale_module);
1109 if (self->encoding == NULL) {
1110 catch_ImportError:
1111 /*
1112 Importing locale can raise an ImportError because of
1113 _functools, and locale.getpreferredencoding can raise an
1114 ImportError if _locale is not available. These will happen
1115 during module building.
1116 */
1117 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1118 PyErr_Clear();
1119 self->encoding = PyUnicode_FromString("ascii");
1120 }
1121 else
1122 goto error;
1123 }
1124 else if (!PyUnicode_Check(self->encoding))
1125 Py_CLEAR(self->encoding);
1126 }
1127 if (self->encoding != NULL) {
1128 encoding = PyUnicode_AsUTF8(self->encoding);
1129 if (encoding == NULL)
1130 goto error;
1131 }
1132 else if (encoding != NULL) {
1133 self->encoding = PyUnicode_FromString(encoding);
1134 if (self->encoding == NULL)
1135 goto error;
1136 }
1137 else {
1138 PyErr_SetString(PyExc_OSError,
1139 "could not determine default encoding");
1140 goto error;
1141 }
1142
1143 /* Check we have been asked for a real text encoding */
1144 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1145 if (codec_info == NULL) {
1146 Py_CLEAR(self->encoding);
1147 goto error;
1148 }
1149
1150 /* XXX: Failures beyond this point have the potential to leak elements
1151 * of the partially constructed object (like self->encoding)
1152 */
1153
1154 Py_INCREF(errors);
1155 self->errors = errors;
1156 self->chunk_size = 8192;
1157 self->line_buffering = line_buffering;
1158 self->write_through = write_through;
1159 if (set_newline(self, newline) < 0) {
1160 goto error;
1161 }
1162
1163 self->buffer = buffer;
1164 Py_INCREF(buffer);
1165
1166 /* Build the decoder object */
1167 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1168 goto error;
1169
1170 /* Build the encoder object */
1171 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1172 goto error;
1173
1174 /* Finished sorting out the codec details */
1175 Py_CLEAR(codec_info);
1176
1177 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1178 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1179 Py_TYPE(buffer) == &PyBufferedRandom_Type)
1180 {
1181 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1182 goto error;
1183 /* Cache the raw FileIO object to speed up 'closed' checks */
1184 if (raw != NULL) {
1185 if (Py_TYPE(raw) == &PyFileIO_Type)
1186 self->raw = raw;
1187 else
1188 Py_DECREF(raw);
1189 }
1190 }
1191
1192 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
1193 if (res == NULL)
1194 goto error;
1195 r = PyObject_IsTrue(res);
1196 Py_DECREF(res);
1197 if (r < 0)
1198 goto error;
1199 self->seekable = self->telling = r;
1200
1201 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1202 if (r < 0) {
1203 goto error;
1204 }
1205 Py_XDECREF(res);
1206 self->has_read1 = r;
1207
1208 self->encoding_start_of_stream = 0;
1209 if (_textiowrapper_fix_encoder_state(self) < 0) {
1210 goto error;
1211 }
1212
1213 self->ok = 1;
1214 return 0;
1215
1216 error:
1217 Py_XDECREF(codec_info);
1218 return -1;
1219 }
1220
1221 /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1222 * -1 on error.
1223 */
1224 static int
convert_optional_bool(PyObject * obj,int default_value)1225 convert_optional_bool(PyObject *obj, int default_value)
1226 {
1227 long v;
1228 if (obj == Py_None) {
1229 v = default_value;
1230 }
1231 else {
1232 v = PyLong_AsLong(obj);
1233 if (v == -1 && PyErr_Occurred())
1234 return -1;
1235 }
1236 return v != 0;
1237 }
1238
1239 static int
textiowrapper_change_encoding(textio * self,PyObject * encoding,PyObject * errors,int newline_changed)1240 textiowrapper_change_encoding(textio *self, PyObject *encoding,
1241 PyObject *errors, int newline_changed)
1242 {
1243 /* Use existing settings where new settings are not specified */
1244 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1245 return 0; // no change
1246 }
1247
1248 if (encoding == Py_None) {
1249 encoding = self->encoding;
1250 if (errors == Py_None) {
1251 errors = self->errors;
1252 }
1253 }
1254 else if (errors == Py_None) {
1255 errors = _PyUnicode_FromId(&PyId_strict);
1256 if (errors == NULL) {
1257 return -1;
1258 }
1259 }
1260
1261 const char *c_errors = PyUnicode_AsUTF8(errors);
1262 if (c_errors == NULL) {
1263 return -1;
1264 }
1265
1266 // Create new encoder & decoder
1267 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1268 PyUnicode_AsUTF8(encoding), "codecs.open()");
1269 if (codec_info == NULL) {
1270 return -1;
1271 }
1272 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1273 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1274 Py_DECREF(codec_info);
1275 return -1;
1276 }
1277 Py_DECREF(codec_info);
1278
1279 Py_INCREF(encoding);
1280 Py_INCREF(errors);
1281 Py_SETREF(self->encoding, encoding);
1282 Py_SETREF(self->errors, errors);
1283
1284 return _textiowrapper_fix_encoder_state(self);
1285 }
1286
1287 /*[clinic input]
1288 _io.TextIOWrapper.reconfigure
1289 *
1290 encoding: object = None
1291 errors: object = None
1292 newline as newline_obj: object(c_default="NULL") = None
1293 line_buffering as line_buffering_obj: object = None
1294 write_through as write_through_obj: object = None
1295
1296 Reconfigure the text stream with new parameters.
1297
1298 This also does an implicit stream flush.
1299
1300 [clinic start generated code]*/
1301
1302 static PyObject *
_io_TextIOWrapper_reconfigure_impl(textio * self,PyObject * encoding,PyObject * errors,PyObject * newline_obj,PyObject * line_buffering_obj,PyObject * write_through_obj)1303 _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1304 PyObject *errors, PyObject *newline_obj,
1305 PyObject *line_buffering_obj,
1306 PyObject *write_through_obj)
1307 /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
1308 {
1309 int line_buffering;
1310 int write_through;
1311 const char *newline = NULL;
1312
1313 /* Check if something is in the read buffer */
1314 if (self->decoded_chars != NULL) {
1315 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1316 _unsupported("It is not possible to set the encoding or newline "
1317 "of stream after the first read");
1318 return NULL;
1319 }
1320 }
1321
1322 if (newline_obj != NULL && newline_obj != Py_None) {
1323 newline = PyUnicode_AsUTF8(newline_obj);
1324 if (newline == NULL || validate_newline(newline) < 0) {
1325 return NULL;
1326 }
1327 }
1328
1329 line_buffering = convert_optional_bool(line_buffering_obj,
1330 self->line_buffering);
1331 write_through = convert_optional_bool(write_through_obj,
1332 self->write_through);
1333 if (line_buffering < 0 || write_through < 0) {
1334 return NULL;
1335 }
1336
1337 PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1338 if (res == NULL) {
1339 return NULL;
1340 }
1341 Py_DECREF(res);
1342 self->b2cratio = 0;
1343
1344 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1345 return NULL;
1346 }
1347
1348 if (textiowrapper_change_encoding(
1349 self, encoding, errors, newline_obj != NULL) < 0) {
1350 return NULL;
1351 }
1352
1353 self->line_buffering = line_buffering;
1354 self->write_through = write_through;
1355 Py_RETURN_NONE;
1356 }
1357
/* Mark the wrapper as not initialized (ok = 0) and drop every object
 * reference it holds, including its instance dict.  Called from
 * textiowrapper_dealloc(); presumably also installed as the type's tp_clear
 * slot -- TODO confirm against the type definition.  Always returns 0.
 */
static int
textiowrapper_clear(textio *self)
{
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);

    Py_CLEAR(self->dict);
    return 0;
}
1376
1377 static void
textiowrapper_dealloc(textio * self)1378 textiowrapper_dealloc(textio *self)
1379 {
1380 self->finalizing = 1;
1381 if (_PyIOBase_finalize((PyObject *) self) < 0)
1382 return;
1383 self->ok = 0;
1384 _PyObject_GC_UNTRACK(self);
1385 if (self->weakreflist != NULL)
1386 PyObject_ClearWeakRefs((PyObject *)self);
1387 textiowrapper_clear(self);
1388 Py_TYPE(self)->tp_free((PyObject *)self);
1389 }
1390
/* GC traversal: report every object reference held by the wrapper to `visit`.
 * The visited fields are the same ones textiowrapper_clear() releases.
 * Py_VISIT() makes this function return early with the callback's result when
 * that result is non-zero; otherwise 0 is returned.  The parameter names
 * `visit` and `arg` are required by the Py_VISIT() macro.
 */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1408
/* Forward declaration: the CHECK_CLOSED() macro below calls this getter. */
static PyObject *
textiowrapper_closed_get(textio *self, void *context);

/* CHECK_CLOSED(self): make the enclosing function `return NULL` if the stream
 * is closed (ValueError "I/O operation on closed file.") or if the closed
 * state cannot be determined.  Only usable in functions returning a pointer.
 *
 * This macro takes some shortcuts to make the common case faster:
 *   - exact TextIOWrapper instance with a cached self->raw: ask
 *     _PyFileIO_closed(self->raw) directly;
 *   - exact TextIOWrapper instance without a cached raw object: evaluate
 *     textiowrapper_closed_get() and test its truth value;
 *   - any other type (e.g. a subclass): defer to _PyIOBase_check_closed().
 */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1438
/* CHECK_INITIALIZED(self): make the enclosing function `return NULL` with
 * ValueError set unless the object was successfully initialized
 * (self->ok > 0).  Only usable in functions returning a pointer.
 * NOTE(review): expands to a bare `if` block rather than do { } while (0), so
 * it must not be used as the body of an unbraced if/else.
 */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "I/O operation on uninitialized object"); \
        return NULL; \
    }
1445
/* CHECK_ATTACHED(self): first apply CHECK_INITIALIZED(), then make the
 * enclosing function `return NULL` with ValueError set if the underlying
 * buffer has been detached.  Only usable in functions returning a pointer.
 * NOTE(review): expands to two separate statements, so it must not be used as
 * the body of an unbraced if/else.
 */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }
1453
/* CHECK_ATTACHED_INT(self): same checks and messages as CHECK_ATTACHED(), for
 * functions that report errors by returning -1 instead of NULL.
 * NOTE(review): expands to a bare if/else-if chain rather than
 * do { } while (0); avoid using it next to an unbraced if/else.
 */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1464
1465
1466 /*[clinic input]
1467 _io.TextIOWrapper.detach
1468 [clinic start generated code]*/
1469
1470 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1471 _io_TextIOWrapper_detach_impl(textio *self)
1472 /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1473 {
1474 PyObject *buffer, *res;
1475 CHECK_ATTACHED(self);
1476 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1477 if (res == NULL)
1478 return NULL;
1479 Py_DECREF(res);
1480 buffer = self->buffer;
1481 self->buffer = NULL;
1482 self->detached = 1;
1483 return buffer;
1484 }
1485
1486 /* Flush the internal write buffer. This doesn't explicitly flush the
1487 underlying buffered object, though. */
1488 static int
_textiowrapper_writeflush(textio * self)1489 _textiowrapper_writeflush(textio *self)
1490 {
1491 PyObject *pending, *b, *ret;
1492
1493 if (self->pending_bytes == NULL)
1494 return 0;
1495
1496 pending = self->pending_bytes;
1497 Py_INCREF(pending);
1498 self->pending_bytes_count = 0;
1499 Py_CLEAR(self->pending_bytes);
1500
1501 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1502 Py_DECREF(pending);
1503 if (b == NULL)
1504 return -1;
1505 ret = NULL;
1506 do {
1507 ret = PyObject_CallMethodObjArgs(self->buffer,
1508 _PyIO_str_write, b, NULL);
1509 } while (ret == NULL && _PyIO_trap_eintr());
1510 Py_DECREF(b);
1511 if (ret == NULL)
1512 return -1;
1513 Py_DECREF(ret);
1514 return 0;
1515 }
1516
1517 /*[clinic input]
1518 _io.TextIOWrapper.write
1519 text: unicode
1520 /
1521 [clinic start generated code]*/
1522
1523 static PyObject *
_io_TextIOWrapper_write_impl(textio * self,PyObject * text)1524 _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1525 /*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
1526 {
1527 PyObject *ret;
1528 PyObject *b;
1529 Py_ssize_t textlen;
1530 int haslf = 0;
1531 int needflush = 0, text_needflush = 0;
1532
1533 if (PyUnicode_READY(text) == -1)
1534 return NULL;
1535
1536 CHECK_ATTACHED(self);
1537 CHECK_CLOSED(self);
1538
1539 if (self->encoder == NULL)
1540 return _unsupported("not writable");
1541
1542 Py_INCREF(text);
1543
1544 textlen = PyUnicode_GET_LENGTH(text);
1545
1546 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1547 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1548 haslf = 1;
1549
1550 if (haslf && self->writetranslate && self->writenl != NULL) {
1551 PyObject *newtext = _PyObject_CallMethodId(
1552 text, &PyId_replace, "ss", "\n", self->writenl);
1553 Py_DECREF(text);
1554 if (newtext == NULL)
1555 return NULL;
1556 text = newtext;
1557 }
1558
1559 if (self->write_through)
1560 text_needflush = 1;
1561 if (self->line_buffering &&
1562 (haslf ||
1563 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1564 needflush = 1;
1565
1566 /* XXX What if we were just reading? */
1567 if (self->encodefunc != NULL) {
1568 b = (*self->encodefunc)((PyObject *) self, text);
1569 self->encoding_start_of_stream = 0;
1570 }
1571 else
1572 b = PyObject_CallMethodObjArgs(self->encoder,
1573 _PyIO_str_encode, text, NULL);
1574 Py_DECREF(text);
1575 if (b == NULL)
1576 return NULL;
1577 if (!PyBytes_Check(b)) {
1578 PyErr_Format(PyExc_TypeError,
1579 "encoder should return a bytes object, not '%.200s'",
1580 Py_TYPE(b)->tp_name);
1581 Py_DECREF(b);
1582 return NULL;
1583 }
1584
1585 if (self->pending_bytes == NULL) {
1586 self->pending_bytes = PyList_New(0);
1587 if (self->pending_bytes == NULL) {
1588 Py_DECREF(b);
1589 return NULL;
1590 }
1591 self->pending_bytes_count = 0;
1592 }
1593 if (PyList_Append(self->pending_bytes, b) < 0) {
1594 Py_DECREF(b);
1595 return NULL;
1596 }
1597 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1598 Py_DECREF(b);
1599 if (self->pending_bytes_count > self->chunk_size || needflush ||
1600 text_needflush) {
1601 if (_textiowrapper_writeflush(self) < 0)
1602 return NULL;
1603 }
1604
1605 if (needflush) {
1606 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1607 if (ret == NULL)
1608 return NULL;
1609 Py_DECREF(ret);
1610 }
1611
1612 textiowrapper_set_decoded_chars(self, NULL);
1613 Py_CLEAR(self->snapshot);
1614
1615 if (self->decoder) {
1616 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
1617 if (ret == NULL)
1618 return NULL;
1619 Py_DECREF(ret);
1620 }
1621
1622 return PyLong_FromSsize_t(textlen);
1623 }
1624
1625 /* Steal a reference to chars and store it in the decoded_char buffer;
1626 */
1627 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1628 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1629 {
1630 Py_XSETREF(self->decoded_chars, chars);
1631 self->decoded_chars_used = 0;
1632 }
1633
1634 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1635 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1636 {
1637 PyObject *chars;
1638 Py_ssize_t avail;
1639
1640 if (self->decoded_chars == NULL)
1641 return PyUnicode_FromStringAndSize(NULL, 0);
1642
1643 /* decoded_chars is guaranteed to be "ready". */
1644 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1645 - self->decoded_chars_used);
1646
1647 assert(avail >= 0);
1648
1649 if (n < 0 || n > avail)
1650 n = avail;
1651
1652 if (self->decoded_chars_used > 0 || n < avail) {
1653 chars = PyUnicode_Substring(self->decoded_chars,
1654 self->decoded_chars_used,
1655 self->decoded_chars_used + n);
1656 if (chars == NULL)
1657 return NULL;
1658 }
1659 else {
1660 chars = self->decoded_chars;
1661 Py_INCREF(chars);
1662 }
1663
1664 self->decoded_chars_used += n;
1665 return chars;
1666 }
1667
/* Read and decode the next chunk of data from the BufferedReader.
 *
 * size_hint: when positive, an approximate number of characters wanted; it is
 * scaled by the last observed bytes-per-character ratio (at least 1.0).  The
 * number of bytes requested is never smaller than self->chunk_size.
 *
 * Returns 1 unless the read returned no bytes and decoding produced no
 * characters, in which case 0 (EOF) is returned.  Returns -1 with an
 * exception set on error.
 */
static int
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
{
    PyObject *dec_buffer = NULL;
    PyObject *dec_flags = NULL;
    PyObject *input_chunk = NULL;
    Py_buffer input_chunk_buf;
    PyObject *decoded_chars, *chunk_size;
    Py_ssize_t nbytes, nchars;
    int eof;

    /* The return value is True unless EOF was reached.  The decoded string is
     * placed in self._decoded_chars (replacing its previous value).  The
     * entire input chunk is sent to the decoder, though some of it may remain
     * buffered in the decoder, yet to be converted.
     */

    if (self->decoder == NULL) {
        _unsupported("not readable");
        return -1;
    }

    if (self->telling) {
        /* To prepare for tell(), we need to snapshot a point in the file
         * where the decoder's input buffer is empty.
         */

        PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
                                                     _PyIO_str_getstate, NULL);
        if (state == NULL)
            return -1;
        /* Given this, we know there was a valid snapshot point
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
         */
        if (!PyTuple_Check(state)) {
            PyErr_SetString(PyExc_TypeError,
                            "illegal decoder state");
            Py_DECREF(state);
            return -1;
        }
        /* The "O" format yields references borrowed from `state`. */
        if (!PyArg_ParseTuple(state,
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
        {
            Py_DECREF(state);
            return -1;
        }

        if (!PyBytes_Check(dec_buffer)) {
            PyErr_Format(PyExc_TypeError,
                         "illegal decoder state: the first item should be a "
                         "bytes object, not '%.200s'",
                         Py_TYPE(dec_buffer)->tp_name);
            Py_DECREF(state);
            return -1;
        }
        /* Own both items before the tuple that holds them is released. */
        Py_INCREF(dec_buffer);
        Py_INCREF(dec_flags);
        Py_DECREF(state);
    }

    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
    if (size_hint > 0) {
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
    }
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
    if (chunk_size == NULL)
        goto fail;

    /* Use read1() when the buffer provides it (has_read1), else read(). */
    input_chunk = PyObject_CallMethodObjArgs(self->buffer,
        (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
        chunk_size, NULL);
    Py_DECREF(chunk_size);
    if (input_chunk == NULL)
        goto fail;

    /* The buffer view is only used to learn the chunk length. */
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
        PyErr_Format(PyExc_TypeError,
                     "underlying %s() should have returned a bytes-like object, "
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
                     Py_TYPE(input_chunk)->tp_name);
        goto fail;
    }

    nbytes = input_chunk_buf.len;
    eof = (nbytes == 0);

    decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
    PyBuffer_Release(&input_chunk_buf);
    if (decoded_chars == NULL)
        goto fail;

    /* The reference to decoded_chars is handed over to the wrapper here. */
    textiowrapper_set_decoded_chars(self, decoded_chars);
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
    if (nchars > 0)
        self->b2cratio = (double) nbytes / nchars;
    else
        self->b2cratio = 0.0;
    /* An empty read that still produced characters is not EOF yet. */
    if (nchars > 0)
        eof = 0;

    if (self->telling) {
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
         * next input to be decoded is dec_buffer + input_chunk.
         */
        PyObject *next_input = dec_buffer;
        PyBytes_Concat(&next_input, input_chunk);
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
        if (next_input == NULL) {
            goto fail;
        }
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
        if (snapshot == NULL) {
            /* Keep the cleanup at `fail` from releasing dec_flags again;
             * NOTE(review): presumably the "N" arguments were already
             * released by Py_BuildValue() on failure -- confirm. */
            dec_flags = NULL;
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);
    }
    Py_DECREF(input_chunk);

    return (eof == 0);

  fail:
    Py_XDECREF(dec_buffer);
    Py_XDECREF(dec_flags);
    Py_XDECREF(input_chunk);
    return -1;
}
1797
1798 /*[clinic input]
1799 _io.TextIOWrapper.read
1800 size as n: Py_ssize_t(accept={int, NoneType}) = -1
1801 /
1802 [clinic start generated code]*/
1803
1804 static PyObject *
_io_TextIOWrapper_read_impl(textio * self,Py_ssize_t n)1805 _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1806 /*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
1807 {
1808 PyObject *result = NULL, *chunks = NULL;
1809
1810 CHECK_ATTACHED(self);
1811 CHECK_CLOSED(self);
1812
1813 if (self->decoder == NULL)
1814 return _unsupported("not readable");
1815
1816 if (_textiowrapper_writeflush(self) < 0)
1817 return NULL;
1818
1819 if (n < 0) {
1820 /* Read everything */
1821 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
1822 PyObject *decoded;
1823 if (bytes == NULL)
1824 goto fail;
1825
1826 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1827 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1828 bytes, 1);
1829 else
1830 decoded = PyObject_CallMethodObjArgs(
1831 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
1832 Py_DECREF(bytes);
1833 if (check_decoded(decoded) < 0)
1834 goto fail;
1835
1836 result = textiowrapper_get_decoded_chars(self, -1);
1837
1838 if (result == NULL) {
1839 Py_DECREF(decoded);
1840 return NULL;
1841 }
1842
1843 PyUnicode_AppendAndDel(&result, decoded);
1844 if (result == NULL)
1845 goto fail;
1846
1847 textiowrapper_set_decoded_chars(self, NULL);
1848 Py_CLEAR(self->snapshot);
1849 return result;
1850 }
1851 else {
1852 int res = 1;
1853 Py_ssize_t remaining = n;
1854
1855 result = textiowrapper_get_decoded_chars(self, n);
1856 if (result == NULL)
1857 goto fail;
1858 if (PyUnicode_READY(result) == -1)
1859 goto fail;
1860 remaining -= PyUnicode_GET_LENGTH(result);
1861
1862 /* Keep reading chunks until we have n characters to return */
1863 while (remaining > 0) {
1864 res = textiowrapper_read_chunk(self, remaining);
1865 if (res < 0) {
1866 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1867 when EINTR occurs so we needn't do it ourselves. */
1868 if (_PyIO_trap_eintr()) {
1869 continue;
1870 }
1871 goto fail;
1872 }
1873 if (res == 0) /* EOF */
1874 break;
1875 if (chunks == NULL) {
1876 chunks = PyList_New(0);
1877 if (chunks == NULL)
1878 goto fail;
1879 }
1880 if (PyUnicode_GET_LENGTH(result) > 0 &&
1881 PyList_Append(chunks, result) < 0)
1882 goto fail;
1883 Py_DECREF(result);
1884 result = textiowrapper_get_decoded_chars(self, remaining);
1885 if (result == NULL)
1886 goto fail;
1887 remaining -= PyUnicode_GET_LENGTH(result);
1888 }
1889 if (chunks != NULL) {
1890 if (result != NULL && PyList_Append(chunks, result) < 0)
1891 goto fail;
1892 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
1893 if (result == NULL)
1894 goto fail;
1895 Py_CLEAR(chunks);
1896 }
1897 return result;
1898 }
1899 fail:
1900 Py_XDECREF(result);
1901 Py_XDECREF(chunks);
1902 return NULL;
1903 }
1904
1905
1906 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
1907 that is to the NUL character. Otherwise the function will produce
1908 incorrect results. */
1909 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)1910 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
1911 {
1912 if (kind == PyUnicode_1BYTE_KIND) {
1913 assert(ch < 256);
1914 return (char *) memchr((void *) s, (char) ch, end - s);
1915 }
1916 for (;;) {
1917 while (PyUnicode_READ(kind, s, 0) > ch)
1918 s += kind;
1919 if (PyUnicode_READ(kind, s, 0) == ch)
1920 return s;
1921 if (s == end)
1922 return NULL;
1923 s += kind;
1924 }
1925 }
1926
/* Locate the end of the first line in the character data [start, end).
 *
 * translated: non-zero means only '\n' is searched for.
 * universal:  (when translated is zero) any of "\r", "\r\n", "\n" ends a line.
 * readnl:     (when both flags are zero) the line ending to search for; it is
 *             read as 1-byte-kind data.
 * kind:       PyUnicode kind of the data, used as the size in bytes of one
 *             character.
 *
 * Returns the number of characters from `start` up to and including the line
 * ending.  If no complete line ending is found, returns -1 and stores in
 * *consumed the number of leading characters that can be put aside because
 * they cannot belong to a line ending.  *consumed is not written when a line
 * ending is found.
 *
 * The scans rely on the data being NUL-terminated at `end` (see
 * find_control_char()).
 */
Py_ssize_t
_PyIO_find_line_ending(
    int translated, int universal, PyObject *readnl,
    int kind, const char *start, const char *end, Py_ssize_t *consumed)
{
    /* Length of the searched range, in characters. */
    Py_ssize_t len = ((char*)end - (char*)start)/kind;

    if (translated) {
        /* Newlines are already translated, only search for \n */
        const char *pos = find_control_char(kind, start, end, '\n');
        if (pos != NULL)
            return (pos - start)/kind + 1;
        else {
            *consumed = len;
            return -1;
        }
    }
    else if (universal) {
        /* Universal newline search. Find any of \r, \r\n, \n
         * The decoder ensures that \r\n are not split in two pieces
         */
        const char *s = start;
        for (;;) {
            Py_UCS4 ch;
            /* Fast path for non-control chars. The loop always ends
               since the Unicode string is NUL-terminated. */
            while (PyUnicode_READ(kind, s, 0) > '\r')
                s += kind;
            if (s >= end) {
                *consumed = len;
                return -1;
            }
            ch = PyUnicode_READ(kind, s, 0);
            s += kind;
            /* `s` now points just past the character that was read. */
            if (ch == '\n')
                return (s - start)/kind;
            if (ch == '\r') {
                /* Count a directly following '\n' as part of the ending. */
                if (PyUnicode_READ(kind, s, 0) == '\n')
                    return (s - start)/kind + 1;
                else
                    return (s - start)/kind;
            }
        }
    }
    else {
        /* Non-universal mode. */
        Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
        Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
        /* Assume that readnl is an ASCII character. */
        assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
        if (readnl_len == 1) {
            const char *pos = find_control_char(kind, start, end, nl[0]);
            if (pos != NULL)
                return (pos - start)/kind + 1;
            *consumed = len;
            return -1;
        }
        else {
            const char *s = start;
            /* `e` is the last position at which a complete readnl could
             * still start, clamped so that it is never before `s`. */
            const char *e = end - (readnl_len - 1)*kind;
            const char *pos;
            if (e < s)
                e = s;
            while (s < e) {
                Py_ssize_t i;
                /* NOTE(review): this `pos` shadows the one declared above. */
                const char *pos = find_control_char(kind, s, end, nl[0]);
                if (pos == NULL || pos >= e)
                    break;
                for (i = 1; i < readnl_len; i++) {
                    if (PyUnicode_READ(kind, pos, i) != nl[i])
                        break;
                }
                if (i == readnl_len)
                    return (pos - start)/kind + readnl_len;
                s = pos + kind;
            }
            /* No complete match.  If the first character of readnl occurs
             * in the tail, only the characters before it are reported as
             * consumed; otherwise the whole range is. */
            pos = find_control_char(kind, e, end, nl[0]);
            if (pos == NULL)
                *consumed = len;
            else
                *consumed = (pos - start)/kind;
            return -1;
        }
    }
}
2012
/* Read one line from the stream and return it as a str (new reference).
 *
 * limit: maximum number of characters to return; a negative value means no
 * limit.
 *
 * Returns the empty string when nothing could be read before EOF, or NULL
 * with an exception set on error.  Pending writes are flushed first.  This
 * function only checks for a closed stream; _io_TextIOWrapper_readline_impl()
 * performs CHECK_ATTACHED() before calling it.
 *
 * Local state:
 *   line             - string currently being searched for a line ending
 *   chunks           - list of pieces already known to belong to the line
 *   chunked          - total number of characters stored in `chunks`
 *   remaining        - tail of the previous buffer that was not put aside and
 *                      is prepended to the next chunk of data
 *   offset_to_buffer - number of characters at the start of `line` that come
 *                      from `remaining` rather than from self->decoded_chars
 */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    chunked = 0;

    while (1) {
        char *ptr;
        Py_ssize_t line_len;
        int kind;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self, 0);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto error;
            }
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Search the decoded buffer itself, from the current offset. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the leftover of the previous buffer to the new data. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
            if (PyUnicode_READY(line) == -1)
                goto error;
        }

        ptr = PyUnicode_DATA(line);
        line_len = PyUnicode_GET_LENGTH(line);
        kind = PyUnicode_KIND(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            kind,
            ptr + kind * start,
            ptr + kind * line_len,
            &consumed);
        if (endpos >= 0) {
            /* Line ending found; make endpos an index into `line` and cut
             * the line short if it would exceed the limit. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_Substring(line, start, endpos);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_LENGTH(s);
            Py_DECREF(s);
        }
        /* There may be some remaining bytes we'll have to prepend to the
           next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_Substring(line, endpos, line_len);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        /* Convert endpos from an index into `line` to an offset into
         * self->decoded_chars. */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
            PyObject *s = PyUnicode_Substring(line, start, endpos);
            Py_CLEAR(line);
            if (s == NULL)
                goto error;
            line = s;
        }
    }
    if (remaining != NULL) {
        /* Leftover data that was never followed by more input still belongs
         * to the result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the final line from the pieces that were put aside. */
        if (line != NULL) {
            if (PyList_Append(chunks, line) < 0)
                goto error;
            Py_DECREF(line);
        }
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_CLEAR(chunks);
    }
    if (line == NULL) {
        Py_INCREF(_PyIO_empty_str);
        line = _PyIO_empty_str;
    }

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
2175
2176 /*[clinic input]
2177 _io.TextIOWrapper.readline
2178 size: Py_ssize_t = -1
2179 /
2180 [clinic start generated code]*/
2181
2182 static PyObject *
_io_TextIOWrapper_readline_impl(textio * self,Py_ssize_t size)2183 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2184 /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2185 {
2186 CHECK_ATTACHED(self);
2187 return _textiowrapper_readline(self, size);
2188 }
2189
2190 /* Seek and Tell */
2191
/* Unpacked form of the integer cookie handled by the seek/tell code.
 * textiowrapper_parse_cookie() fills each field from a fixed offset (the
 * OFF_* macros) of the cookie's byte representation.
 * NOTE(review): the per-field notes below are inferred from the field names
 * only -- confirm against the seek()/tell() implementations.
 */
typedef struct {
    Py_off_t start_pos;     /* presumably a position in the underlying buffer */
    int dec_flags;          /* presumably decoder state flags */
    int bytes_to_feed;      /* presumably a count of bytes to feed the decoder */
    int chars_to_skip;      /* presumably a count of decoded chars to skip */
    char need_eof;          /* presumably a boolean flag */
} cookie_type;
2199
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
   The following macros define at which offsets in the intermediary byte
   buffer the various cookie_type fields will be stored.
*/

/* Size of the intermediary buffer: the sum of the sizes of the five
   cookie_type fields, without any padding between them. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
2231
2232 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)2233 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2234 {
2235 unsigned char buffer[COOKIE_BUF_LEN];
2236 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2237 if (cookieLong == NULL)
2238 return -1;
2239
2240 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2241 PY_LITTLE_ENDIAN, 0) < 0) {
2242 Py_DECREF(cookieLong);
2243 return -1;
2244 }
2245 Py_DECREF(cookieLong);
2246
2247 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2248 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2249 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2250 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2251 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2252
2253 return 0;
2254 }
2255
2256 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)2257 textiowrapper_build_cookie(cookie_type *cookie)
2258 {
2259 unsigned char buffer[COOKIE_BUF_LEN];
2260
2261 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2262 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2263 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2264 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2265 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2266
2267 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2268 PY_LITTLE_ENDIAN, 0);
2269 }
2270
2271 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2272 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2273 {
2274 PyObject *res;
2275 /* When seeking to the start of the stream, we call decoder.reset()
2276 rather than decoder.getstate().
2277 This is for a few decoders such as utf-16 for which the state value
2278 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2279 utf-16, that we are expecting a BOM).
2280 */
2281 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2282 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2283 else
2284 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2285 "((yi))", "", cookie->dec_flags);
2286 if (res == NULL)
2287 return -1;
2288 Py_DECREF(res);
2289 return 0;
2290 }
2291
2292 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2293 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2294 {
2295 PyObject *res;
2296 if (start_of_stream) {
2297 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2298 self->encoding_start_of_stream = 1;
2299 }
2300 else {
2301 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2302 _PyLong_Zero, NULL);
2303 self->encoding_start_of_stream = 0;
2304 }
2305 if (res == NULL)
2306 return -1;
2307 Py_DECREF(res);
2308 return 0;
2309 }
2310
2311 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2312 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2313 {
2314 /* Same as _textiowrapper_decoder_setstate() above. */
2315 return _textiowrapper_encoder_reset(
2316 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2317 }
2318
2319 /*[clinic input]
2320 _io.TextIOWrapper.seek
2321 cookie as cookieObj: object
2322 whence: int = 0
2323 /
2324 [clinic start generated code]*/
2325
static PyObject *
_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
{
    /* Implementation of TextIOWrapper.seek(cookie, whence=0).
     *
     * whence == 1: only a zero cookie is accepted; it is replaced by the
     *              result of self.tell() and handled as an absolute seek.
     * whence == 2: only a zero cookie is accepted; the buffer is seeked to
     *              its end and the buffer's seek() result is returned.
     * whence == 0: the cookie is unpacked, the buffer is seeked to
     *              cookie.start_pos and the decoder/encoder are restored.
     * otherwise:   ValueError.
     *
     * Returns a new reference (the cookie, or the buffer's seek() result for
     * whence == 2), or NULL with an exception set.
     */
    PyObject *posobj;
    cookie_type cookie;
    PyObject *res;
    int cmp;
    PyObject *snapshot;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    /* Hold our own reference: cookieObj may be replaced below and is either
       returned to the caller or released at `fail`. */
    Py_INCREF(cookieObj);

    if (!self->seekable) {
        _unsupported("underlying stream is not seekable");
        goto fail;
    }

    if (whence == 1) {
        /* seek relative to current position */
        cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
        if (cmp < 0)
            goto fail;

        if (cmp == 0) {
            _unsupported("can't do nonzero cur-relative seeks");
            goto fail;
        }

        /* Seeking to the current position should attempt to
         * sync the underlying buffer with the current position.
         */
        Py_DECREF(cookieObj);
        cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
        if (cookieObj == NULL)
            goto fail;
        /* Falls through to the absolute seek below with the tell() cookie. */
    }
    else if (whence == 2) {
        /* seek relative to end of file */
        cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
        if (cmp < 0)
            goto fail;

        if (cmp == 0) {
            _unsupported("can't do nonzero end-relative seeks");
            goto fail;
        }

        res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
        if (res == NULL)
            goto fail;
        Py_DECREF(res);

        /* Drop any decoded-but-unread text and the matching snapshot. */
        textiowrapper_set_decoded_chars(self, NULL);
        Py_CLEAR(self->snapshot);
        if (self->decoder) {
            res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
            if (res == NULL)
                goto fail;
            Py_DECREF(res);
        }

        res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
        /* The cookie is no longer needed; clearing it makes the XDECREF at
           `fail` a no-op. */
        Py_CLEAR(cookieObj);
        if (res == NULL)
            goto fail;
        if (self->encoder) {
            /* If seek() == 0, we are at the start of stream, otherwise not */
            cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
            if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
                Py_DECREF(res);
                goto fail;
            }
        }
        return res;
    }
    else if (whence != 0) {
        PyErr_Format(PyExc_ValueError,
                     "invalid whence (%d, should be 0, 1 or 2)", whence);
        goto fail;
    }

    /* Absolute seek: reject negative cookies. */
    cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
    if (cmp < 0)
        goto fail;

    if (cmp == 1) {
        PyErr_Format(PyExc_ValueError,
                     "negative seek position %R", cookieObj);
        goto fail;
    }

    res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    /* The strategy of seek() is to go back to the safe start point
     * and replay the effect of read(chars_to_skip) from there.
     */
    if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
        goto fail;

    /* Seek back to the safe start point. */
    posobj = PyLong_FromOff_t(cookie.start_pos);
    if (posobj == NULL)
        goto fail;
    res = PyObject_CallMethodObjArgs(self->buffer,
                                     _PyIO_str_seek, posobj, NULL);
    Py_DECREF(posobj);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    textiowrapper_set_decoded_chars(self, NULL);
    Py_CLEAR(self->snapshot);

    /* Restore the decoder to its state from the safe start point. */
    if (self->decoder) {
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
    }

    if (cookie.chars_to_skip) {
        /* Just like _read_chunk, feed the decoder and save a snapshot. */
        PyObject *input_chunk = _PyObject_CallMethodId(
            self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
        PyObject *decoded;

        if (input_chunk == NULL)
            goto fail;

        if (!PyBytes_Check(input_chunk)) {
            PyErr_Format(PyExc_TypeError,
                         "underlying read() should have returned a bytes "
                         "object, not '%.200s'",
                         Py_TYPE(input_chunk)->tp_name);
            Py_DECREF(input_chunk);
            goto fail;
        }

        /* "N" hands our reference to input_chunk over to the tuple, so
           from here on input_chunk is only a borrowed pointer that the
           snapshot keeps alive. */
        snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
        if (snapshot == NULL) {
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);

        decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
            "Oi", input_chunk, (int)cookie.need_eof);

        if (check_decoded(decoded) < 0)
            goto fail;

        textiowrapper_set_decoded_chars(self, decoded);

        /* Skip chars_to_skip of the decoded characters. */
        if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
            PyErr_SetString(PyExc_OSError, "can't restore logical file position");
            goto fail;
        }
        self->decoded_chars_used = cookie.chars_to_skip;
    }
    else {
        /* Nothing to replay: the snapshot is (dec_flags, b""). */
        snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
        if (snapshot == NULL)
            goto fail;
        Py_XSETREF(self->snapshot, snapshot);
    }

    /* Finally, reset the encoder (merely useful for proper BOM handling) */
    if (self->encoder) {
        if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
            goto fail;
    }
    /* Our reference to cookieObj is handed to the caller. */
    return cookieObj;
  fail:
    Py_XDECREF(cookieObj);
    return NULL;

}
2508
2509 /*[clinic input]
2510 _io.TextIOWrapper.tell
2511 [clinic start generated code]*/
2512
static PyObject *
_io_TextIOWrapper_tell_impl(textio *self)
/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
{
    /* Implementation of TextIOWrapper.tell().
     *
     * Without a decoder or a snapshot the result is simply the buffer's
     * tell() value.  Otherwise a cookie is built from a "safe start point"
     * at or after the snapshot (a byte position where the decoder has
     * nothing buffered), the decoder flags at that point, the number of
     * bytes to feed from there, the number of decoded characters to skip,
     * and whether EOF must be signalled to the decoder.
     *
     * The decoder is driven through several trial decodes; its original
     * state is saved in saved_state and restored on both the success
     * (`finally`) and the error (`fail`) path.
     *
     * Returns a new reference, or NULL with an exception set.
     */
    PyObject *res;
    PyObject *posobj = NULL;
    cookie_type cookie = {0,0,0,0,0};
    PyObject *next_input;
    Py_ssize_t chars_to_skip, chars_decoded;
    Py_ssize_t skip_bytes, skip_back;
    PyObject *saved_state = NULL;
    char *input, *input_end;
    Py_ssize_t dec_buffer_len;
    int dec_flags;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (!self->seekable) {
        _unsupported("underlying stream is not seekable");
        goto fail;
    }
    if (!self->telling) {
        PyErr_SetString(PyExc_OSError,
                        "telling position disabled by next() call");
        goto fail;
    }

    /* saved_state is still NULL here, so returning directly skips nothing
       that the `fail` label would do. */
    if (_textiowrapper_writeflush(self) < 0)
        return NULL;
    res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
    if (posobj == NULL)
        goto fail;

    if (self->decoder == NULL || self->snapshot == NULL) {
        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
        return posobj;
    }

#if defined(HAVE_LARGEFILE_SUPPORT)
    cookie.start_pos = PyLong_AsLongLong(posobj);
#else
    cookie.start_pos = PyLong_AsLong(posobj);
#endif
    Py_DECREF(posobj);
    /* A conversion failure is detected through the pending exception. */
    if (PyErr_Occurred())
        goto fail;

    /* Skip backward to the snapshot point (see _read_chunk). */
    assert(PyTuple_Check(self->snapshot));
    /* next_input is a borrowed reference owned by self->snapshot. */
    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
        goto fail;

    assert (PyBytes_Check(next_input));

    cookie.start_pos -= PyBytes_GET_SIZE(next_input);

    /* How many decoded characters have been used up since the snapshot? */
    if (self->decoded_chars_used == 0)  {
        /* We haven't moved from the snapshot point. */
        return textiowrapper_build_cookie(&cookie);
    }

    chars_to_skip = self->decoded_chars_used;

    /* Decoder state will be restored at the end */
    saved_state = PyObject_CallMethodObjArgs(self->decoder,
                                             _PyIO_str_getstate, NULL);
    if (saved_state == NULL)
        goto fail;

/* DECODER_GETSTATE(): call decoder.getstate(), validate that it is a
   (bytes, int) tuple, and store the length of the bytes item in
   dec_buffer_len and the int in dec_flags.  Jumps to `fail` on error. */
#define DECODER_GETSTATE() do { \
        PyObject *dec_buffer; \
        PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
            _PyIO_str_getstate, NULL); \
        if (_state == NULL) \
            goto fail; \
        if (!PyTuple_Check(_state)) { \
            PyErr_SetString(PyExc_TypeError, \
                            "illegal decoder state"); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
                              &dec_buffer, &dec_flags)) \
        { \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyBytes_Check(dec_buffer)) { \
            PyErr_Format(PyExc_TypeError, \
                         "illegal decoder state: the first item should be a " \
                         "bytes object, not '%.200s'", \
                         Py_TYPE(dec_buffer)->tp_name); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
        Py_DECREF(_state); \
    } while (0)

/* DECODER_DECODE(start, len, res): feed `len` bytes starting at `start` to
   decoder.decode() and store the number of characters produced in `res`.
   Jumps to `fail` on error. */
#define DECODER_DECODE(start, len, res) do { \
        PyObject *_decoded = _PyObject_CallMethodId( \
            self->decoder, &PyId_decode, "y#", start, len); \
        if (check_decoded(_decoded) < 0) \
            goto fail; \
        res = PyUnicode_GET_LENGTH(_decoded); \
        Py_DECREF(_decoded); \
    } while (0)

    /* Fast search for an acceptable start point, close to our
       current pos */
    /* The first guess scales the character count by self->b2cratio. */
    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
    skip_back = 1;
    assert(skip_back <= PyBytes_GET_SIZE(next_input));
    input = PyBytes_AS_STRING(next_input);
    while (skip_bytes > 0) {
        /* Decode up to tentative start point */
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
        DECODER_DECODE(input, skip_bytes, chars_decoded);
        if (chars_decoded <= chars_to_skip) {
            DECODER_GETSTATE();
            if (dec_buffer_len == 0) {
                /* Before pos and no bytes buffered in decoder => OK */
                cookie.dec_flags = dec_flags;
                chars_to_skip -= chars_decoded;
                break;
            }
            /* Skip back by buffered amount and reset heuristic */
            skip_bytes -= dec_buffer_len;
            skip_back = 1;
        }
        else {
            /* We're too far ahead, skip back a bit */
            skip_bytes -= skip_back;
            skip_back *= 2;
        }
    }
    if (skip_bytes <= 0) {
        /* The search ran out of bytes: start over from the snapshot. */
        skip_bytes = 0;
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
    }

    /* Note our initial start point. */
    cookie.start_pos += skip_bytes;
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    if (chars_to_skip == 0)
        goto finally;

    /* We should be close to the desired position.  Now feed the decoder one
     * byte at a time until we reach the `chars_to_skip` target.
     * As we go, note the nearest "safe start point" before the current
     * location (a point where the decoder has nothing buffered, so seek()
     * can safely start from there and advance to this location).
     */
    chars_decoded = 0;
    input = PyBytes_AS_STRING(next_input);
    input_end = input + PyBytes_GET_SIZE(next_input);
    input += skip_bytes;
    while (input < input_end) {
        Py_ssize_t n;

        DECODER_DECODE(input, (Py_ssize_t)1, n);
        /* We got n chars for 1 byte */
        chars_decoded += n;
        cookie.bytes_to_feed += 1;
        DECODER_GETSTATE();

        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
            /* Decoder buffer is empty, so this is a safe start point. */
            cookie.start_pos += cookie.bytes_to_feed;
            chars_to_skip -= chars_decoded;
            cookie.dec_flags = dec_flags;
            cookie.bytes_to_feed = 0;
            chars_decoded = 0;
        }
        if (chars_decoded >= chars_to_skip)
            break;
        input++;
    }
    if (input == input_end) {
        /* We didn't get enough decoded data; signal EOF to get more. */
        PyObject *decoded = _PyObject_CallMethodId(
            self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
        if (check_decoded(decoded) < 0)
            goto fail;
        chars_decoded += PyUnicode_GET_LENGTH(decoded);
        Py_DECREF(decoded);
        cookie.need_eof = 1;

        if (chars_decoded < chars_to_skip) {
            PyErr_SetString(PyExc_OSError,
                            "can't reconstruct logical file position");
            goto fail;
        }
    }

finally:
    /* Success path: put the decoder back into the state it had on entry. */
    res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
    Py_DECREF(saved_state);
    if (res == NULL)
        return NULL;
    Py_DECREF(res);

    /* The returned cookie corresponds to the last safe start point. */
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    return textiowrapper_build_cookie(&cookie);

fail:
    /* Error path: still try to restore the decoder state.  The pending
       exception is fetched first and handed to _PyErr_ChainExceptions()
       after the setstate() attempt. */
    if (saved_state) {
        PyObject *type, *value, *traceback;
        PyErr_Fetch(&type, &value, &traceback);
        res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
        _PyErr_ChainExceptions(type, value, traceback);
        Py_DECREF(saved_state);
        Py_XDECREF(res);
    }
    return NULL;
}
2739
2740 /*[clinic input]
2741 _io.TextIOWrapper.truncate
2742 pos: object = None
2743 /
2744 [clinic start generated code]*/
2745
2746 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2747 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2748 /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2749 {
2750 PyObject *res;
2751
2752 CHECK_ATTACHED(self)
2753
2754 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2755 if (res == NULL)
2756 return NULL;
2757 Py_DECREF(res);
2758
2759 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2760 }
2761
2762 static PyObject *
textiowrapper_repr(textio * self)2763 textiowrapper_repr(textio *self)
2764 {
2765 PyObject *nameobj, *modeobj, *res, *s;
2766 int status;
2767
2768 CHECK_INITIALIZED(self);
2769
2770 res = PyUnicode_FromString("<_io.TextIOWrapper");
2771 if (res == NULL)
2772 return NULL;
2773
2774 status = Py_ReprEnter((PyObject *)self);
2775 if (status != 0) {
2776 if (status > 0) {
2777 PyErr_Format(PyExc_RuntimeError,
2778 "reentrant call inside %s.__repr__",
2779 Py_TYPE(self)->tp_name);
2780 }
2781 goto error;
2782 }
2783 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
2784 if (nameobj == NULL) {
2785 if (PyErr_ExceptionMatches(PyExc_Exception))
2786 PyErr_Clear();
2787 else
2788 goto error;
2789 }
2790 else {
2791 s = PyUnicode_FromFormat(" name=%R", nameobj);
2792 Py_DECREF(nameobj);
2793 if (s == NULL)
2794 goto error;
2795 PyUnicode_AppendAndDel(&res, s);
2796 if (res == NULL)
2797 goto error;
2798 }
2799 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
2800 if (modeobj == NULL) {
2801 if (PyErr_ExceptionMatches(PyExc_Exception))
2802 PyErr_Clear();
2803 else
2804 goto error;
2805 }
2806 else {
2807 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2808 Py_DECREF(modeobj);
2809 if (s == NULL)
2810 goto error;
2811 PyUnicode_AppendAndDel(&res, s);
2812 if (res == NULL)
2813 goto error;
2814 }
2815 s = PyUnicode_FromFormat("%U encoding=%R>",
2816 res, self->encoding);
2817 Py_DECREF(res);
2818 if (status == 0) {
2819 Py_ReprLeave((PyObject *)self);
2820 }
2821 return s;
2822
2823 error:
2824 Py_XDECREF(res);
2825 if (status == 0) {
2826 Py_ReprLeave((PyObject *)self);
2827 }
2828 return NULL;
2829 }
2830
2831
2832 /* Inquiries */
2833
2834 /*[clinic input]
2835 _io.TextIOWrapper.fileno
2836 [clinic start generated code]*/
2837
2838 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)2839 _io_TextIOWrapper_fileno_impl(textio *self)
2840 /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2841 {
2842 CHECK_ATTACHED(self);
2843 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
2844 }
2845
2846 /*[clinic input]
2847 _io.TextIOWrapper.seekable
2848 [clinic start generated code]*/
2849
2850 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)2851 _io_TextIOWrapper_seekable_impl(textio *self)
2852 /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2853 {
2854 CHECK_ATTACHED(self);
2855 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
2856 }
2857
2858 /*[clinic input]
2859 _io.TextIOWrapper.readable
2860 [clinic start generated code]*/
2861
2862 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)2863 _io_TextIOWrapper_readable_impl(textio *self)
2864 /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2865 {
2866 CHECK_ATTACHED(self);
2867 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
2868 }
2869
2870 /*[clinic input]
2871 _io.TextIOWrapper.writable
2872 [clinic start generated code]*/
2873
2874 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)2875 _io_TextIOWrapper_writable_impl(textio *self)
2876 /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
2877 {
2878 CHECK_ATTACHED(self);
2879 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
2880 }
2881
2882 /*[clinic input]
2883 _io.TextIOWrapper.isatty
2884 [clinic start generated code]*/
2885
2886 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)2887 _io_TextIOWrapper_isatty_impl(textio *self)
2888 /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
2889 {
2890 CHECK_ATTACHED(self);
2891 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
2892 }
2893
2894 static PyObject *
textiowrapper_getstate(textio * self,PyObject * args)2895 textiowrapper_getstate(textio *self, PyObject *args)
2896 {
2897 PyErr_Format(PyExc_TypeError,
2898 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2899 return NULL;
2900 }
2901
2902 /*[clinic input]
2903 _io.TextIOWrapper.flush
2904 [clinic start generated code]*/
2905
2906 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)2907 _io_TextIOWrapper_flush_impl(textio *self)
2908 /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
2909 {
2910 CHECK_ATTACHED(self);
2911 CHECK_CLOSED(self);
2912 self->telling = self->seekable;
2913 if (_textiowrapper_writeflush(self) < 0)
2914 return NULL;
2915 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
2916 }
2917
2918 /*[clinic input]
2919 _io.TextIOWrapper.close
2920 [clinic start generated code]*/
2921
2922 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)2923 _io_TextIOWrapper_close_impl(textio *self)
2924 /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
2925 {
2926 PyObject *res;
2927 int r;
2928 CHECK_ATTACHED(self);
2929
2930 res = textiowrapper_closed_get(self, NULL);
2931 if (res == NULL)
2932 return NULL;
2933 r = PyObject_IsTrue(res);
2934 Py_DECREF(res);
2935 if (r < 0)
2936 return NULL;
2937
2938 if (r > 0) {
2939 Py_RETURN_NONE; /* stream already closed */
2940 }
2941 else {
2942 PyObject *exc = NULL, *val, *tb;
2943 if (self->finalizing) {
2944 res = _PyObject_CallMethodIdObjArgs(self->buffer,
2945 &PyId__dealloc_warn,
2946 self, NULL);
2947 if (res)
2948 Py_DECREF(res);
2949 else
2950 PyErr_Clear();
2951 }
2952 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2953 if (res == NULL)
2954 PyErr_Fetch(&exc, &val, &tb);
2955 else
2956 Py_DECREF(res);
2957
2958 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2959 if (exc != NULL) {
2960 _PyErr_ChainExceptions(exc, val, tb);
2961 Py_CLEAR(res);
2962 }
2963 return res;
2964 }
2965 }
2966
2967 static PyObject *
textiowrapper_iternext(textio * self)2968 textiowrapper_iternext(textio *self)
2969 {
2970 PyObject *line;
2971
2972 CHECK_ATTACHED(self);
2973
2974 self->telling = 0;
2975 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2976 /* Skip method call overhead for speed */
2977 line = _textiowrapper_readline(self, -1);
2978 }
2979 else {
2980 line = PyObject_CallMethodObjArgs((PyObject *)self,
2981 _PyIO_str_readline, NULL);
2982 if (line && !PyUnicode_Check(line)) {
2983 PyErr_Format(PyExc_OSError,
2984 "readline() should have returned a str object, "
2985 "not '%.200s'", Py_TYPE(line)->tp_name);
2986 Py_DECREF(line);
2987 return NULL;
2988 }
2989 }
2990
2991 if (line == NULL || PyUnicode_READY(line) == -1)
2992 return NULL;
2993
2994 if (PyUnicode_GET_LENGTH(line) == 0) {
2995 /* Reached EOF or would have blocked */
2996 Py_DECREF(line);
2997 Py_CLEAR(self->snapshot);
2998 self->telling = self->seekable;
2999 return NULL;
3000 }
3001
3002 return line;
3003 }
3004
3005 static PyObject *
textiowrapper_name_get(textio * self,void * context)3006 textiowrapper_name_get(textio *self, void *context)
3007 {
3008 CHECK_ATTACHED(self);
3009 return _PyObject_GetAttrId(self->buffer, &PyId_name);
3010 }
3011
3012 static PyObject *
textiowrapper_closed_get(textio * self,void * context)3013 textiowrapper_closed_get(textio *self, void *context)
3014 {
3015 CHECK_ATTACHED(self);
3016 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3017 }
3018
3019 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)3020 textiowrapper_newlines_get(textio *self, void *context)
3021 {
3022 PyObject *res;
3023 CHECK_ATTACHED(self);
3024 if (self->decoder == NULL ||
3025 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3026 {
3027 Py_RETURN_NONE;
3028 }
3029 return res;
3030 }
3031
3032 static PyObject *
textiowrapper_errors_get(textio * self,void * context)3033 textiowrapper_errors_get(textio *self, void *context)
3034 {
3035 CHECK_INITIALIZED(self);
3036 Py_INCREF(self->errors);
3037 return self->errors;
3038 }
3039
3040 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)3041 textiowrapper_chunk_size_get(textio *self, void *context)
3042 {
3043 CHECK_ATTACHED(self);
3044 return PyLong_FromSsize_t(self->chunk_size);
3045 }
3046
3047 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)3048 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
3049 {
3050 Py_ssize_t n;
3051 CHECK_ATTACHED_INT(self);
3052 if (arg == NULL) {
3053 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3054 return -1;
3055 }
3056 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
3057 if (n == -1 && PyErr_Occurred())
3058 return -1;
3059 if (n <= 0) {
3060 PyErr_SetString(PyExc_ValueError,
3061 "a strictly positive integer is required");
3062 return -1;
3063 }
3064 self->chunk_size = n;
3065 return 0;
3066 }
3067
3068 #include "clinic/textio.c.h"
3069
/* Method table of _io.IncrementalNewlineDecoder (decode, getstate, setstate,
   reset).  Each *_METHODDEF macro expands to one table entry; they are
   presumably supplied by the generated clinic header. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}  /* sentinel */
};
3077
/* Computed attributes of _io.IncrementalNewlineDecoder: `newlines` has a
   getter only (no setter is registered). */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}  /* sentinel */
};
3082
/* Type object for _io.IncrementalNewlineDecoder.  The initializer is
   positional, so the order of the entries below must not be changed.  The
   type can be subclassed (Py_TPFLAGS_BASETYPE) and registers no
   tp_traverse/tp_clear. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    _io_IncrementalNewlineDecoder___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
3123
3124
/* Method table of _io.TextIOWrapper.  Each *_METHODDEF macro expands to one
   table entry (presumably supplied by the generated clinic header);
   __getstate__ is registered by hand and takes no arguments. */
static PyMethodDef textiowrapper_methods[] = {
    /* Core stream operations */
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Inquiries */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
    {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
3146
/* Struct fields of textio exposed directly as attributes.  All are
   READONLY except `_finalizing`, whose flags are 0. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
    {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
    {NULL}  /* sentinel */
};
3155
/* Computed attributes of _io.TextIOWrapper.  Only `_CHUNK_SIZE` has a
   setter; the others are getter-only.  The `mode` entry is disabled
   (commented out). */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}  /* sentinel */
};
3167
/* Type object for _io.TextIOWrapper.  The initializer is positional, so the
   order of the entries below must not be changed.  The type can be
   subclassed, takes part in garbage collection (tp_traverse/tp_clear),
   supports weak references and an instance dict, and acts as its own
   iterator through tp_iternext. */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
        | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
    _io_TextIOWrapper___init____doc__, /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    _io_TextIOWrapper___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};
3219