1 /*
2  * multibytecodec.c: Common Multibyte Codec Implementation
3  *
4  * Written by Hye-Shik Chang <perky@FreeBSD.org>
5  */
6 
7 #define PY_SSIZE_T_CLEAN
8 #include "Python.h"
9 #include "structmember.h"         // PyMemberDef
10 #include "multibytecodec.h"
11 #include "clinic/multibytecodec.c.h"
12 
13 /*[clinic input]
14 module _multibytecodec
15 class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "&MultibyteCodec_Type"
16 [clinic start generated code]*/
17 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=6ad689546cbb5450]*/
18 
19 typedef struct {
20     PyObject            *inobj;
21     Py_ssize_t          inpos, inlen;
22     unsigned char       *outbuf, *outbuf_end;
23     PyObject            *excobj, *outobj;
24 } MultibyteEncodeBuffer;
25 
26 typedef struct {
27     const unsigned char *inbuf, *inbuf_top, *inbuf_end;
28     PyObject            *excobj;
29     _PyUnicodeWriter    writer;
30 } MultibyteDecodeBuffer;
31 
32 static char *incnewkwarglist[] = {"errors", NULL};
33 static char *streamkwarglist[] = {"stream", "errors", NULL};
34 
35 static PyObject *multibytecodec_encode(MultibyteCodec *,
36                 MultibyteCodec_State *, PyObject *, Py_ssize_t *,
37                 PyObject *, int);
38 
39 #define MBENC_RESET     MBENC_MAX<<1 /* reset after an encoding session */
40 
41 _Py_IDENTIFIER(write);
42 
43 static PyObject *
make_tuple(PyObject * object,Py_ssize_t len)44 make_tuple(PyObject *object, Py_ssize_t len)
45 {
46     PyObject *v, *w;
47 
48     if (object == NULL)
49         return NULL;
50 
51     v = PyTuple_New(2);
52     if (v == NULL) {
53         Py_DECREF(object);
54         return NULL;
55     }
56     PyTuple_SET_ITEM(v, 0, object);
57 
58     w = PyLong_FromSsize_t(len);
59     if (w == NULL) {
60         Py_DECREF(v);
61         return NULL;
62     }
63     PyTuple_SET_ITEM(v, 1, w);
64 
65     return v;
66 }
67 
68 static PyObject *
internal_error_callback(const char * errors)69 internal_error_callback(const char *errors)
70 {
71     if (errors == NULL || strcmp(errors, "strict") == 0)
72         return ERROR_STRICT;
73     else if (strcmp(errors, "ignore") == 0)
74         return ERROR_IGNORE;
75     else if (strcmp(errors, "replace") == 0)
76         return ERROR_REPLACE;
77     else
78         return PyUnicode_FromString(errors);
79 }
80 
81 static PyObject *
call_error_callback(PyObject * errors,PyObject * exc)82 call_error_callback(PyObject *errors, PyObject *exc)
83 {
84     PyObject *cb, *r;
85     const char *str;
86 
87     assert(PyUnicode_Check(errors));
88     str = PyUnicode_AsUTF8(errors);
89     if (str == NULL)
90         return NULL;
91     cb = PyCodec_LookupError(str);
92     if (cb == NULL)
93         return NULL;
94 
95     r = PyObject_CallOneArg(cb, exc);
96     Py_DECREF(cb);
97     return r;
98 }
99 
100 static PyObject *
codecctx_errors_get(MultibyteStatefulCodecContext * self,void * Py_UNUSED (ignored))101 codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
102 {
103     const char *errors;
104 
105     if (self->errors == ERROR_STRICT)
106         errors = "strict";
107     else if (self->errors == ERROR_IGNORE)
108         errors = "ignore";
109     else if (self->errors == ERROR_REPLACE)
110         errors = "replace";
111     else {
112         Py_INCREF(self->errors);
113         return self->errors;
114     }
115 
116     return PyUnicode_FromString(errors);
117 }
118 
119 static int
codecctx_errors_set(MultibyteStatefulCodecContext * self,PyObject * value,void * closure)120 codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
121                     void *closure)
122 {
123     PyObject *cb;
124     const char *str;
125 
126     if (value == NULL) {
127         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
128         return -1;
129     }
130     if (!PyUnicode_Check(value)) {
131         PyErr_SetString(PyExc_TypeError, "errors must be a string");
132         return -1;
133     }
134 
135     str = PyUnicode_AsUTF8(value);
136     if (str == NULL)
137         return -1;
138 
139     cb = internal_error_callback(str);
140     if (cb == NULL)
141         return -1;
142 
143     ERROR_DECREF(self->errors);
144     self->errors = cb;
145     return 0;
146 }
147 
148 /* This getset handlers list is used by all the stateful codec objects */
149 static PyGetSetDef codecctx_getsets[] = {
150     {"errors",          (getter)codecctx_errors_get,
151                     (setter)codecctx_errors_set,
152                     PyDoc_STR("how to treat errors")},
153     {NULL,}
154 };
155 
156 static int
expand_encodebuffer(MultibyteEncodeBuffer * buf,Py_ssize_t esize)157 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
158 {
159     Py_ssize_t orgpos, orgsize, incsize;
160 
161     orgpos = (Py_ssize_t)((char *)buf->outbuf -
162                             PyBytes_AS_STRING(buf->outobj));
163     orgsize = PyBytes_GET_SIZE(buf->outobj);
164     incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
165 
166     if (orgsize > PY_SSIZE_T_MAX - incsize) {
167         PyErr_NoMemory();
168         return -1;
169     }
170 
171     if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
172         return -1;
173 
174     buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
175     buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
176         + PyBytes_GET_SIZE(buf->outobj);
177 
178     return 0;
179 }
/* Make sure `s` more bytes fit into the output buffer of `buf`, growing it
 * when they do not.  A negative `s` always grows the buffer.  On failure
 * control jumps to the `errorexit` label of the enclosing function. */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                               \
    if (((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) &&         \
        expand_encodebuffer(buf, s) == -1) {                            \
        goto errorexit;                                                 \
    }                                                                   \
} while(0)
185 
186 
187 /**
188  * MultibyteCodec object
189  */
190 
191 static int
multibytecodec_encerror(MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteEncodeBuffer * buf,PyObject * errors,Py_ssize_t e)192 multibytecodec_encerror(MultibyteCodec *codec,
193                         MultibyteCodec_State *state,
194                         MultibyteEncodeBuffer *buf,
195                         PyObject *errors, Py_ssize_t e)
196 {
197     PyObject *retobj = NULL, *retstr = NULL, *tobj;
198     Py_ssize_t retstrsize, newpos;
199     Py_ssize_t esize, start, end;
200     const char *reason;
201 
202     if (e > 0) {
203         reason = "illegal multibyte sequence";
204         esize = e;
205     }
206     else {
207         switch (e) {
208         case MBERR_TOOSMALL:
209             REQUIRE_ENCODEBUFFER(buf, -1);
210             return 0; /* retry it */
211         case MBERR_TOOFEW:
212             reason = "incomplete multibyte sequence";
213             esize = (Py_ssize_t)buf->inpos;
214             break;
215         case MBERR_INTERNAL:
216             PyErr_SetString(PyExc_RuntimeError,
217                             "internal codec error");
218             return -1;
219         default:
220             PyErr_SetString(PyExc_RuntimeError,
221                             "unknown runtime error");
222             return -1;
223         }
224     }
225 
226     if (errors == ERROR_REPLACE) {
227         PyObject *replchar;
228         Py_ssize_t r;
229         Py_ssize_t inpos;
230         int kind;
231         const void *data;
232 
233         replchar = PyUnicode_FromOrdinal('?');
234         if (replchar == NULL)
235             goto errorexit;
236         kind = PyUnicode_KIND(replchar);
237         data = PyUnicode_DATA(replchar);
238 
239         inpos = 0;
240         for (;;) {
241             Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
242 
243             r = codec->encode(state, codec->config,
244                               kind, data, &inpos, 1,
245                               &buf->outbuf, outleft, 0);
246             if (r == MBERR_TOOSMALL) {
247                 REQUIRE_ENCODEBUFFER(buf, -1);
248                 continue;
249             }
250             else
251                 break;
252         }
253 
254         Py_DECREF(replchar);
255 
256         if (r != 0) {
257             REQUIRE_ENCODEBUFFER(buf, 1);
258             *buf->outbuf++ = '?';
259         }
260     }
261     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
262         buf->inpos += esize;
263         return 0;
264     }
265 
266     start = (Py_ssize_t)buf->inpos;
267     end = start + esize;
268 
269     /* use cached exception object if available */
270     if (buf->excobj == NULL) {
271         buf->excobj =  PyObject_CallFunction(PyExc_UnicodeEncodeError,
272                                              "sOnns",
273                                              codec->encoding, buf->inobj,
274                                              start, end, reason);
275         if (buf->excobj == NULL)
276             goto errorexit;
277     }
278     else
279         if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
280             PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
281             PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
282             goto errorexit;
283 
284     if (errors == ERROR_STRICT) {
285         PyCodec_StrictErrors(buf->excobj);
286         goto errorexit;
287     }
288 
289     retobj = call_error_callback(errors, buf->excobj);
290     if (retobj == NULL)
291         goto errorexit;
292 
293     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
294         (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
295         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
296         PyErr_SetString(PyExc_TypeError,
297                         "encoding error handler must return "
298                         "(str, int) tuple");
299         goto errorexit;
300     }
301 
302     if (PyUnicode_Check(tobj)) {
303         Py_ssize_t inpos;
304 
305         retstr = multibytecodec_encode(codec, state, tobj,
306                         &inpos, ERROR_STRICT,
307                         MBENC_FLUSH);
308         if (retstr == NULL)
309             goto errorexit;
310     }
311     else {
312         Py_INCREF(tobj);
313         retstr = tobj;
314     }
315 
316     assert(PyBytes_Check(retstr));
317     retstrsize = PyBytes_GET_SIZE(retstr);
318     if (retstrsize > 0) {
319         REQUIRE_ENCODEBUFFER(buf, retstrsize);
320         memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
321         buf->outbuf += retstrsize;
322     }
323 
324     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
325     if (newpos < 0 && !PyErr_Occurred())
326         newpos += (Py_ssize_t)buf->inlen;
327     if (newpos < 0 || newpos > buf->inlen) {
328         PyErr_Clear();
329         PyErr_Format(PyExc_IndexError,
330                      "position %zd from error handler out of bounds",
331                      newpos);
332         goto errorexit;
333     }
334     buf->inpos = newpos;
335 
336     Py_DECREF(retobj);
337     Py_DECREF(retstr);
338     return 0;
339 
340 errorexit:
341     Py_XDECREF(retobj);
342     Py_XDECREF(retstr);
343     return -1;
344 }
345 
346 static int
multibytecodec_decerror(MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteDecodeBuffer * buf,PyObject * errors,Py_ssize_t e)347 multibytecodec_decerror(MultibyteCodec *codec,
348                         MultibyteCodec_State *state,
349                         MultibyteDecodeBuffer *buf,
350                         PyObject *errors, Py_ssize_t e)
351 {
352     PyObject *retobj = NULL, *retuni = NULL;
353     Py_ssize_t newpos;
354     const char *reason;
355     Py_ssize_t esize, start, end;
356 
357     if (e > 0) {
358         reason = "illegal multibyte sequence";
359         esize = e;
360     }
361     else {
362         switch (e) {
363         case MBERR_TOOSMALL:
364             return 0; /* retry it */
365         case MBERR_TOOFEW:
366             reason = "incomplete multibyte sequence";
367             esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
368             break;
369         case MBERR_INTERNAL:
370             PyErr_SetString(PyExc_RuntimeError,
371                             "internal codec error");
372             return -1;
373         case MBERR_EXCEPTION:
374             return -1;
375         default:
376             PyErr_SetString(PyExc_RuntimeError,
377                             "unknown runtime error");
378             return -1;
379         }
380     }
381 
382     if (errors == ERROR_REPLACE) {
383         if (_PyUnicodeWriter_WriteChar(&buf->writer,
384                                        Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
385             goto errorexit;
386     }
387     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
388         buf->inbuf += esize;
389         return 0;
390     }
391 
392     start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
393     end = start + esize;
394 
395     /* use cached exception object if available */
396     if (buf->excobj == NULL) {
397         buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
398                         (const char *)buf->inbuf_top,
399                         (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
400                         start, end, reason);
401         if (buf->excobj == NULL)
402             goto errorexit;
403     }
404     else
405         if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
406             PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
407             PyUnicodeDecodeError_SetReason(buf->excobj, reason))
408             goto errorexit;
409 
410     if (errors == ERROR_STRICT) {
411         PyCodec_StrictErrors(buf->excobj);
412         goto errorexit;
413     }
414 
415     retobj = call_error_callback(errors, buf->excobj);
416     if (retobj == NULL)
417         goto errorexit;
418 
419     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
420         !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
421         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
422         PyErr_SetString(PyExc_TypeError,
423                         "decoding error handler must return "
424                         "(str, int) tuple");
425         goto errorexit;
426     }
427 
428     if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
429         goto errorexit;
430 
431     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
432     if (newpos < 0 && !PyErr_Occurred())
433         newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
434     if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
435         PyErr_Clear();
436         PyErr_Format(PyExc_IndexError,
437                      "position %zd from error handler out of bounds",
438                      newpos);
439         goto errorexit;
440     }
441     buf->inbuf = buf->inbuf_top + newpos;
442     Py_DECREF(retobj);
443     return 0;
444 
445 errorexit:
446     Py_XDECREF(retobj);
447     return -1;
448 }
449 
450 static PyObject *
multibytecodec_encode(MultibyteCodec * codec,MultibyteCodec_State * state,PyObject * text,Py_ssize_t * inpos_t,PyObject * errors,int flags)451 multibytecodec_encode(MultibyteCodec *codec,
452                       MultibyteCodec_State *state,
453                       PyObject *text, Py_ssize_t *inpos_t,
454                       PyObject *errors, int flags)
455 {
456     MultibyteEncodeBuffer buf;
457     Py_ssize_t finalsize, r = 0;
458     Py_ssize_t datalen;
459     int kind;
460     const void *data;
461 
462     if (PyUnicode_READY(text) < 0)
463         return NULL;
464     datalen = PyUnicode_GET_LENGTH(text);
465 
466     if (datalen == 0 && !(flags & MBENC_RESET))
467         return PyBytes_FromStringAndSize(NULL, 0);
468 
469     buf.excobj = NULL;
470     buf.outobj = NULL;
471     buf.inobj = text;   /* borrowed reference */
472     buf.inpos = 0;
473     buf.inlen = datalen;
474     kind = PyUnicode_KIND(buf.inobj);
475     data = PyUnicode_DATA(buf.inobj);
476 
477     if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
478         PyErr_NoMemory();
479         goto errorexit;
480     }
481 
482     buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
483     if (buf.outobj == NULL)
484         goto errorexit;
485     buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
486     buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
487 
488     while (buf.inpos < buf.inlen) {
489         /* we don't reuse inleft and outleft here.
490          * error callbacks can relocate the cursor anywhere on buffer*/
491         Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
492 
493         r = codec->encode(state, codec->config,
494                           kind, data,
495                           &buf.inpos, buf.inlen,
496                           &buf.outbuf, outleft, flags);
497         if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
498             break;
499         else if (multibytecodec_encerror(codec, state, &buf, errors,r))
500             goto errorexit;
501         else if (r == MBERR_TOOFEW)
502             break;
503     }
504 
505     if (codec->encreset != NULL && (flags & MBENC_RESET))
506         for (;;) {
507             Py_ssize_t outleft;
508 
509             outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
510             r = codec->encreset(state, codec->config, &buf.outbuf,
511                                 outleft);
512             if (r == 0)
513                 break;
514             else if (multibytecodec_encerror(codec, state,
515                                              &buf, errors, r))
516                 goto errorexit;
517         }
518 
519     finalsize = (Py_ssize_t)((char *)buf.outbuf -
520                              PyBytes_AS_STRING(buf.outobj));
521 
522     if (finalsize != PyBytes_GET_SIZE(buf.outobj))
523         if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
524             goto errorexit;
525 
526     if (inpos_t)
527         *inpos_t = buf.inpos;
528     Py_XDECREF(buf.excobj);
529     return buf.outobj;
530 
531 errorexit:
532     Py_XDECREF(buf.excobj);
533     Py_XDECREF(buf.outobj);
534     return NULL;
535 }
536 
537 /*[clinic input]
538 _multibytecodec.MultibyteCodec.encode
539 
540   input: object
541   errors: str(accept={str, NoneType}) = None
542 
543 Return an encoded string version of `input'.
544 
545 'errors' may be given to set a different error handling scheme. Default is
546 'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
547 values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
548 registered with codecs.register_error that can handle UnicodeEncodeErrors.
549 [clinic start generated code]*/
550 
551 static PyObject *
_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject * self,PyObject * input,const char * errors)552 _multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
553                                            PyObject *input,
554                                            const char *errors)
555 /*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/
556 {
557     MultibyteCodec_State state;
558     PyObject *errorcb, *r, *ucvt;
559     Py_ssize_t datalen;
560 
561     if (PyUnicode_Check(input))
562         ucvt = NULL;
563     else {
564         input = ucvt = PyObject_Str(input);
565         if (input == NULL)
566             return NULL;
567         else if (!PyUnicode_Check(input)) {
568             PyErr_SetString(PyExc_TypeError,
569                 "couldn't convert the object to unicode.");
570             Py_DECREF(ucvt);
571             return NULL;
572         }
573     }
574 
575     if (PyUnicode_READY(input) < 0) {
576         Py_XDECREF(ucvt);
577         return NULL;
578     }
579     datalen = PyUnicode_GET_LENGTH(input);
580 
581     errorcb = internal_error_callback(errors);
582     if (errorcb == NULL) {
583         Py_XDECREF(ucvt);
584         return NULL;
585     }
586 
587     if (self->codec->encinit != NULL &&
588         self->codec->encinit(&state, self->codec->config) != 0)
589         goto errorexit;
590     r = multibytecodec_encode(self->codec, &state,
591                     input, NULL, errorcb,
592                     MBENC_FLUSH | MBENC_RESET);
593     if (r == NULL)
594         goto errorexit;
595 
596     ERROR_DECREF(errorcb);
597     Py_XDECREF(ucvt);
598     return make_tuple(r, datalen);
599 
600 errorexit:
601     ERROR_DECREF(errorcb);
602     Py_XDECREF(ucvt);
603     return NULL;
604 }
605 
606 /*[clinic input]
607 _multibytecodec.MultibyteCodec.decode
608 
609   input: Py_buffer
610   errors: str(accept={str, NoneType}) = None
611 
612 Decodes 'input'.
613 
614 'errors' may be given to set a different error handling scheme. Default is
615 'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
616 values are 'ignore' and 'replace' as well as any other name registered with
617 codecs.register_error that is able to handle UnicodeDecodeErrors."
618 [clinic start generated code]*/
619 
620 static PyObject *
_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject * self,Py_buffer * input,const char * errors)621 _multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
622                                            Py_buffer *input,
623                                            const char *errors)
624 /*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/
625 {
626     MultibyteCodec_State state;
627     MultibyteDecodeBuffer buf;
628     PyObject *errorcb, *res;
629     const char *data;
630     Py_ssize_t datalen;
631 
632     data = input->buf;
633     datalen = input->len;
634 
635     errorcb = internal_error_callback(errors);
636     if (errorcb == NULL) {
637         return NULL;
638     }
639 
640     if (datalen == 0) {
641         ERROR_DECREF(errorcb);
642         return make_tuple(PyUnicode_New(0, 0), 0);
643     }
644 
645     _PyUnicodeWriter_Init(&buf.writer);
646     buf.writer.min_length = datalen;
647     buf.excobj = NULL;
648     buf.inbuf = buf.inbuf_top = (unsigned char *)data;
649     buf.inbuf_end = buf.inbuf_top + datalen;
650 
651     if (self->codec->decinit != NULL &&
652         self->codec->decinit(&state, self->codec->config) != 0)
653         goto errorexit;
654 
655     while (buf.inbuf < buf.inbuf_end) {
656         Py_ssize_t inleft, r;
657 
658         inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
659 
660         r = self->codec->decode(&state, self->codec->config,
661                         &buf.inbuf, inleft, &buf.writer);
662         if (r == 0)
663             break;
664         else if (multibytecodec_decerror(self->codec, &state,
665                                          &buf, errorcb, r))
666             goto errorexit;
667     }
668 
669     res = _PyUnicodeWriter_Finish(&buf.writer);
670     if (res == NULL)
671         goto errorexit;
672 
673     Py_XDECREF(buf.excobj);
674     ERROR_DECREF(errorcb);
675     return make_tuple(res, datalen);
676 
677 errorexit:
678     ERROR_DECREF(errorcb);
679     Py_XDECREF(buf.excobj);
680     _PyUnicodeWriter_Dealloc(&buf.writer);
681 
682     return NULL;
683 }
684 
685 static struct PyMethodDef multibytecodec_methods[] = {
686     _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
687     _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
688     {NULL, NULL},
689 };
690 
691 static void
multibytecodec_dealloc(MultibyteCodecObject * self)692 multibytecodec_dealloc(MultibyteCodecObject *self)
693 {
694     PyObject_Del(self);
695 }
696 
697 static PyTypeObject MultibyteCodec_Type = {
698     PyVarObject_HEAD_INIT(NULL, 0)
699     "MultibyteCodec",                   /* tp_name */
700     sizeof(MultibyteCodecObject),       /* tp_basicsize */
701     0,                                  /* tp_itemsize */
702     /* methods */
703     (destructor)multibytecodec_dealloc, /* tp_dealloc */
704     0,                                  /* tp_vectorcall_offset */
705     0,                                  /* tp_getattr */
706     0,                                  /* tp_setattr */
707     0,                                  /* tp_as_async */
708     0,                                  /* tp_repr */
709     0,                                  /* tp_as_number */
710     0,                                  /* tp_as_sequence */
711     0,                                  /* tp_as_mapping */
712     0,                                  /* tp_hash */
713     0,                                  /* tp_call */
714     0,                                  /* tp_str */
715     PyObject_GenericGetAttr,            /* tp_getattro */
716     0,                                  /* tp_setattro */
717     0,                                  /* tp_as_buffer */
718     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
719     0,                                  /* tp_doc */
720     0,                                  /* tp_traverse */
721     0,                                  /* tp_clear */
722     0,                                  /* tp_richcompare */
723     0,                                  /* tp_weaklistoffset */
724     0,                                  /* tp_iter */
725     0,                                  /* tp_iterext */
726     multibytecodec_methods,             /* tp_methods */
727 };
728 
729 
730 /**
731  * Utility functions for stateful codec mechanism
732  */
733 
/* Reinterpret a stateful codec object as its decoder / encoder context. */
#define STATEFUL_DCTX(o) ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o) ((MultibyteStatefulEncoderContext *)(o))
736 
737 static PyObject *
encoder_encode_stateful(MultibyteStatefulEncoderContext * ctx,PyObject * unistr,int final)738 encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
739                         PyObject *unistr, int final)
740 {
741     PyObject *ucvt, *r = NULL;
742     PyObject *inbuf = NULL;
743     Py_ssize_t inpos, datalen;
744     PyObject *origpending = NULL;
745 
746     if (PyUnicode_Check(unistr))
747         ucvt = NULL;
748     else {
749         unistr = ucvt = PyObject_Str(unistr);
750         if (unistr == NULL)
751             return NULL;
752         else if (!PyUnicode_Check(unistr)) {
753             PyErr_SetString(PyExc_TypeError,
754                 "couldn't convert the object to str.");
755             Py_DECREF(ucvt);
756             return NULL;
757         }
758     }
759 
760     if (ctx->pending) {
761         PyObject *inbuf_tmp;
762 
763         Py_INCREF(ctx->pending);
764         origpending = ctx->pending;
765 
766         Py_INCREF(ctx->pending);
767         inbuf_tmp = ctx->pending;
768         PyUnicode_Append(&inbuf_tmp, unistr);
769         if (inbuf_tmp == NULL)
770             goto errorexit;
771         Py_CLEAR(ctx->pending);
772         inbuf = inbuf_tmp;
773     }
774     else {
775         origpending = NULL;
776 
777         Py_INCREF(unistr);
778         inbuf = unistr;
779     }
780     if (PyUnicode_READY(inbuf) < 0)
781         goto errorexit;
782     inpos = 0;
783     datalen = PyUnicode_GET_LENGTH(inbuf);
784 
785     r = multibytecodec_encode(ctx->codec, &ctx->state,
786                               inbuf, &inpos,
787                               ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
788     if (r == NULL) {
789         /* recover the original pending buffer */
790         Py_XSETREF(ctx->pending, origpending);
791         origpending = NULL;
792         goto errorexit;
793     }
794     Py_XDECREF(origpending);
795 
796     if (inpos < datalen) {
797         if (datalen - inpos > MAXENCPENDING) {
798             /* normal codecs can't reach here */
799             PyErr_SetString(PyExc_UnicodeError,
800                             "pending buffer overflow");
801             goto errorexit;
802         }
803         ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
804         if (ctx->pending == NULL) {
805             /* normal codecs can't reach here */
806             goto errorexit;
807         }
808     }
809 
810     Py_DECREF(inbuf);
811     Py_XDECREF(ucvt);
812     return r;
813 
814 errorexit:
815     Py_XDECREF(r);
816     Py_XDECREF(ucvt);
817     Py_XDECREF(origpending);
818     Py_XDECREF(inbuf);
819     return NULL;
820 }
821 
822 static int
decoder_append_pending(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)823 decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
824                        MultibyteDecodeBuffer *buf)
825 {
826     Py_ssize_t npendings;
827 
828     npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
829     if (npendings + ctx->pendingsize > MAXDECPENDING ||
830         npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
831             PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
832             return -1;
833     }
834     memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
835     ctx->pendingsize += npendings;
836     return 0;
837 }
838 
839 static int
decoder_prepare_buffer(MultibyteDecodeBuffer * buf,const char * data,Py_ssize_t size)840 decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
841                        Py_ssize_t size)
842 {
843     buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
844     buf->inbuf_end = buf->inbuf_top + size;
845     buf->writer.min_length += size;
846     return 0;
847 }
848 
849 static int
decoder_feed_buffer(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)850 decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
851                     MultibyteDecodeBuffer *buf)
852 {
853     while (buf->inbuf < buf->inbuf_end) {
854         Py_ssize_t inleft;
855         Py_ssize_t r;
856 
857         inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
858 
859         r = ctx->codec->decode(&ctx->state, ctx->codec->config,
860             &buf->inbuf, inleft, &buf->writer);
861         if (r == 0 || r == MBERR_TOOFEW)
862             break;
863         else if (multibytecodec_decerror(ctx->codec, &ctx->state,
864                                          buf, ctx->errors, r))
865             return -1;
866     }
867     return 0;
868 }
869 
870 
871 /*[clinic input]
872  class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "&MultibyteIncrementalEncoder_Type"
873 [clinic start generated code]*/
874 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=3be82909cd08924d]*/
875 
876 /*[clinic input]
877 _multibytecodec.MultibyteIncrementalEncoder.encode
878 
879     input: object
880     final: bool(accept={int}) = False
881 [clinic start generated code]*/
882 
883 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject * self,PyObject * input,int final)884 _multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
885                                                         PyObject *input,
886                                                         int final)
887 /*[clinic end generated code: output=123361b6c505e2c1 input=093a1ddbb2fc6721]*/
888 {
889     return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
890 }
891 
892 /*[clinic input]
893 _multibytecodec.MultibyteIncrementalEncoder.getstate
894 [clinic start generated code]*/
895 
896 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject * self)897 _multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
898 /*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
899 {
900     /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
901        for UTF-8 encoded buffer (each character can use up to 4
902        bytes), and required bytes for MultibyteCodec_State.c. A byte
903        array is used to avoid different compilers generating different
904        values for the same state, e.g. as a result of struct padding.
905     */
906     unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
907     Py_ssize_t statesize;
908     const char *pendingbuffer = NULL;
909     Py_ssize_t pendingsize;
910 
911     if (self->pending != NULL) {
912         pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
913         if (pendingbuffer == NULL) {
914             return NULL;
915         }
916         if (pendingsize > MAXENCPENDING*4) {
917             PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
918             return NULL;
919         }
920         statebytes[0] = (unsigned char)pendingsize;
921         memcpy(statebytes + 1, pendingbuffer, pendingsize);
922         statesize = 1 + pendingsize;
923     } else {
924         statebytes[0] = 0;
925         statesize = 1;
926     }
927     memcpy(statebytes+statesize, self->state.c,
928            sizeof(self->state.c));
929     statesize += sizeof(self->state.c);
930 
931     return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
932                                              1 /* little-endian */ ,
933                                              0 /* unsigned */ );
934 }
935 
936 /*[clinic input]
937 _multibytecodec.MultibyteIncrementalEncoder.setstate
938     state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
939     /
940 [clinic start generated code]*/
941 
942 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject * self,PyLongObject * statelong)943 _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
944                                                           PyLongObject *statelong)
945 /*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
946 {
947     PyObject *pending = NULL;
948     unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
949 
950     if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
951                             1 /* little-endian */ ,
952                             0 /* unsigned */ ) < 0) {
953         goto errorexit;
954     }
955 
956     if (statebytes[0] > MAXENCPENDING*4) {
957         PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
958         return NULL;
959     }
960 
961     pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
962                                    statebytes[0], "strict");
963     if (pending == NULL) {
964         goto errorexit;
965     }
966 
967     Py_CLEAR(self->pending);
968     self->pending = pending;
969     memcpy(self->state.c, statebytes+1+statebytes[0],
970            sizeof(self->state.c));
971 
972     Py_RETURN_NONE;
973 
974 errorexit:
975     Py_XDECREF(pending);
976     return NULL;
977 }
978 
979 /*[clinic input]
980 _multibytecodec.MultibyteIncrementalEncoder.reset
981 [clinic start generated code]*/
982 
983 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject * self)984 _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
985 /*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
986 {
987     /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
988     unsigned char buffer[4], *outbuf;
989     Py_ssize_t r;
990     if (self->codec->encreset != NULL) {
991         outbuf = buffer;
992         r = self->codec->encreset(&self->state, self->codec->config,
993                                   &outbuf, sizeof(buffer));
994         if (r != 0)
995             return NULL;
996     }
997     Py_CLEAR(self->pending);
998     Py_RETURN_NONE;
999 }
1000 
/* Method table installed as tp_methods of MultibyteIncrementalEncoder_Type.
   Each entry is a *_METHODDEF macro (presumably supplied by the included
   clinic header -- confirm) for the matching *_impl function above. */
static struct PyMethodDef mbiencoder_methods[] = {
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
    {NULL, NULL},   /* sentinel terminating the table */
};
1008 
1009 static PyObject *
mbiencoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1010 mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1011 {
1012     MultibyteIncrementalEncoderObject *self;
1013     PyObject *codec = NULL;
1014     char *errors = NULL;
1015 
1016     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
1017                                      incnewkwarglist, &errors))
1018         return NULL;
1019 
1020     self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
1021     if (self == NULL)
1022         return NULL;
1023 
1024     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1025     if (codec == NULL)
1026         goto errorexit;
1027     if (!MultibyteCodec_Check(codec)) {
1028         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1029         goto errorexit;
1030     }
1031 
1032     self->codec = ((MultibyteCodecObject *)codec)->codec;
1033     self->pending = NULL;
1034     self->errors = internal_error_callback(errors);
1035     if (self->errors == NULL)
1036         goto errorexit;
1037     if (self->codec->encinit != NULL &&
1038         self->codec->encinit(&self->state, self->codec->config) != 0)
1039         goto errorexit;
1040 
1041     Py_DECREF(codec);
1042     return (PyObject *)self;
1043 
1044 errorexit:
1045     Py_XDECREF(self);
1046     Py_XDECREF(codec);
1047     return NULL;
1048 }
1049 
/* tp_init slot for MultibyteIncrementalEncoder: ignores its arguments and
   always reports success.  The object's fields are filled in by
   mbiencoder_new(). */
static int
mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
1055 
1056 static int
mbiencoder_traverse(MultibyteIncrementalEncoderObject * self,visitproc visit,void * arg)1057 mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
1058                     visitproc visit, void *arg)
1059 {
1060     if (ERROR_ISCUSTOM(self->errors))
1061         Py_VISIT(self->errors);
1062     return 0;
1063 }
1064 
/* tp_dealloc slot: untrack the object from the garbage collector, release
   the error handler (via ERROR_DECREF) and the pending text, then free the
   object's memory through its type's tp_free. */
static void
mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
{
    PyObject_GC_UnTrack(self);
    ERROR_DECREF(self->errors);
    Py_CLEAR(self->pending);
    Py_TYPE(self)->tp_free(self);
}
1073 
1074 static PyTypeObject MultibyteIncrementalEncoder_Type = {
1075     PyVarObject_HEAD_INIT(NULL, 0)
1076     "MultibyteIncrementalEncoder",      /* tp_name */
1077     sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */
1078     0,                                  /* tp_itemsize */
1079     /*  methods  */
1080     (destructor)mbiencoder_dealloc, /* tp_dealloc */
1081     0,                                  /* tp_vectorcall_offset */
1082     0,                                  /* tp_getattr */
1083     0,                                  /* tp_setattr */
1084     0,                                  /* tp_as_async */
1085     0,                                  /* tp_repr */
1086     0,                                  /* tp_as_number */
1087     0,                                  /* tp_as_sequence */
1088     0,                                  /* tp_as_mapping */
1089     0,                                  /* tp_hash */
1090     0,                                  /* tp_call */
1091     0,                                  /* tp_str */
1092     PyObject_GenericGetAttr,            /* tp_getattro */
1093     0,                                  /* tp_setattro */
1094     0,                                  /* tp_as_buffer */
1095     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1096         | Py_TPFLAGS_BASETYPE,          /* tp_flags */
1097     0,                                  /* tp_doc */
1098     (traverseproc)mbiencoder_traverse,          /* tp_traverse */
1099     0,                                  /* tp_clear */
1100     0,                                  /* tp_richcompare */
1101     0,                                  /* tp_weaklistoffset */
1102     0,                                  /* tp_iter */
1103     0,                                  /* tp_iterext */
1104     mbiencoder_methods,                 /* tp_methods */
1105     0,                                  /* tp_members */
1106     codecctx_getsets,                   /* tp_getset */
1107     0,                                  /* tp_base */
1108     0,                                  /* tp_dict */
1109     0,                                  /* tp_descr_get */
1110     0,                                  /* tp_descr_set */
1111     0,                                  /* tp_dictoffset */
1112     mbiencoder_init,                    /* tp_init */
1113     0,                                  /* tp_alloc */
1114     mbiencoder_new,                     /* tp_new */
1115 };
1116 
1117 
1118 /*[clinic input]
1119  class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "&MultibyteIncrementalDecoder_Type"
1120 [clinic start generated code]*/
1121 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=f6003faaf2cea692]*/
1122 
1123 /*[clinic input]
1124 _multibytecodec.MultibyteIncrementalDecoder.decode
1125 
1126     input: Py_buffer
1127     final: bool(accept={int}) = False
1128 [clinic start generated code]*/
1129 
/* Incremental decode(): decode the bytes in `input`, prefixed by any bytes
   left pending from an earlier call, and return the decoded str.

   With `final` true, bytes that remain undecoded after feeding the buffer
   are reported through multibytecodec_decerror() as MBERR_TOOFEW.  Bytes
   still undecoded after that are stored through decoder_append_pending().

   Returns a new reference, or NULL with an exception set. */
static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
                                                        Py_buffer *input,
                                                        int final)
/*[clinic end generated code: output=b9b9090e8a9ce2ba input=c9132b24d503eb1d]*/
{
    MultibyteDecodeBuffer buf;
    char *data, *wdata = NULL;
    Py_ssize_t wsize, size, origpending;
    PyObject *res;

    data = input->buf;
    size = input->len;

    _PyUnicodeWriter_Init(&buf.writer);
    buf.excobj = NULL;
    /* Remembered so the pending buffer can be restored on failure below. */
    origpending = self->pendingsize;

    if (self->pendingsize == 0) {
        /* Nothing pending: decode straight from the caller's buffer. */
        wsize = size;
        wdata = data;
    }
    else {
        /* Build a temporary buffer holding pending bytes + new input. */
        if (size > PY_SSIZE_T_MAX - self->pendingsize) {
            PyErr_NoMemory();
            goto errorexit;
        }
        wsize = size + self->pendingsize;
        wdata = PyMem_Malloc(wsize);
        if (wdata == NULL) {
            PyErr_NoMemory();
            goto errorexit;
        }
        memcpy(wdata, self->pending, self->pendingsize);
        memcpy(wdata + self->pendingsize, data, size);
        self->pendingsize = 0;
    }

    if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
        goto errorexit;

    if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
        goto errorexit;

    if (final && buf.inbuf < buf.inbuf_end) {
        if (multibytecodec_decerror(self->codec, &self->state,
                        &buf, self->errors, MBERR_TOOFEW)) {
            /* recover the original pending buffer */
            memcpy(self->pending, wdata, origpending);
            self->pendingsize = origpending;
            goto errorexit;
        }
    }

    if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
        if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
            goto errorexit;
    }

    res = _PyUnicodeWriter_Finish(&buf.writer);
    if (res == NULL)
        goto errorexit;

    /* wdata is owned by this function only when it is the merged copy. */
    if (wdata != data)
        PyMem_Del(wdata);
    Py_XDECREF(buf.excobj);
    return res;

errorexit:
    if (wdata != NULL && wdata != data)
        PyMem_Del(wdata);
    Py_XDECREF(buf.excobj);
    _PyUnicodeWriter_Dealloc(&buf.writer);
    return NULL;
}
1205 
1206 /*[clinic input]
1207 _multibytecodec.MultibyteIncrementalDecoder.getstate
1208 [clinic start generated code]*/
1209 
1210 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject * self)1211 _multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
1212 /*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
1213 {
1214     PyObject *buffer;
1215     PyObject *statelong;
1216 
1217     buffer = PyBytes_FromStringAndSize((const char *)self->pending,
1218                                        self->pendingsize);
1219     if (buffer == NULL) {
1220         return NULL;
1221     }
1222 
1223     statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
1224                                                   sizeof(self->state.c),
1225                                                   1 /* little-endian */ ,
1226                                                   0 /* unsigned */ );
1227     if (statelong == NULL) {
1228         Py_DECREF(buffer);
1229         return NULL;
1230     }
1231 
1232     return Py_BuildValue("NN", buffer, statelong);
1233 }
1234 
1235 /*[clinic input]
1236 _multibytecodec.MultibyteIncrementalDecoder.setstate
1237     state: object(subclass_of='&PyTuple_Type')
1238     /
1239 [clinic start generated code]*/
1240 
1241 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject * self,PyObject * state)1242 _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
1243                                                           PyObject *state)
1244 /*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
1245 {
1246     PyObject *buffer;
1247     PyLongObject *statelong;
1248     Py_ssize_t buffersize;
1249     const char *bufferstr;
1250     unsigned char statebytes[8];
1251 
1252     if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
1253                           &buffer, &PyLong_Type, &statelong))
1254     {
1255         return NULL;
1256     }
1257 
1258     if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
1259                             1 /* little-endian */ ,
1260                             0 /* unsigned */ ) < 0) {
1261         return NULL;
1262     }
1263 
1264     buffersize = PyBytes_Size(buffer);
1265     if (buffersize == -1) {
1266         return NULL;
1267     }
1268 
1269     if (buffersize > MAXDECPENDING) {
1270         PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
1271         return NULL;
1272     }
1273 
1274     bufferstr = PyBytes_AsString(buffer);
1275     if (bufferstr == NULL) {
1276         return NULL;
1277     }
1278     self->pendingsize = buffersize;
1279     memcpy(self->pending, bufferstr, self->pendingsize);
1280     memcpy(self->state.c, statebytes, sizeof(statebytes));
1281 
1282     Py_RETURN_NONE;
1283 }
1284 
1285 /*[clinic input]
1286 _multibytecodec.MultibyteIncrementalDecoder.reset
1287 [clinic start generated code]*/
1288 
1289 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject * self)1290 _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
1291 /*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
1292 {
1293     if (self->codec->decreset != NULL &&
1294         self->codec->decreset(&self->state, self->codec->config) != 0)
1295         return NULL;
1296     self->pendingsize = 0;
1297 
1298     Py_RETURN_NONE;
1299 }
1300 
/* Method table installed as tp_methods of MultibyteIncrementalDecoder_Type.
   Each entry is a *_METHODDEF macro (presumably supplied by the included
   clinic header -- confirm) for the matching *_impl function above. */
static struct PyMethodDef mbidecoder_methods[] = {
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
    {NULL, NULL},   /* sentinel terminating the table */
};
1308 
1309 static PyObject *
mbidecoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1310 mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1311 {
1312     MultibyteIncrementalDecoderObject *self;
1313     PyObject *codec = NULL;
1314     char *errors = NULL;
1315 
1316     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1317                                      incnewkwarglist, &errors))
1318         return NULL;
1319 
1320     self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1321     if (self == NULL)
1322         return NULL;
1323 
1324     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1325     if (codec == NULL)
1326         goto errorexit;
1327     if (!MultibyteCodec_Check(codec)) {
1328         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1329         goto errorexit;
1330     }
1331 
1332     self->codec = ((MultibyteCodecObject *)codec)->codec;
1333     self->pendingsize = 0;
1334     self->errors = internal_error_callback(errors);
1335     if (self->errors == NULL)
1336         goto errorexit;
1337     if (self->codec->decinit != NULL &&
1338         self->codec->decinit(&self->state, self->codec->config) != 0)
1339         goto errorexit;
1340 
1341     Py_DECREF(codec);
1342     return (PyObject *)self;
1343 
1344 errorexit:
1345     Py_XDECREF(self);
1346     Py_XDECREF(codec);
1347     return NULL;
1348 }
1349 
/* tp_init slot for MultibyteIncrementalDecoder: ignores its arguments and
   always reports success.  The object's fields are filled in by
   mbidecoder_new(). */
static int
mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
1355 
1356 static int
mbidecoder_traverse(MultibyteIncrementalDecoderObject * self,visitproc visit,void * arg)1357 mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
1358                     visitproc visit, void *arg)
1359 {
1360     if (ERROR_ISCUSTOM(self->errors))
1361         Py_VISIT(self->errors);
1362     return 0;
1363 }
1364 
/* tp_dealloc slot: untrack the object from the garbage collector, release
   the error handler (via ERROR_DECREF), then free the object's memory
   through its type's tp_free. */
static void
mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
{
    PyObject_GC_UnTrack(self);
    ERROR_DECREF(self->errors);
    Py_TYPE(self)->tp_free(self);
}
1372 
1373 static PyTypeObject MultibyteIncrementalDecoder_Type = {
1374     PyVarObject_HEAD_INIT(NULL, 0)
1375     "MultibyteIncrementalDecoder",      /* tp_name */
1376     sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */
1377     0,                                  /* tp_itemsize */
1378     /*  methods  */
1379     (destructor)mbidecoder_dealloc, /* tp_dealloc */
1380     0,                                  /* tp_vectorcall_offset */
1381     0,                                  /* tp_getattr */
1382     0,                                  /* tp_setattr */
1383     0,                                  /* tp_as_async */
1384     0,                                  /* tp_repr */
1385     0,                                  /* tp_as_number */
1386     0,                                  /* tp_as_sequence */
1387     0,                                  /* tp_as_mapping */
1388     0,                                  /* tp_hash */
1389     0,                                  /* tp_call */
1390     0,                                  /* tp_str */
1391     PyObject_GenericGetAttr,            /* tp_getattro */
1392     0,                                  /* tp_setattro */
1393     0,                                  /* tp_as_buffer */
1394     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1395         | Py_TPFLAGS_BASETYPE,          /* tp_flags */
1396     0,                                  /* tp_doc */
1397     (traverseproc)mbidecoder_traverse,          /* tp_traverse */
1398     0,                                  /* tp_clear */
1399     0,                                  /* tp_richcompare */
1400     0,                                  /* tp_weaklistoffset */
1401     0,                                  /* tp_iter */
1402     0,                                  /* tp_iterext */
1403     mbidecoder_methods,                 /* tp_methods */
1404     0,                                  /* tp_members */
1405     codecctx_getsets,                   /* tp_getset */
1406     0,                                  /* tp_base */
1407     0,                                  /* tp_dict */
1408     0,                                  /* tp_descr_get */
1409     0,                                  /* tp_descr_set */
1410     0,                                  /* tp_dictoffset */
1411     mbidecoder_init,                    /* tp_init */
1412     0,                                  /* tp_alloc */
1413     mbidecoder_new,                     /* tp_new */
1414 };
1415 
1416 
1417 /*[clinic input]
1418  class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "MultibyteStreamReader_Type"
1419 [clinic start generated code]*/
1420 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=d323634b74976f09]*/
1421 
1422 static PyObject *
mbstreamreader_iread(MultibyteStreamReaderObject * self,const char * method,Py_ssize_t sizehint)1423 mbstreamreader_iread(MultibyteStreamReaderObject *self,
1424                      const char *method, Py_ssize_t sizehint)
1425 {
1426     MultibyteDecodeBuffer buf;
1427     PyObject *cres, *res;
1428     Py_ssize_t rsize;
1429 
1430     if (sizehint == 0)
1431         return PyUnicode_New(0, 0);
1432 
1433     _PyUnicodeWriter_Init(&buf.writer);
1434     buf.excobj = NULL;
1435     cres = NULL;
1436 
1437     for (;;) {
1438         int endoffile;
1439 
1440         if (sizehint < 0)
1441             cres = PyObject_CallMethod(self->stream,
1442                             method, NULL);
1443         else
1444             cres = PyObject_CallMethod(self->stream,
1445                             method, "i", sizehint);
1446         if (cres == NULL)
1447             goto errorexit;
1448 
1449         if (!PyBytes_Check(cres)) {
1450             PyErr_Format(PyExc_TypeError,
1451                          "stream function returned a "
1452                          "non-bytes object (%.100s)",
1453                          Py_TYPE(cres)->tp_name);
1454             goto errorexit;
1455         }
1456 
1457         endoffile = (PyBytes_GET_SIZE(cres) == 0);
1458 
1459         if (self->pendingsize > 0) {
1460             PyObject *ctr;
1461             char *ctrdata;
1462 
1463             if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1464                 PyErr_NoMemory();
1465                 goto errorexit;
1466             }
1467             rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
1468             ctr = PyBytes_FromStringAndSize(NULL, rsize);
1469             if (ctr == NULL)
1470                 goto errorexit;
1471             ctrdata = PyBytes_AS_STRING(ctr);
1472             memcpy(ctrdata, self->pending, self->pendingsize);
1473             memcpy(ctrdata + self->pendingsize,
1474                     PyBytes_AS_STRING(cres),
1475                     PyBytes_GET_SIZE(cres));
1476             Py_DECREF(cres);
1477             cres = ctr;
1478             self->pendingsize = 0;
1479         }
1480 
1481         rsize = PyBytes_GET_SIZE(cres);
1482         if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
1483                                    rsize) != 0)
1484             goto errorexit;
1485 
1486         if (rsize > 0 && decoder_feed_buffer(
1487                         (MultibyteStatefulDecoderContext *)self, &buf))
1488             goto errorexit;
1489 
1490         if (endoffile || sizehint < 0) {
1491             if (buf.inbuf < buf.inbuf_end &&
1492                 multibytecodec_decerror(self->codec, &self->state,
1493                             &buf, self->errors, MBERR_TOOFEW))
1494                 goto errorexit;
1495         }
1496 
1497         if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1498             if (decoder_append_pending(STATEFUL_DCTX(self),
1499                                        &buf) != 0)
1500                 goto errorexit;
1501         }
1502 
1503         Py_DECREF(cres);
1504         cres = NULL;
1505 
1506         if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
1507             break;
1508 
1509         sizehint = 1; /* read 1 more byte and retry */
1510     }
1511 
1512     res = _PyUnicodeWriter_Finish(&buf.writer);
1513     if (res == NULL)
1514         goto errorexit;
1515 
1516     Py_XDECREF(cres);
1517     Py_XDECREF(buf.excobj);
1518     return res;
1519 
1520 errorexit:
1521     Py_XDECREF(cres);
1522     Py_XDECREF(buf.excobj);
1523     _PyUnicodeWriter_Dealloc(&buf.writer);
1524     return NULL;
1525 }
1526 
1527 /*[clinic input]
1528  _multibytecodec.MultibyteStreamReader.read
1529 
1530     sizeobj: object = None
1531     /
1532 [clinic start generated code]*/
1533 
1534 static PyObject *
_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1535 _multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
1536                                                 PyObject *sizeobj)
1537 /*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
1538 {
1539     Py_ssize_t size;
1540 
1541     if (sizeobj == Py_None)
1542         size = -1;
1543     else if (PyLong_Check(sizeobj))
1544         size = PyLong_AsSsize_t(sizeobj);
1545     else {
1546         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1547         return NULL;
1548     }
1549 
1550     if (size == -1 && PyErr_Occurred())
1551         return NULL;
1552 
1553     return mbstreamreader_iread(self, "read", size);
1554 }
1555 
1556 /*[clinic input]
1557  _multibytecodec.MultibyteStreamReader.readline
1558 
1559     sizeobj: object = None
1560     /
1561 [clinic start generated code]*/
1562 
1563 static PyObject *
_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1564 _multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
1565                                                     PyObject *sizeobj)
1566 /*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
1567 {
1568     Py_ssize_t size;
1569 
1570     if (sizeobj == Py_None)
1571         size = -1;
1572     else if (PyLong_Check(sizeobj))
1573         size = PyLong_AsSsize_t(sizeobj);
1574     else {
1575         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1576         return NULL;
1577     }
1578 
1579     if (size == -1 && PyErr_Occurred())
1580         return NULL;
1581 
1582     return mbstreamreader_iread(self, "readline", size);
1583 }
1584 
1585 /*[clinic input]
1586  _multibytecodec.MultibyteStreamReader.readlines
1587 
1588     sizehintobj: object = None
1589     /
1590 [clinic start generated code]*/
1591 
1592 static PyObject *
_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject * self,PyObject * sizehintobj)1593 _multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
1594                                                      PyObject *sizehintobj)
1595 /*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
1596 {
1597     PyObject *r, *sr;
1598     Py_ssize_t sizehint;
1599 
1600     if (sizehintobj == Py_None)
1601         sizehint = -1;
1602     else if (PyLong_Check(sizehintobj))
1603         sizehint = PyLong_AsSsize_t(sizehintobj);
1604     else {
1605         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1606         return NULL;
1607     }
1608 
1609     if (sizehint == -1 && PyErr_Occurred())
1610         return NULL;
1611 
1612     r = mbstreamreader_iread(self, "read", sizehint);
1613     if (r == NULL)
1614         return NULL;
1615 
1616     sr = PyUnicode_Splitlines(r, 1);
1617     Py_DECREF(r);
1618     return sr;
1619 }
1620 
1621 /*[clinic input]
1622  _multibytecodec.MultibyteStreamReader.reset
1623 [clinic start generated code]*/
1624 
1625 static PyObject *
_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject * self)1626 _multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
1627 /*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
1628 {
1629     if (self->codec->decreset != NULL &&
1630         self->codec->decreset(&self->state, self->codec->config) != 0)
1631         return NULL;
1632     self->pendingsize = 0;
1633 
1634     Py_RETURN_NONE;
1635 }
1636 
/* Method table installed as tp_methods of MultibyteStreamReader_Type.
   Each entry is a *_METHODDEF macro (presumably supplied by the included
   clinic header -- confirm) for the matching *_impl function above. */
static struct PyMethodDef mbstreamreader_methods[] = {
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
    {NULL,              NULL},   /* sentinel terminating the table */
};
1644 
/* Member table installed as tp_members of MultibyteStreamReader_Type:
   exposes the wrapped stream object as the read-only attribute "stream". */
static PyMemberDef mbstreamreader_members[] = {
    {"stream",          T_OBJECT,
                    offsetof(MultibyteStreamReaderObject, stream),
                    READONLY, NULL},
    {NULL,}   /* sentinel terminating the table */
};
1651 
1652 static PyObject *
mbstreamreader_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1653 mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1654 {
1655     MultibyteStreamReaderObject *self;
1656     PyObject *stream, *codec = NULL;
1657     char *errors = NULL;
1658 
1659     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1660                             streamkwarglist, &stream, &errors))
1661         return NULL;
1662 
1663     self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1664     if (self == NULL)
1665         return NULL;
1666 
1667     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1668     if (codec == NULL)
1669         goto errorexit;
1670     if (!MultibyteCodec_Check(codec)) {
1671         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1672         goto errorexit;
1673     }
1674 
1675     self->codec = ((MultibyteCodecObject *)codec)->codec;
1676     self->stream = stream;
1677     Py_INCREF(stream);
1678     self->pendingsize = 0;
1679     self->errors = internal_error_callback(errors);
1680     if (self->errors == NULL)
1681         goto errorexit;
1682     if (self->codec->decinit != NULL &&
1683         self->codec->decinit(&self->state, self->codec->config) != 0)
1684         goto errorexit;
1685 
1686     Py_DECREF(codec);
1687     return (PyObject *)self;
1688 
1689 errorexit:
1690     Py_XDECREF(self);
1691     Py_XDECREF(codec);
1692     return NULL;
1693 }
1694 
/* tp_init slot for MultibyteStreamReader: ignores its arguments and always
   reports success.  The object's fields are filled in by
   mbstreamreader_new(). */
static int
mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
1700 
1701 static int
mbstreamreader_traverse(MultibyteStreamReaderObject * self,visitproc visit,void * arg)1702 mbstreamreader_traverse(MultibyteStreamReaderObject *self,
1703                         visitproc visit, void *arg)
1704 {
1705     if (ERROR_ISCUSTOM(self->errors))
1706         Py_VISIT(self->errors);
1707     Py_VISIT(self->stream);
1708     return 0;
1709 }
1710 
/* tp_dealloc slot: untrack the object from the garbage collector, release
   the error handler (via ERROR_DECREF) and the wrapped stream, then free
   the object's memory through its type's tp_free. */
static void
mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
{
    PyObject_GC_UnTrack(self);
    ERROR_DECREF(self->errors);
    /* stream may still be NULL if mbstreamreader_new() failed early. */
    Py_XDECREF(self->stream);
    Py_TYPE(self)->tp_free(self);
}
1719 
/* Static type object for the multibyte stream reader.
 *
 * The initializer is positional: every entry must stay in the slot order
 * of PyTypeObject, which is why each line carries its slot name.  The
 * type is GC-enabled and subclassable; mbstreamreader_new() looks up a
 * `codec` attribute on the (sub)type it is instantiating.
 */
static PyTypeObject MultibyteStreamReader_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "MultibyteStreamReader",            /* tp_name */
    sizeof(MultibyteStreamReaderObject), /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /*  methods  */
    (destructor)mbstreamreader_dealloc, /* tp_dealloc */
    0,                                  /* tp_vectorcall_offset */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_as_async */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
        | Py_TPFLAGS_BASETYPE,          /* tp_flags */
    0,                                  /* tp_doc */
    (traverseproc)mbstreamreader_traverse,      /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    mbstreamreader_methods,             /* tp_methods */
    mbstreamreader_members,             /* tp_members */
    codecctx_getsets,                   /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    mbstreamreader_init,                /* tp_init */
    0,                                  /* tp_alloc */
    mbstreamreader_new,                 /* tp_new */
};
1762 
1763 
1764 /*[clinic input]
1765  class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "&MultibyteStreamWriter_Type"
1766 [clinic start generated code]*/
1767 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=cde22780a215d6ac]*/
1768 
1769 static int
mbstreamwriter_iwrite(MultibyteStreamWriterObject * self,PyObject * unistr)1770 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
1771                       PyObject *unistr)
1772 {
1773     PyObject *str, *wr;
1774 
1775     str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1776     if (str == NULL)
1777         return -1;
1778 
1779     wr = _PyObject_CallMethodIdOneArg(self->stream, &PyId_write, str);
1780     Py_DECREF(str);
1781     if (wr == NULL)
1782         return -1;
1783 
1784     Py_DECREF(wr);
1785     return 0;
1786 }
1787 
1788 /*[clinic input]
1789  _multibytecodec.MultibyteStreamWriter.write
1790 
1791     strobj: object
1792     /
1793 [clinic start generated code]*/
1794 
1795 static PyObject *
_multibytecodec_MultibyteStreamWriter_write(MultibyteStreamWriterObject * self,PyObject * strobj)1796 _multibytecodec_MultibyteStreamWriter_write(MultibyteStreamWriterObject *self,
1797                                             PyObject *strobj)
1798 /*[clinic end generated code: output=e13ae841c895251e input=551dc4c018c10a2b]*/
1799 {
1800     if (mbstreamwriter_iwrite(self, strobj))
1801         return NULL;
1802     else
1803         Py_RETURN_NONE;
1804 }
1805 
1806 /*[clinic input]
1807  _multibytecodec.MultibyteStreamWriter.writelines
1808 
1809     lines: object
1810     /
1811 [clinic start generated code]*/
1812 
1813 static PyObject *
_multibytecodec_MultibyteStreamWriter_writelines(MultibyteStreamWriterObject * self,PyObject * lines)1814 _multibytecodec_MultibyteStreamWriter_writelines(MultibyteStreamWriterObject *self,
1815                                                  PyObject *lines)
1816 /*[clinic end generated code: output=e5c4285ac8e7d522 input=57797fe7008d4e96]*/
1817 {
1818     PyObject *strobj;
1819     int i, r;
1820 
1821     if (!PySequence_Check(lines)) {
1822         PyErr_SetString(PyExc_TypeError,
1823                         "arg must be a sequence object");
1824         return NULL;
1825     }
1826 
1827     for (i = 0; i < PySequence_Length(lines); i++) {
1828         /* length can be changed even within this loop */
1829         strobj = PySequence_GetItem(lines, i);
1830         if (strobj == NULL)
1831             return NULL;
1832 
1833         r = mbstreamwriter_iwrite(self, strobj);
1834         Py_DECREF(strobj);
1835         if (r == -1)
1836             return NULL;
1837     }
1838     /* PySequence_Length() can fail */
1839     if (PyErr_Occurred())
1840         return NULL;
1841 
1842     Py_RETURN_NONE;
1843 }
1844 
1845 /*[clinic input]
1846  _multibytecodec.MultibyteStreamWriter.reset
1847 [clinic start generated code]*/
1848 
1849 static PyObject *
_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject * self)1850 _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self)
1851 /*[clinic end generated code: output=8f54a4d9b03db5ff input=b56dbcbaf35cc10c]*/
1852 {
1853     PyObject *pwrt;
1854 
1855     if (!self->pending)
1856         Py_RETURN_NONE;
1857 
1858     pwrt = multibytecodec_encode(self->codec, &self->state,
1859                     self->pending, NULL, self->errors,
1860                     MBENC_FLUSH | MBENC_RESET);
1861     /* some pending buffer can be truncated when UnicodeEncodeError is
1862      * raised on 'strict' mode. but, 'reset' method is designed to
1863      * reset the pending buffer or states so failed string sequence
1864      * ought to be missed */
1865     Py_CLEAR(self->pending);
1866     if (pwrt == NULL)
1867         return NULL;
1868 
1869     assert(PyBytes_Check(pwrt));
1870     if (PyBytes_Size(pwrt) > 0) {
1871         PyObject *wr;
1872 
1873         wr = _PyObject_CallMethodIdOneArg(self->stream, &PyId_write, pwrt);
1874         if (wr == NULL) {
1875             Py_DECREF(pwrt);
1876             return NULL;
1877         }
1878     }
1879     Py_DECREF(pwrt);
1880 
1881     Py_RETURN_NONE;
1882 }
1883 
1884 static PyObject *
mbstreamwriter_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1885 mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1886 {
1887     MultibyteStreamWriterObject *self;
1888     PyObject *stream, *codec = NULL;
1889     char *errors = NULL;
1890 
1891     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1892                             streamkwarglist, &stream, &errors))
1893         return NULL;
1894 
1895     self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1896     if (self == NULL)
1897         return NULL;
1898 
1899     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1900     if (codec == NULL)
1901         goto errorexit;
1902     if (!MultibyteCodec_Check(codec)) {
1903         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1904         goto errorexit;
1905     }
1906 
1907     self->codec = ((MultibyteCodecObject *)codec)->codec;
1908     self->stream = stream;
1909     Py_INCREF(stream);
1910     self->pending = NULL;
1911     self->errors = internal_error_callback(errors);
1912     if (self->errors == NULL)
1913         goto errorexit;
1914     if (self->codec->encinit != NULL &&
1915         self->codec->encinit(&self->state, self->codec->config) != 0)
1916         goto errorexit;
1917 
1918     Py_DECREF(codec);
1919     return (PyObject *)self;
1920 
1921 errorexit:
1922     Py_XDECREF(self);
1923     Py_XDECREF(codec);
1924     return NULL;
1925 }
1926 
1927 static int
mbstreamwriter_init(PyObject * self,PyObject * args,PyObject * kwds)1928 mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
1929 {
1930     return 0;
1931 }
1932 
1933 static int
mbstreamwriter_traverse(MultibyteStreamWriterObject * self,visitproc visit,void * arg)1934 mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
1935                         visitproc visit, void *arg)
1936 {
1937     if (ERROR_ISCUSTOM(self->errors))
1938         Py_VISIT(self->errors);
1939     Py_VISIT(self->stream);
1940     return 0;
1941 }
1942 
1943 static void
mbstreamwriter_dealloc(MultibyteStreamWriterObject * self)1944 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1945 {
1946     PyObject_GC_UnTrack(self);
1947     ERROR_DECREF(self->errors);
1948     Py_XDECREF(self->stream);
1949     Py_TYPE(self)->tp_free(self);
1950 }
1951 
1952 static struct PyMethodDef mbstreamwriter_methods[] = {
1953     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
1954     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
1955     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
1956     {NULL, NULL},
1957 };
1958 
1959 static PyMemberDef mbstreamwriter_members[] = {
1960     {"stream",          T_OBJECT,
1961                     offsetof(MultibyteStreamWriterObject, stream),
1962                     READONLY, NULL},
1963     {NULL,}
1964 };
1965 
/* Static type object for the multibyte stream writer.
 *
 * The initializer is positional: every entry must stay in the slot order
 * of PyTypeObject, which is why each line carries its slot name.  The
 * type is GC-enabled and subclassable; mbstreamwriter_new() looks up a
 * `codec` attribute on the (sub)type it is instantiating.
 */
static PyTypeObject MultibyteStreamWriter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "MultibyteStreamWriter",            /* tp_name */
    sizeof(MultibyteStreamWriterObject), /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /*  methods  */
    (destructor)mbstreamwriter_dealloc, /* tp_dealloc */
    0,                                  /* tp_vectorcall_offset */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_as_async */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
        | Py_TPFLAGS_BASETYPE,          /* tp_flags */
    0,                                  /* tp_doc */
    (traverseproc)mbstreamwriter_traverse,      /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    mbstreamwriter_methods,             /* tp_methods */
    mbstreamwriter_members,             /* tp_members */
    codecctx_getsets,                   /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    mbstreamwriter_init,                /* tp_init */
    0,                                  /* tp_alloc */
    mbstreamwriter_new,                 /* tp_new */
};
2008 
2009 
2010 /*[clinic input]
2011 _multibytecodec.__create_codec
2012 
2013     arg: object
2014     /
2015 [clinic start generated code]*/
2016 
2017 static PyObject *
_multibytecodec___create_codec(PyObject * module,PyObject * arg)2018 _multibytecodec___create_codec(PyObject *module, PyObject *arg)
2019 /*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
2020 {
2021     MultibyteCodecObject *self;
2022     MultibyteCodec *codec;
2023 
2024     if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
2025         PyErr_SetString(PyExc_ValueError, "argument type invalid");
2026         return NULL;
2027     }
2028 
2029     codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
2030     if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
2031         return NULL;
2032 
2033     self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type);
2034     if (self == NULL)
2035         return NULL;
2036     self->codec = codec;
2037 
2038     return (PyObject *)self;
2039 }
2040 
2041 static struct PyMethodDef __methods[] = {
2042     _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
2043     {NULL, NULL},
2044 };
2045 
2046 
2047 static struct PyModuleDef _multibytecodecmodule = {
2048     PyModuleDef_HEAD_INIT,
2049     "_multibytecodec",
2050     NULL,
2051     -1,
2052     __methods,
2053     NULL,
2054     NULL,
2055     NULL,
2056     NULL
2057 };
2058 
2059 PyMODINIT_FUNC
PyInit__multibytecodec(void)2060 PyInit__multibytecodec(void)
2061 {
2062     PyObject *m;
2063     PyTypeObject *typelist[] = {
2064         &MultibyteIncrementalEncoder_Type,
2065         &MultibyteIncrementalDecoder_Type,
2066         &MultibyteStreamReader_Type,
2067         &MultibyteStreamWriter_Type
2068     };
2069 
2070     if (PyType_Ready(&MultibyteCodec_Type) < 0)
2071         return NULL;
2072 
2073     m = PyModule_Create(&_multibytecodecmodule);
2074     if (m == NULL)
2075         return NULL;
2076 
2077     for (size_t i = 0; i < Py_ARRAY_LENGTH(typelist); i++) {
2078         if (PyModule_AddType(m, typelist[i]) < 0) {
2079             return NULL;
2080         }
2081     }
2082 
2083     if (PyErr_Occurred()) {
2084         Py_DECREF(m);
2085         m = NULL;
2086     }
2087     return m;
2088 }
2089