1 /* bytes object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 
5 #include "Python.h"
6 #include "internal/mem.h"
7 #include "internal/pystate.h"
8 
9 #include "bytes_methods.h"
10 #include "pystrhex.h"
11 #include <stddef.h>
12 
13 /*[clinic input]
14 class bytes "PyBytesObject *" "&PyBytes_Type"
15 [clinic start generated code]*/
16 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
17 
18 #include "clinic/bytesobject.c.h"
19 
#ifdef COUNT_ALLOCS
/* Statistics counters (COUNT_ALLOCS builds only): incremented each time the
   shared empty bytes object (null_strings) or a cached one-byte object
   (one_strings) is handed out instead of allocating a new object. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by byte value.  Slots start out
   NULL and are filled in the first time the constructors below create a
   one-byte object from data; the cache holds its own reference. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Shared empty bytes object; NULL until the first empty object is created,
   after which the cache holds its own reference to it. */
static PyBytesObject *nullstring;
26 
/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyBytesObject_SIZE + n bytes.

   The size is the offset of the ob_sval data array plus one byte; the
   constructors in this file use that extra byte for the NUL terminator
   they store after the data.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34 
35 /* Forward declaration */
36 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37                                                    char *str);
38 
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the new object holds the strlen(str) bytes that precede the
   terminator.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or to strlen(str) (for
   PyBytes_FromString()).
*/
61 static PyObject *
_PyBytes_FromSize(Py_ssize_t size,int use_calloc)62 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
63 {
64     PyBytesObject *op;
65     assert(size >= 0);
66 
67     if (size == 0 && (op = nullstring) != NULL) {
68 #ifdef COUNT_ALLOCS
69         null_strings++;
70 #endif
71         Py_INCREF(op);
72         return (PyObject *)op;
73     }
74 
75     if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
76         PyErr_SetString(PyExc_OverflowError,
77                         "byte string is too large");
78         return NULL;
79     }
80 
81     /* Inline PyObject_NewVar */
82     if (use_calloc)
83         op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
84     else
85         op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
86     if (op == NULL)
87         return PyErr_NoMemory();
88     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
89     op->ob_shash = -1;
90     if (!use_calloc)
91         op->ob_sval[size] = '\0';
92     /* empty byte string singleton */
93     if (size == 0) {
94         nullstring = op;
95         Py_INCREF(op);
96     }
97     return (PyObject *) op;
98 }
99 
100 PyObject *
PyBytes_FromStringAndSize(const char * str,Py_ssize_t size)101 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
102 {
103     PyBytesObject *op;
104     if (size < 0) {
105         PyErr_SetString(PyExc_SystemError,
106             "Negative size passed to PyBytes_FromStringAndSize");
107         return NULL;
108     }
109     if (size == 1 && str != NULL &&
110         (op = characters[*str & UCHAR_MAX]) != NULL)
111     {
112 #ifdef COUNT_ALLOCS
113         one_strings++;
114 #endif
115         Py_INCREF(op);
116         return (PyObject *)op;
117     }
118 
119     op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
120     if (op == NULL)
121         return NULL;
122     if (str == NULL)
123         return (PyObject *) op;
124 
125     memcpy(op->ob_sval, str, size);
126     /* share short strings */
127     if (size == 1) {
128         characters[*str & UCHAR_MAX] = op;
129         Py_INCREF(op);
130     }
131     return (PyObject *) op;
132 }
133 
134 PyObject *
PyBytes_FromString(const char * str)135 PyBytes_FromString(const char *str)
136 {
137     size_t size;
138     PyBytesObject *op;
139 
140     assert(str != NULL);
141     size = strlen(str);
142     if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
143         PyErr_SetString(PyExc_OverflowError,
144             "byte string is too long");
145         return NULL;
146     }
147     if (size == 0 && (op = nullstring) != NULL) {
148 #ifdef COUNT_ALLOCS
149         null_strings++;
150 #endif
151         Py_INCREF(op);
152         return (PyObject *)op;
153     }
154     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
155 #ifdef COUNT_ALLOCS
156         one_strings++;
157 #endif
158         Py_INCREF(op);
159         return (PyObject *)op;
160     }
161 
162     /* Inline PyObject_NewVar */
163     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
164     if (op == NULL)
165         return PyErr_NoMemory();
166     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
167     op->ob_shash = -1;
168     memcpy(op->ob_sval, str, size+1);
169     /* share short strings */
170     if (size == 0) {
171         nullstring = op;
172         Py_INCREF(op);
173     } else if (size == 1) {
174         characters[*str & UCHAR_MAX] = op;
175         Py_INCREF(op);
176     }
177     return (PyObject *) op;
178 }
179 
180 PyObject *
PyBytes_FromFormatV(const char * format,va_list vargs)181 PyBytes_FromFormatV(const char *format, va_list vargs)
182 {
183     char *s;
184     const char *f;
185     const char *p;
186     Py_ssize_t prec;
187     int longflag;
188     int size_tflag;
189     /* Longest 64-bit formatted numbers:
190        - "18446744073709551615\0" (21 bytes)
191        - "-9223372036854775808\0" (21 bytes)
192        Decimal takes the most space (it isn't enough for octal.)
193 
194        Longest 64-bit pointer representation:
195        "0xffffffffffffffff\0" (19 bytes). */
196     char buffer[21];
197     _PyBytesWriter writer;
198 
199     _PyBytesWriter_Init(&writer);
200 
201     s = _PyBytesWriter_Alloc(&writer, strlen(format));
202     if (s == NULL)
203         return NULL;
204     writer.overallocate = 1;
205 
206 #define WRITE_BYTES(str) \
207     do { \
208         s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
209         if (s == NULL) \
210             goto error; \
211     } while (0)
212 
213     for (f = format; *f; f++) {
214         if (*f != '%') {
215             *s++ = *f;
216             continue;
217         }
218 
219         p = f++;
220 
221         /* ignore the width (ex: 10 in "%10s") */
222         while (Py_ISDIGIT(*f))
223             f++;
224 
225         /* parse the precision (ex: 10 in "%.10s") */
226         prec = 0;
227         if (*f == '.') {
228             f++;
229             for (; Py_ISDIGIT(*f); f++) {
230                 prec = (prec * 10) + (*f - '0');
231             }
232         }
233 
234         while (*f && *f != '%' && !Py_ISALPHA(*f))
235             f++;
236 
237         /* handle the long flag ('l'), but only for %ld and %lu.
238            others can be added when necessary. */
239         longflag = 0;
240         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
241             longflag = 1;
242             ++f;
243         }
244 
245         /* handle the size_t flag ('z'). */
246         size_tflag = 0;
247         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
248             size_tflag = 1;
249             ++f;
250         }
251 
252         /* subtract bytes preallocated for the format string
253            (ex: 2 for "%s") */
254         writer.min_size -= (f - p + 1);
255 
256         switch (*f) {
257         case 'c':
258         {
259             int c = va_arg(vargs, int);
260             if (c < 0 || c > 255) {
261                 PyErr_SetString(PyExc_OverflowError,
262                                 "PyBytes_FromFormatV(): %c format "
263                                 "expects an integer in range [0; 255]");
264                 goto error;
265             }
266             writer.min_size++;
267             *s++ = (unsigned char)c;
268             break;
269         }
270 
271         case 'd':
272             if (longflag)
273                 sprintf(buffer, "%ld", va_arg(vargs, long));
274             else if (size_tflag)
275                 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
276                     va_arg(vargs, Py_ssize_t));
277             else
278                 sprintf(buffer, "%d", va_arg(vargs, int));
279             assert(strlen(buffer) < sizeof(buffer));
280             WRITE_BYTES(buffer);
281             break;
282 
283         case 'u':
284             if (longflag)
285                 sprintf(buffer, "%lu",
286                     va_arg(vargs, unsigned long));
287             else if (size_tflag)
288                 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
289                     va_arg(vargs, size_t));
290             else
291                 sprintf(buffer, "%u",
292                     va_arg(vargs, unsigned int));
293             assert(strlen(buffer) < sizeof(buffer));
294             WRITE_BYTES(buffer);
295             break;
296 
297         case 'i':
298             sprintf(buffer, "%i", va_arg(vargs, int));
299             assert(strlen(buffer) < sizeof(buffer));
300             WRITE_BYTES(buffer);
301             break;
302 
303         case 'x':
304             sprintf(buffer, "%x", va_arg(vargs, int));
305             assert(strlen(buffer) < sizeof(buffer));
306             WRITE_BYTES(buffer);
307             break;
308 
309         case 's':
310         {
311             Py_ssize_t i;
312 
313             p = va_arg(vargs, const char*);
314             if (prec <= 0) {
315                 i = strlen(p);
316             }
317             else {
318                 i = 0;
319                 while (i < prec && p[i]) {
320                     i++;
321                 }
322             }
323             s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
324             if (s == NULL)
325                 goto error;
326             break;
327         }
328 
329         case 'p':
330             sprintf(buffer, "%p", va_arg(vargs, void*));
331             assert(strlen(buffer) < sizeof(buffer));
332             /* %p is ill-defined:  ensure leading 0x. */
333             if (buffer[1] == 'X')
334                 buffer[1] = 'x';
335             else if (buffer[1] != 'x') {
336                 memmove(buffer+2, buffer, strlen(buffer)+1);
337                 buffer[0] = '0';
338                 buffer[1] = 'x';
339             }
340             WRITE_BYTES(buffer);
341             break;
342 
343         case '%':
344             writer.min_size++;
345             *s++ = '%';
346             break;
347 
348         default:
349             if (*f == 0) {
350                 /* fix min_size if we reached the end of the format string */
351                 writer.min_size++;
352             }
353 
354             /* invalid format string: copy unformatted string and exit */
355             WRITE_BYTES(p);
356             return _PyBytesWriter_Finish(&writer, s);
357         }
358     }
359 
360 #undef WRITE_BYTES
361 
362     return _PyBytesWriter_Finish(&writer, s);
363 
364  error:
365     _PyBytesWriter_Dealloc(&writer);
366     return NULL;
367 }
368 
369 PyObject *
PyBytes_FromFormat(const char * format,...)370 PyBytes_FromFormat(const char *format, ...)
371 {
372     PyObject* ret;
373     va_list vargs;
374 
375 #ifdef HAVE_STDARG_PROTOTYPES
376     va_start(vargs, format);
377 #else
378     va_start(vargs);
379 #endif
380     ret = PyBytes_FromFormatV(format, vargs);
381     va_end(vargs);
382     return ret;
383 }
384 
385 /* Helpers for formatstring */
386 
387 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)388 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
389 {
390     Py_ssize_t argidx = *p_argidx;
391     if (argidx < arglen) {
392         (*p_argidx)++;
393         if (arglen < 0)
394             return args;
395         else
396             return PyTuple_GetItem(args, argidx);
397     }
398     PyErr_SetString(PyExc_TypeError,
399                     "not enough arguments for format string");
400     return NULL;
401 }
402 
/* Format codes
 *
 * Bit flags OR'ed together while the flag characters of a '%' conversion
 * specifier are parsed; each bit records that the character shown was seen.
 *
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
415 
416 /* Returns a new reference to a PyBytes object, or NULL on failure. */
417 
418 static char*
formatfloat(PyObject * v,int flags,int prec,int type,PyObject ** p_result,_PyBytesWriter * writer,char * str)419 formatfloat(PyObject *v, int flags, int prec, int type,
420             PyObject **p_result, _PyBytesWriter *writer, char *str)
421 {
422     char *p;
423     PyObject *result;
424     double x;
425     size_t len;
426 
427     x = PyFloat_AsDouble(v);
428     if (x == -1.0 && PyErr_Occurred()) {
429         PyErr_Format(PyExc_TypeError, "float argument required, "
430                      "not %.200s", Py_TYPE(v)->tp_name);
431         return NULL;
432     }
433 
434     if (prec < 0)
435         prec = 6;
436 
437     p = PyOS_double_to_string(x, type, prec,
438                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
439 
440     if (p == NULL)
441         return NULL;
442 
443     len = strlen(p);
444     if (writer != NULL) {
445         str = _PyBytesWriter_Prepare(writer, str, len);
446         if (str == NULL)
447             return NULL;
448         memcpy(str, p, len);
449         PyMem_Free(p);
450         str += len;
451         return str;
452     }
453 
454     result = PyBytes_FromStringAndSize(p, len);
455     PyMem_Free(p);
456     *p_result = result;
457     return result != NULL ? str : NULL;
458 }
459 
460 static PyObject *
formatlong(PyObject * v,int flags,int prec,int type)461 formatlong(PyObject *v, int flags, int prec, int type)
462 {
463     PyObject *result, *iobj;
464     if (type == 'i')
465         type = 'd';
466     if (PyLong_Check(v))
467         return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
468     if (PyNumber_Check(v)) {
469         /* make sure number is a type of integer for o, x, and X */
470         if (type == 'o' || type == 'x' || type == 'X')
471             iobj = PyNumber_Index(v);
472         else
473             iobj = PyNumber_Long(v);
474         if (iobj == NULL) {
475             if (!PyErr_ExceptionMatches(PyExc_TypeError))
476                 return NULL;
477         }
478         else if (!PyLong_Check(iobj))
479             Py_CLEAR(iobj);
480         if (iobj != NULL) {
481             result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
482             Py_DECREF(iobj);
483             return result;
484         }
485     }
486     PyErr_Format(PyExc_TypeError,
487         "%%%c format: %s is required, not %.200s", type,
488         (type == 'o' || type == 'x' || type == 'X') ? "an integer"
489                                                     : "a number",
490         Py_TYPE(v)->tp_name);
491     return NULL;
492 }
493 
494 static int
byte_converter(PyObject * arg,char * p)495 byte_converter(PyObject *arg, char *p)
496 {
497     if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
498         *p = PyBytes_AS_STRING(arg)[0];
499         return 1;
500     }
501     else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
502         *p = PyByteArray_AS_STRING(arg)[0];
503         return 1;
504     }
505     else {
506         PyObject *iobj;
507         long ival;
508         int overflow;
509         /* make sure number is a type of integer */
510         if (PyLong_Check(arg)) {
511             ival = PyLong_AsLongAndOverflow(arg, &overflow);
512         }
513         else {
514             iobj = PyNumber_Index(arg);
515             if (iobj == NULL) {
516                 if (!PyErr_ExceptionMatches(PyExc_TypeError))
517                     return 0;
518                 goto onError;
519             }
520             ival = PyLong_AsLongAndOverflow(iobj, &overflow);
521             Py_DECREF(iobj);
522         }
523         if (!overflow && ival == -1 && PyErr_Occurred())
524             goto onError;
525         if (overflow || !(0 <= ival && ival <= 255)) {
526             PyErr_SetString(PyExc_OverflowError,
527                             "%c arg not in range(256)");
528             return 0;
529         }
530         *p = (char)ival;
531         return 1;
532     }
533   onError:
534     PyErr_SetString(PyExc_TypeError,
535         "%c requires an integer in range(256) or a single byte");
536     return 0;
537 }
538 
539 static PyObject *_PyBytes_FromBuffer(PyObject *x);
540 
541 static PyObject *
format_obj(PyObject * v,const char ** pbuf,Py_ssize_t * plen)542 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
543 {
544     PyObject *func, *result;
545     _Py_IDENTIFIER(__bytes__);
546     /* is it a bytes object? */
547     if (PyBytes_Check(v)) {
548         *pbuf = PyBytes_AS_STRING(v);
549         *plen = PyBytes_GET_SIZE(v);
550         Py_INCREF(v);
551         return v;
552     }
553     if (PyByteArray_Check(v)) {
554         *pbuf = PyByteArray_AS_STRING(v);
555         *plen = PyByteArray_GET_SIZE(v);
556         Py_INCREF(v);
557         return v;
558     }
559     /* does it support __bytes__? */
560     func = _PyObject_LookupSpecial(v, &PyId___bytes__);
561     if (func != NULL) {
562         result = _PyObject_CallNoArg(func);
563         Py_DECREF(func);
564         if (result == NULL)
565             return NULL;
566         if (!PyBytes_Check(result)) {
567             PyErr_Format(PyExc_TypeError,
568                          "__bytes__ returned non-bytes (type %.200s)",
569                          Py_TYPE(result)->tp_name);
570             Py_DECREF(result);
571             return NULL;
572         }
573         *pbuf = PyBytes_AS_STRING(result);
574         *plen = PyBytes_GET_SIZE(result);
575         return result;
576     }
577     /* does it support buffer protocol? */
578     if (PyObject_CheckBuffer(v)) {
579         /* maybe we can avoid making a copy of the buffer object here? */
580         result = _PyBytes_FromBuffer(v);
581         if (result == NULL)
582             return NULL;
583         *pbuf = PyBytes_AS_STRING(result);
584         *plen = PyBytes_GET_SIZE(result);
585         return result;
586     }
587     PyErr_Format(PyExc_TypeError,
588                  "%%b requires a bytes-like object, "
589                  "or an object that implements __bytes__, not '%.100s'",
590                  Py_TYPE(v)->tp_name);
591     return NULL;
592 }
593 
594 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
595 
596 PyObject *
_PyBytes_FormatEx(const char * format,Py_ssize_t format_len,PyObject * args,int use_bytearray)597 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
598                   PyObject *args, int use_bytearray)
599 {
600     const char *fmt;
601     char *res;
602     Py_ssize_t arglen, argidx;
603     Py_ssize_t fmtcnt;
604     int args_owned = 0;
605     PyObject *dict = NULL;
606     _PyBytesWriter writer;
607 
608     if (args == NULL) {
609         PyErr_BadInternalCall();
610         return NULL;
611     }
612     fmt = format;
613     fmtcnt = format_len;
614 
615     _PyBytesWriter_Init(&writer);
616     writer.use_bytearray = use_bytearray;
617 
618     res = _PyBytesWriter_Alloc(&writer, fmtcnt);
619     if (res == NULL)
620         return NULL;
621     if (!use_bytearray)
622         writer.overallocate = 1;
623 
624     if (PyTuple_Check(args)) {
625         arglen = PyTuple_GET_SIZE(args);
626         argidx = 0;
627     }
628     else {
629         arglen = -1;
630         argidx = -2;
631     }
632     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
633         !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
634         !PyByteArray_Check(args)) {
635             dict = args;
636     }
637 
638     while (--fmtcnt >= 0) {
639         if (*fmt != '%') {
640             Py_ssize_t len;
641             char *pos;
642 
643             pos = (char *)memchr(fmt + 1, '%', fmtcnt);
644             if (pos != NULL)
645                 len = pos - fmt;
646             else
647                 len = fmtcnt + 1;
648             assert(len != 0);
649 
650             memcpy(res, fmt, len);
651             res += len;
652             fmt += len;
653             fmtcnt -= (len - 1);
654         }
655         else {
656             /* Got a format specifier */
657             int flags = 0;
658             Py_ssize_t width = -1;
659             int prec = -1;
660             int c = '\0';
661             int fill;
662             PyObject *v = NULL;
663             PyObject *temp = NULL;
664             const char *pbuf = NULL;
665             int sign;
666             Py_ssize_t len = 0;
667             char onechar; /* For byte_converter() */
668             Py_ssize_t alloc;
669 #ifdef Py_DEBUG
670             char *before;
671 #endif
672 
673             fmt++;
674             if (*fmt == '%') {
675                 *res++ = '%';
676                 fmt++;
677                 fmtcnt--;
678                 continue;
679             }
680             if (*fmt == '(') {
681                 const char *keystart;
682                 Py_ssize_t keylen;
683                 PyObject *key;
684                 int pcount = 1;
685 
686                 if (dict == NULL) {
687                     PyErr_SetString(PyExc_TypeError,
688                              "format requires a mapping");
689                     goto error;
690                 }
691                 ++fmt;
692                 --fmtcnt;
693                 keystart = fmt;
694                 /* Skip over balanced parentheses */
695                 while (pcount > 0 && --fmtcnt >= 0) {
696                     if (*fmt == ')')
697                         --pcount;
698                     else if (*fmt == '(')
699                         ++pcount;
700                     fmt++;
701                 }
702                 keylen = fmt - keystart - 1;
703                 if (fmtcnt < 0 || pcount > 0) {
704                     PyErr_SetString(PyExc_ValueError,
705                                "incomplete format key");
706                     goto error;
707                 }
708                 key = PyBytes_FromStringAndSize(keystart,
709                                                  keylen);
710                 if (key == NULL)
711                     goto error;
712                 if (args_owned) {
713                     Py_DECREF(args);
714                     args_owned = 0;
715                 }
716                 args = PyObject_GetItem(dict, key);
717                 Py_DECREF(key);
718                 if (args == NULL) {
719                     goto error;
720                 }
721                 args_owned = 1;
722                 arglen = -1;
723                 argidx = -2;
724             }
725 
726             /* Parse flags. Example: "%+i" => flags=F_SIGN. */
727             while (--fmtcnt >= 0) {
728                 switch (c = *fmt++) {
729                 case '-': flags |= F_LJUST; continue;
730                 case '+': flags |= F_SIGN; continue;
731                 case ' ': flags |= F_BLANK; continue;
732                 case '#': flags |= F_ALT; continue;
733                 case '0': flags |= F_ZERO; continue;
734                 }
735                 break;
736             }
737 
738             /* Parse width. Example: "%10s" => width=10 */
739             if (c == '*') {
740                 v = getnextarg(args, arglen, &argidx);
741                 if (v == NULL)
742                     goto error;
743                 if (!PyLong_Check(v)) {
744                     PyErr_SetString(PyExc_TypeError,
745                                     "* wants int");
746                     goto error;
747                 }
748                 width = PyLong_AsSsize_t(v);
749                 if (width == -1 && PyErr_Occurred())
750                     goto error;
751                 if (width < 0) {
752                     flags |= F_LJUST;
753                     width = -width;
754                 }
755                 if (--fmtcnt >= 0)
756                     c = *fmt++;
757             }
758             else if (c >= 0 && isdigit(c)) {
759                 width = c - '0';
760                 while (--fmtcnt >= 0) {
761                     c = Py_CHARMASK(*fmt++);
762                     if (!isdigit(c))
763                         break;
764                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
765                         PyErr_SetString(
766                             PyExc_ValueError,
767                             "width too big");
768                         goto error;
769                     }
770                     width = width*10 + (c - '0');
771                 }
772             }
773 
774             /* Parse precision. Example: "%.3f" => prec=3 */
775             if (c == '.') {
776                 prec = 0;
777                 if (--fmtcnt >= 0)
778                     c = *fmt++;
779                 if (c == '*') {
780                     v = getnextarg(args, arglen, &argidx);
781                     if (v == NULL)
782                         goto error;
783                     if (!PyLong_Check(v)) {
784                         PyErr_SetString(
785                             PyExc_TypeError,
786                             "* wants int");
787                         goto error;
788                     }
789                     prec = _PyLong_AsInt(v);
790                     if (prec == -1 && PyErr_Occurred())
791                         goto error;
792                     if (prec < 0)
793                         prec = 0;
794                     if (--fmtcnt >= 0)
795                         c = *fmt++;
796                 }
797                 else if (c >= 0 && isdigit(c)) {
798                     prec = c - '0';
799                     while (--fmtcnt >= 0) {
800                         c = Py_CHARMASK(*fmt++);
801                         if (!isdigit(c))
802                             break;
803                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
804                             PyErr_SetString(
805                                 PyExc_ValueError,
806                                 "prec too big");
807                             goto error;
808                         }
809                         prec = prec*10 + (c - '0');
810                     }
811                 }
812             } /* prec */
813             if (fmtcnt >= 0) {
814                 if (c == 'h' || c == 'l' || c == 'L') {
815                     if (--fmtcnt >= 0)
816                         c = *fmt++;
817                 }
818             }
819             if (fmtcnt < 0) {
820                 PyErr_SetString(PyExc_ValueError,
821                                 "incomplete format");
822                 goto error;
823             }
824             v = getnextarg(args, arglen, &argidx);
825             if (v == NULL)
826                 goto error;
827 
828             if (fmtcnt == 0) {
829                 /* last write: disable writer overallocation */
830                 writer.overallocate = 0;
831             }
832 
833             sign = 0;
834             fill = ' ';
835             switch (c) {
836             case 'r':
837                 // %r is only for 2/3 code; 3 only code should use %a
838             case 'a':
839                 temp = PyObject_ASCII(v);
840                 if (temp == NULL)
841                     goto error;
842                 assert(PyUnicode_IS_ASCII(temp));
843                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
844                 len = PyUnicode_GET_LENGTH(temp);
845                 if (prec >= 0 && len > prec)
846                     len = prec;
847                 break;
848 
849             case 's':
850                 // %s is only for 2/3 code; 3 only code should use %b
851             case 'b':
852                 temp = format_obj(v, &pbuf, &len);
853                 if (temp == NULL)
854                     goto error;
855                 if (prec >= 0 && len > prec)
856                     len = prec;
857                 break;
858 
859             case 'i':
860             case 'd':
861             case 'u':
862             case 'o':
863             case 'x':
864             case 'X':
865                 if (PyLong_CheckExact(v)
866                     && width == -1 && prec == -1
867                     && !(flags & (F_SIGN | F_BLANK))
868                     && c != 'X')
869                 {
870                     /* Fast path */
871                     int alternate = flags & F_ALT;
872                     int base;
873 
874                     switch(c)
875                     {
876                         default:
877                             Py_UNREACHABLE();
878                         case 'd':
879                         case 'i':
880                         case 'u':
881                             base = 10;
882                             break;
883                         case 'o':
884                             base = 8;
885                             break;
886                         case 'x':
887                         case 'X':
888                             base = 16;
889                             break;
890                     }
891 
892                     /* Fast path */
893                     writer.min_size -= 2; /* size preallocated for "%d" */
894                     res = _PyLong_FormatBytesWriter(&writer, res,
895                                                     v, base, alternate);
896                     if (res == NULL)
897                         goto error;
898                     continue;
899                 }
900 
901                 temp = formatlong(v, flags, prec, c);
902                 if (!temp)
903                     goto error;
904                 assert(PyUnicode_IS_ASCII(temp));
905                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
906                 len = PyUnicode_GET_LENGTH(temp);
907                 sign = 1;
908                 if (flags & F_ZERO)
909                     fill = '0';
910                 break;
911 
912             case 'e':
913             case 'E':
914             case 'f':
915             case 'F':
916             case 'g':
917             case 'G':
918                 if (width == -1 && prec == -1
919                     && !(flags & (F_SIGN | F_BLANK)))
920                 {
921                     /* Fast path */
922                     writer.min_size -= 2; /* size preallocated for "%f" */
923                     res = formatfloat(v, flags, prec, c, NULL, &writer, res);
924                     if (res == NULL)
925                         goto error;
926                     continue;
927                 }
928 
929                 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
930                     goto error;
931                 pbuf = PyBytes_AS_STRING(temp);
932                 len = PyBytes_GET_SIZE(temp);
933                 sign = 1;
934                 if (flags & F_ZERO)
935                     fill = '0';
936                 break;
937 
938             case 'c':
939                 pbuf = &onechar;
940                 len = byte_converter(v, &onechar);
941                 if (!len)
942                     goto error;
943                 if (width == -1) {
944                     /* Fast path */
945                     *res++ = onechar;
946                     continue;
947                 }
948                 break;
949 
950             default:
951                 PyErr_Format(PyExc_ValueError,
952                   "unsupported format character '%c' (0x%x) "
953                   "at index %zd",
954                   c, c,
955                   (Py_ssize_t)(fmt - 1 - format));
956                 goto error;
957             }
958 
959             if (sign) {
960                 if (*pbuf == '-' || *pbuf == '+') {
961                     sign = *pbuf++;
962                     len--;
963                 }
964                 else if (flags & F_SIGN)
965                     sign = '+';
966                 else if (flags & F_BLANK)
967                     sign = ' ';
968                 else
969                     sign = 0;
970             }
971             if (width < len)
972                 width = len;
973 
974             alloc = width;
975             if (sign != 0 && len == width)
976                 alloc++;
977             /* 2: size preallocated for %s */
978             if (alloc > 2) {
979                 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
980                 if (res == NULL)
981                     goto error;
982             }
983 #ifdef Py_DEBUG
984             before = res;
985 #endif
986 
987             /* Write the sign if needed */
988             if (sign) {
989                 if (fill != ' ')
990                     *res++ = sign;
991                 if (width > len)
992                     width--;
993             }
994 
995             /* Write the numeric prefix for "x", "X" and "o" formats
996                if the alternate form is used.
997                For example, write "0x" for the "%#x" format. */
998             if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
999                 assert(pbuf[0] == '0');
1000                 assert(pbuf[1] == c);
1001                 if (fill != ' ') {
1002                     *res++ = *pbuf++;
1003                     *res++ = *pbuf++;
1004                 }
1005                 width -= 2;
1006                 if (width < 0)
1007                     width = 0;
1008                 len -= 2;
1009             }
1010 
1011             /* Pad left with the fill character if needed */
1012             if (width > len && !(flags & F_LJUST)) {
1013                 memset(res, fill, width - len);
1014                 res += (width - len);
1015                 width = len;
1016             }
1017 
1018             /* If padding with spaces: write sign if needed and/or numeric
1019                prefix if the alternate form is used */
1020             if (fill == ' ') {
1021                 if (sign)
1022                     *res++ = sign;
1023                 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1024                     assert(pbuf[0] == '0');
1025                     assert(pbuf[1] == c);
1026                     *res++ = *pbuf++;
1027                     *res++ = *pbuf++;
1028                 }
1029             }
1030 
1031             /* Copy bytes */
1032             memcpy(res, pbuf, len);
1033             res += len;
1034 
1035             /* Pad right with the fill character if needed */
1036             if (width > len) {
1037                 memset(res, ' ', width - len);
1038                 res += (width - len);
1039             }
1040 
1041             if (dict && (argidx < arglen)) {
1042                 PyErr_SetString(PyExc_TypeError,
1043                            "not all arguments converted during bytes formatting");
1044                 Py_XDECREF(temp);
1045                 goto error;
1046             }
1047             Py_XDECREF(temp);
1048 
1049 #ifdef Py_DEBUG
1050             /* check that we computed the exact size for this write */
1051             assert((res - before) == alloc);
1052 #endif
1053         } /* '%' */
1054 
1055         /* If overallocation was disabled, ensure that it was the last
1056            write. Otherwise, we missed an optimization */
1057         assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1058     } /* until end */
1059 
1060     if (argidx < arglen && !dict) {
1061         PyErr_SetString(PyExc_TypeError,
1062                         "not all arguments converted during bytes formatting");
1063         goto error;
1064     }
1065 
1066     if (args_owned) {
1067         Py_DECREF(args);
1068     }
1069     return _PyBytesWriter_Finish(&writer, res);
1070 
1071  error:
1072     _PyBytesWriter_Dealloc(&writer);
1073     if (args_owned) {
1074         Py_DECREF(args);
1075     }
1076     return NULL;
1077 }
1078 
1079 /* =-= */
1080 
1081 static void
bytes_dealloc(PyObject * op)1082 bytes_dealloc(PyObject *op)
1083 {
1084     Py_TYPE(op)->tp_free(op);
1085 }
1086 
/* Unescape a backslash-escaped string (this describes
   _PyBytes_DecodeEscape() below; the function that follows immediately
   is its recoding helper). If unicode is non-zero, the string is a
   u-literal. If recode_encoding is non-zero, the string is UTF-8 encoded
   and should be re-encoded in the specified encoding.  */
1091 
1092 static char *
_PyBytes_DecodeEscapeRecode(const char ** s,const char * end,const char * errors,const char * recode_encoding,_PyBytesWriter * writer,char * p)1093 _PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1094                             const char *errors, const char *recode_encoding,
1095                             _PyBytesWriter *writer, char *p)
1096 {
1097     PyObject *u, *w;
1098     const char* t;
1099 
1100     t = *s;
1101     /* Decode non-ASCII bytes as UTF-8. */
1102     while (t < end && (*t & 0x80))
1103         t++;
1104     u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1105     if (u == NULL)
1106         return NULL;
1107 
1108     /* Recode them in target encoding. */
1109     w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1110     Py_DECREF(u);
1111     if  (w == NULL)
1112         return NULL;
1113     assert(PyBytes_Check(w));
1114 
1115     /* Append bytes to output buffer. */
1116     writer->min_size--;   /* subtract 1 preallocated byte */
1117     p = _PyBytesWriter_WriteBytes(writer, p,
1118                                   PyBytes_AS_STRING(w),
1119                                   PyBytes_GET_SIZE(w));
1120     Py_DECREF(w);
1121     if (p == NULL)
1122         return NULL;
1123 
1124     *s = t;
1125     return p;
1126 }
1127 
_PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding,const char ** first_invalid_escape)1128 PyObject *_PyBytes_DecodeEscape(const char *s,
1129                                 Py_ssize_t len,
1130                                 const char *errors,
1131                                 Py_ssize_t unicode,
1132                                 const char *recode_encoding,
1133                                 const char **first_invalid_escape)
1134 {
1135     int c;
1136     char *p;
1137     const char *end;
1138     _PyBytesWriter writer;
1139 
1140     _PyBytesWriter_Init(&writer);
1141 
1142     p = _PyBytesWriter_Alloc(&writer, len);
1143     if (p == NULL)
1144         return NULL;
1145     writer.overallocate = 1;
1146 
1147     *first_invalid_escape = NULL;
1148 
1149     end = s + len;
1150     while (s < end) {
1151         if (*s != '\\') {
1152           non_esc:
1153             if (!(recode_encoding && (*s & 0x80))) {
1154                 *p++ = *s++;
1155             }
1156             else {
1157                 /* non-ASCII character and need to recode */
1158                 p = _PyBytes_DecodeEscapeRecode(&s, end,
1159                                                 errors, recode_encoding,
1160                                                 &writer, p);
1161                 if (p == NULL)
1162                     goto failed;
1163             }
1164             continue;
1165         }
1166 
1167         s++;
1168         if (s == end) {
1169             PyErr_SetString(PyExc_ValueError,
1170                             "Trailing \\ in string");
1171             goto failed;
1172         }
1173 
1174         switch (*s++) {
1175         /* XXX This assumes ASCII! */
1176         case '\n': break;
1177         case '\\': *p++ = '\\'; break;
1178         case '\'': *p++ = '\''; break;
1179         case '\"': *p++ = '\"'; break;
1180         case 'b': *p++ = '\b'; break;
1181         case 'f': *p++ = '\014'; break; /* FF */
1182         case 't': *p++ = '\t'; break;
1183         case 'n': *p++ = '\n'; break;
1184         case 'r': *p++ = '\r'; break;
1185         case 'v': *p++ = '\013'; break; /* VT */
1186         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1187         case '0': case '1': case '2': case '3':
1188         case '4': case '5': case '6': case '7':
1189             c = s[-1] - '0';
1190             if (s < end && '0' <= *s && *s <= '7') {
1191                 c = (c<<3) + *s++ - '0';
1192                 if (s < end && '0' <= *s && *s <= '7')
1193                     c = (c<<3) + *s++ - '0';
1194             }
1195             *p++ = c;
1196             break;
1197         case 'x':
1198             if (s+1 < end) {
1199                 int digit1, digit2;
1200                 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1201                 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1202                 if (digit1 < 16 && digit2 < 16) {
1203                     *p++ = (unsigned char)((digit1 << 4) + digit2);
1204                     s += 2;
1205                     break;
1206                 }
1207             }
1208             /* invalid hexadecimal digits */
1209 
1210             if (!errors || strcmp(errors, "strict") == 0) {
1211                 PyErr_Format(PyExc_ValueError,
1212                              "invalid \\x escape at position %d",
1213                              s - 2 - (end - len));
1214                 goto failed;
1215             }
1216             if (strcmp(errors, "replace") == 0) {
1217                 *p++ = '?';
1218             } else if (strcmp(errors, "ignore") == 0)
1219                 /* do nothing */;
1220             else {
1221                 PyErr_Format(PyExc_ValueError,
1222                              "decoding error; unknown "
1223                              "error handling code: %.400s",
1224                              errors);
1225                 goto failed;
1226             }
1227             /* skip \x */
1228             if (s < end && Py_ISXDIGIT(s[0]))
1229                 s++; /* and a hexdigit */
1230             break;
1231 
1232         default:
1233             if (*first_invalid_escape == NULL) {
1234                 *first_invalid_escape = s-1; /* Back up one char, since we've
1235                                                 already incremented s. */
1236             }
1237             *p++ = '\\';
1238             s--;
1239             goto non_esc; /* an arbitrary number of unescaped
1240                              UTF-8 bytes may follow. */
1241         }
1242     }
1243 
1244     return _PyBytesWriter_Finish(&writer, p);
1245 
1246   failed:
1247     _PyBytesWriter_Dealloc(&writer);
1248     return NULL;
1249 }
1250 
PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding)1251 PyObject *PyBytes_DecodeEscape(const char *s,
1252                                 Py_ssize_t len,
1253                                 const char *errors,
1254                                 Py_ssize_t unicode,
1255                                 const char *recode_encoding)
1256 {
1257     const char* first_invalid_escape;
1258     PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1259                                              recode_encoding,
1260                                              &first_invalid_escape);
1261     if (result == NULL)
1262         return NULL;
1263     if (first_invalid_escape != NULL) {
1264         if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1265                              "invalid escape sequence '\\%c'",
1266                              (unsigned char)*first_invalid_escape) < 0) {
1267             Py_DECREF(result);
1268             return NULL;
1269         }
1270     }
1271     return result;
1272 
1273 }
1274 /* -------------------------------------------------------------------- */
1275 /* object api */
1276 
1277 Py_ssize_t
PyBytes_Size(PyObject * op)1278 PyBytes_Size(PyObject *op)
1279 {
1280     if (!PyBytes_Check(op)) {
1281         PyErr_Format(PyExc_TypeError,
1282              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1283         return -1;
1284     }
1285     return Py_SIZE(op);
1286 }
1287 
1288 char *
PyBytes_AsString(PyObject * op)1289 PyBytes_AsString(PyObject *op)
1290 {
1291     if (!PyBytes_Check(op)) {
1292         PyErr_Format(PyExc_TypeError,
1293              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1294         return NULL;
1295     }
1296     return ((PyBytesObject *)op)->ob_sval;
1297 }
1298 
1299 int
PyBytes_AsStringAndSize(PyObject * obj,char ** s,Py_ssize_t * len)1300 PyBytes_AsStringAndSize(PyObject *obj,
1301                          char **s,
1302                          Py_ssize_t *len)
1303 {
1304     if (s == NULL) {
1305         PyErr_BadInternalCall();
1306         return -1;
1307     }
1308 
1309     if (!PyBytes_Check(obj)) {
1310         PyErr_Format(PyExc_TypeError,
1311              "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1312         return -1;
1313     }
1314 
1315     *s = PyBytes_AS_STRING(obj);
1316     if (len != NULL)
1317         *len = PyBytes_GET_SIZE(obj);
1318     else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1319         PyErr_SetString(PyExc_ValueError,
1320                         "embedded null byte");
1321         return -1;
1322     }
1323     return 0;
1324 }
1325 
1326 /* -------------------------------------------------------------------- */
1327 /* Methods */
1328 
1329 #include "stringlib/stringdefs.h"
1330 
1331 #include "stringlib/fastsearch.h"
1332 #include "stringlib/count.h"
1333 #include "stringlib/find.h"
1334 #include "stringlib/join.h"
1335 #include "stringlib/partition.h"
1336 #include "stringlib/split.h"
1337 #include "stringlib/ctype.h"
1338 
1339 #include "stringlib/transmogrify.h"
1340 
1341 PyObject *
PyBytes_Repr(PyObject * obj,int smartquotes)1342 PyBytes_Repr(PyObject *obj, int smartquotes)
1343 {
1344     PyBytesObject* op = (PyBytesObject*) obj;
1345     Py_ssize_t i, length = Py_SIZE(op);
1346     Py_ssize_t newsize, squotes, dquotes;
1347     PyObject *v;
1348     unsigned char quote, *s, *p;
1349 
1350     /* Compute size of output string */
1351     squotes = dquotes = 0;
1352     newsize = 3; /* b'' */
1353     s = (unsigned char*)op->ob_sval;
1354     for (i = 0; i < length; i++) {
1355         Py_ssize_t incr = 1;
1356         switch(s[i]) {
1357         case '\'': squotes++; break;
1358         case '"':  dquotes++; break;
1359         case '\\': case '\t': case '\n': case '\r':
1360             incr = 2; break; /* \C */
1361         default:
1362             if (s[i] < ' ' || s[i] >= 0x7f)
1363                 incr = 4; /* \xHH */
1364         }
1365         if (newsize > PY_SSIZE_T_MAX - incr)
1366             goto overflow;
1367         newsize += incr;
1368     }
1369     quote = '\'';
1370     if (smartquotes && squotes && !dquotes)
1371         quote = '"';
1372     if (squotes && quote == '\'') {
1373         if (newsize > PY_SSIZE_T_MAX - squotes)
1374             goto overflow;
1375         newsize += squotes;
1376     }
1377 
1378     v = PyUnicode_New(newsize, 127);
1379     if (v == NULL) {
1380         return NULL;
1381     }
1382     p = PyUnicode_1BYTE_DATA(v);
1383 
1384     *p++ = 'b', *p++ = quote;
1385     for (i = 0; i < length; i++) {
1386         unsigned char c = op->ob_sval[i];
1387         if (c == quote || c == '\\')
1388             *p++ = '\\', *p++ = c;
1389         else if (c == '\t')
1390             *p++ = '\\', *p++ = 't';
1391         else if (c == '\n')
1392             *p++ = '\\', *p++ = 'n';
1393         else if (c == '\r')
1394             *p++ = '\\', *p++ = 'r';
1395         else if (c < ' ' || c >= 0x7f) {
1396             *p++ = '\\';
1397             *p++ = 'x';
1398             *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1399             *p++ = Py_hexdigits[c & 0xf];
1400         }
1401         else
1402             *p++ = c;
1403     }
1404     *p++ = quote;
1405     assert(_PyUnicode_CheckConsistency(v, 1));
1406     return v;
1407 
1408   overflow:
1409     PyErr_SetString(PyExc_OverflowError,
1410                     "bytes object is too large to make repr");
1411     return NULL;
1412 }
1413 
1414 static PyObject *
bytes_repr(PyObject * op)1415 bytes_repr(PyObject *op)
1416 {
1417     return PyBytes_Repr(op, 1);
1418 }
1419 
1420 static PyObject *
bytes_str(PyObject * op)1421 bytes_str(PyObject *op)
1422 {
1423     if (Py_BytesWarningFlag) {
1424         if (PyErr_WarnEx(PyExc_BytesWarning,
1425                          "str() on a bytes instance", 1))
1426             return NULL;
1427     }
1428     return bytes_repr(op);
1429 }
1430 
1431 static Py_ssize_t
bytes_length(PyBytesObject * a)1432 bytes_length(PyBytesObject *a)
1433 {
1434     return Py_SIZE(a);
1435 }
1436 
1437 /* This is also used by PyBytes_Concat() */
1438 static PyObject *
bytes_concat(PyObject * a,PyObject * b)1439 bytes_concat(PyObject *a, PyObject *b)
1440 {
1441     Py_buffer va, vb;
1442     PyObject *result = NULL;
1443 
1444     va.len = -1;
1445     vb.len = -1;
1446     if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1447         PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1448         PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1449                      Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1450         goto done;
1451     }
1452 
1453     /* Optimize end cases */
1454     if (va.len == 0 && PyBytes_CheckExact(b)) {
1455         result = b;
1456         Py_INCREF(result);
1457         goto done;
1458     }
1459     if (vb.len == 0 && PyBytes_CheckExact(a)) {
1460         result = a;
1461         Py_INCREF(result);
1462         goto done;
1463     }
1464 
1465     if (va.len > PY_SSIZE_T_MAX - vb.len) {
1466         PyErr_NoMemory();
1467         goto done;
1468     }
1469 
1470     result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1471     if (result != NULL) {
1472         memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1473         memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1474     }
1475 
1476   done:
1477     if (va.len != -1)
1478         PyBuffer_Release(&va);
1479     if (vb.len != -1)
1480         PyBuffer_Release(&vb);
1481     return result;
1482 }
1483 
1484 static PyObject *
bytes_repeat(PyBytesObject * a,Py_ssize_t n)1485 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1486 {
1487     Py_ssize_t i;
1488     Py_ssize_t j;
1489     Py_ssize_t size;
1490     PyBytesObject *op;
1491     size_t nbytes;
1492     if (n < 0)
1493         n = 0;
1494     /* watch out for overflows:  the size can overflow int,
1495      * and the # of bytes needed can overflow size_t
1496      */
1497     if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1498         PyErr_SetString(PyExc_OverflowError,
1499             "repeated bytes are too long");
1500         return NULL;
1501     }
1502     size = Py_SIZE(a) * n;
1503     if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1504         Py_INCREF(a);
1505         return (PyObject *)a;
1506     }
1507     nbytes = (size_t)size;
1508     if (nbytes + PyBytesObject_SIZE <= nbytes) {
1509         PyErr_SetString(PyExc_OverflowError,
1510             "repeated bytes are too long");
1511         return NULL;
1512     }
1513     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1514     if (op == NULL)
1515         return PyErr_NoMemory();
1516     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
1517     op->ob_shash = -1;
1518     op->ob_sval[size] = '\0';
1519     if (Py_SIZE(a) == 1 && n > 0) {
1520         memset(op->ob_sval, a->ob_sval[0] , n);
1521         return (PyObject *) op;
1522     }
1523     i = 0;
1524     if (i < size) {
1525         memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1526         i = Py_SIZE(a);
1527     }
1528     while (i < size) {
1529         j = (i <= size-i)  ?  i  :  size-i;
1530         memcpy(op->ob_sval+i, op->ob_sval, j);
1531         i += j;
1532     }
1533     return (PyObject *) op;
1534 }
1535 
1536 static int
bytes_contains(PyObject * self,PyObject * arg)1537 bytes_contains(PyObject *self, PyObject *arg)
1538 {
1539     return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1540 }
1541 
1542 static PyObject *
bytes_item(PyBytesObject * a,Py_ssize_t i)1543 bytes_item(PyBytesObject *a, Py_ssize_t i)
1544 {
1545     if (i < 0 || i >= Py_SIZE(a)) {
1546         PyErr_SetString(PyExc_IndexError, "index out of range");
1547         return NULL;
1548     }
1549     return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1550 }
1551 
1552 static int
bytes_compare_eq(PyBytesObject * a,PyBytesObject * b)1553 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1554 {
1555     int cmp;
1556     Py_ssize_t len;
1557 
1558     len = Py_SIZE(a);
1559     if (Py_SIZE(b) != len)
1560         return 0;
1561 
1562     if (a->ob_sval[0] != b->ob_sval[0])
1563         return 0;
1564 
1565     cmp = memcmp(a->ob_sval, b->ob_sval, len);
1566     return (cmp == 0);
1567 }
1568 
1569 static PyObject*
bytes_richcompare(PyBytesObject * a,PyBytesObject * b,int op)1570 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1571 {
1572     int c;
1573     Py_ssize_t len_a, len_b;
1574     Py_ssize_t min_len;
1575     int rc;
1576 
1577     /* Make sure both arguments are strings. */
1578     if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1579         if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
1580             rc = PyObject_IsInstance((PyObject*)a,
1581                                      (PyObject*)&PyUnicode_Type);
1582             if (!rc)
1583                 rc = PyObject_IsInstance((PyObject*)b,
1584                                          (PyObject*)&PyUnicode_Type);
1585             if (rc < 0)
1586                 return NULL;
1587             if (rc) {
1588                 if (PyErr_WarnEx(PyExc_BytesWarning,
1589                                  "Comparison between bytes and string", 1))
1590                     return NULL;
1591             }
1592             else {
1593                 rc = PyObject_IsInstance((PyObject*)a,
1594                                          (PyObject*)&PyLong_Type);
1595                 if (!rc)
1596                     rc = PyObject_IsInstance((PyObject*)b,
1597                                              (PyObject*)&PyLong_Type);
1598                 if (rc < 0)
1599                     return NULL;
1600                 if (rc) {
1601                     if (PyErr_WarnEx(PyExc_BytesWarning,
1602                                      "Comparison between bytes and int", 1))
1603                         return NULL;
1604                 }
1605             }
1606         }
1607         Py_RETURN_NOTIMPLEMENTED;
1608     }
1609     else if (a == b) {
1610         switch (op) {
1611         case Py_EQ:
1612         case Py_LE:
1613         case Py_GE:
1614             /* a string is equal to itself */
1615             Py_RETURN_TRUE;
1616             break;
1617         case Py_NE:
1618         case Py_LT:
1619         case Py_GT:
1620             Py_RETURN_FALSE;
1621             break;
1622         default:
1623             PyErr_BadArgument();
1624             return NULL;
1625         }
1626     }
1627     else if (op == Py_EQ || op == Py_NE) {
1628         int eq = bytes_compare_eq(a, b);
1629         eq ^= (op == Py_NE);
1630         return PyBool_FromLong(eq);
1631     }
1632     else {
1633         len_a = Py_SIZE(a);
1634         len_b = Py_SIZE(b);
1635         min_len = Py_MIN(len_a, len_b);
1636         if (min_len > 0) {
1637             c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1638             if (c == 0)
1639                 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1640         }
1641         else
1642             c = 0;
1643         if (c != 0)
1644             Py_RETURN_RICHCOMPARE(c, 0, op);
1645         Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1646     }
1647 }
1648 
1649 static Py_hash_t
bytes_hash(PyBytesObject * a)1650 bytes_hash(PyBytesObject *a)
1651 {
1652     if (a->ob_shash == -1) {
1653         /* Can't fail */
1654         a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1655     }
1656     return a->ob_shash;
1657 }
1658 
1659 static PyObject*
bytes_subscript(PyBytesObject * self,PyObject * item)1660 bytes_subscript(PyBytesObject* self, PyObject* item)
1661 {
1662     if (PyIndex_Check(item)) {
1663         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1664         if (i == -1 && PyErr_Occurred())
1665             return NULL;
1666         if (i < 0)
1667             i += PyBytes_GET_SIZE(self);
1668         if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1669             PyErr_SetString(PyExc_IndexError,
1670                             "index out of range");
1671             return NULL;
1672         }
1673         return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1674     }
1675     else if (PySlice_Check(item)) {
1676         Py_ssize_t start, stop, step, slicelength, cur, i;
1677         char* source_buf;
1678         char* result_buf;
1679         PyObject* result;
1680 
1681         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1682             return NULL;
1683         }
1684         slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1685                                             &stop, step);
1686 
1687         if (slicelength <= 0) {
1688             return PyBytes_FromStringAndSize("", 0);
1689         }
1690         else if (start == 0 && step == 1 &&
1691                  slicelength == PyBytes_GET_SIZE(self) &&
1692                  PyBytes_CheckExact(self)) {
1693             Py_INCREF(self);
1694             return (PyObject *)self;
1695         }
1696         else if (step == 1) {
1697             return PyBytes_FromStringAndSize(
1698                 PyBytes_AS_STRING(self) + start,
1699                 slicelength);
1700         }
1701         else {
1702             source_buf = PyBytes_AS_STRING(self);
1703             result = PyBytes_FromStringAndSize(NULL, slicelength);
1704             if (result == NULL)
1705                 return NULL;
1706 
1707             result_buf = PyBytes_AS_STRING(result);
1708             for (cur = start, i = 0; i < slicelength;
1709                  cur += step, i++) {
1710                 result_buf[i] = source_buf[cur];
1711             }
1712 
1713             return result;
1714         }
1715     }
1716     else {
1717         PyErr_Format(PyExc_TypeError,
1718                      "byte indices must be integers or slices, not %.200s",
1719                      Py_TYPE(item)->tp_name);
1720         return NULL;
1721     }
1722 }
1723 
1724 static int
bytes_buffer_getbuffer(PyBytesObject * self,Py_buffer * view,int flags)1725 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1726 {
1727     return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1728                              1, flags);
1729 }
1730 
1731 static PySequenceMethods bytes_as_sequence = {
1732     (lenfunc)bytes_length, /*sq_length*/
1733     (binaryfunc)bytes_concat, /*sq_concat*/
1734     (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1735     (ssizeargfunc)bytes_item, /*sq_item*/
1736     0,                  /*sq_slice*/
1737     0,                  /*sq_ass_item*/
1738     0,                  /*sq_ass_slice*/
1739     (objobjproc)bytes_contains /*sq_contains*/
1740 };
1741 
1742 static PyMappingMethods bytes_as_mapping = {
1743     (lenfunc)bytes_length,
1744     (binaryfunc)bytes_subscript,
1745     0,
1746 };
1747 
1748 static PyBufferProcs bytes_as_buffer = {
1749     (getbufferproc)bytes_buffer_getbuffer,
1750     NULL,
1751 };
1752 
1753 
/* Mode selectors.  NOTE(review): presumably these tell the strip helpers
   defined later in this file which end(s) to trim -- their users are not
   visible here, confirm against the call sites. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1757 
1758 /*[clinic input]
1759 bytes.split
1760 
1761     sep: object = None
1762         The delimiter according which to split the bytes.
1763         None (the default value) means split on ASCII whitespace characters
1764         (space, tab, return, newline, formfeed, vertical tab).
1765     maxsplit: Py_ssize_t = -1
1766         Maximum number of splits to do.
1767         -1 (the default value) means no limit.
1768 
1769 Return a list of the sections in the bytes, using sep as the delimiter.
1770 [clinic start generated code]*/
1771 
1772 static PyObject *
bytes_split_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1773 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1774 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1775 {
1776     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1777     const char *s = PyBytes_AS_STRING(self), *sub;
1778     Py_buffer vsub;
1779     PyObject *list;
1780 
1781     if (maxsplit < 0)
1782         maxsplit = PY_SSIZE_T_MAX;
1783     if (sep == Py_None)
1784         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1785     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1786         return NULL;
1787     sub = vsub.buf;
1788     n = vsub.len;
1789 
1790     list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1791     PyBuffer_Release(&vsub);
1792     return list;
1793 }
1794 
1795 /*[clinic input]
1796 bytes.partition
1797 
1798     sep: Py_buffer
1799     /
1800 
1801 Partition the bytes into three parts using the given separator.
1802 
1803 This will search for the separator sep in the bytes. If the separator is found,
1804 returns a 3-tuple containing the part before the separator, the separator
1805 itself, and the part after it.
1806 
1807 If the separator is not found, returns a 3-tuple containing the original bytes
1808 object and two empty bytes objects.
1809 [clinic start generated code]*/
1810 
1811 static PyObject *
bytes_partition_impl(PyBytesObject * self,Py_buffer * sep)1812 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1813 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1814 {
1815     return stringlib_partition(
1816         (PyObject*) self,
1817         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1818         sep->obj, (const char *)sep->buf, sep->len
1819         );
1820 }
1821 
1822 /*[clinic input]
1823 bytes.rpartition
1824 
1825     sep: Py_buffer
1826     /
1827 
1828 Partition the bytes into three parts using the given separator.
1829 
1830 This will search for the separator sep in the bytes, starting at the end. If
1831 the separator is found, returns a 3-tuple containing the part before the
1832 separator, the separator itself, and the part after it.
1833 
1834 If the separator is not found, returns a 3-tuple containing two empty bytes
1835 objects and the original bytes object.
1836 [clinic start generated code]*/
1837 
1838 static PyObject *
bytes_rpartition_impl(PyBytesObject * self,Py_buffer * sep)1839 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1840 /*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1841 {
1842     return stringlib_rpartition(
1843         (PyObject*) self,
1844         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1845         sep->obj, (const char *)sep->buf, sep->len
1846         );
1847 }
1848 
1849 /*[clinic input]
1850 bytes.rsplit = bytes.split
1851 
1852 Return a list of the sections in the bytes, using sep as the delimiter.
1853 
1854 Splitting is done starting at the end of the bytes and working to the front.
1855 [clinic start generated code]*/
1856 
1857 static PyObject *
bytes_rsplit_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1858 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1859 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1860 {
1861     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1862     const char *s = PyBytes_AS_STRING(self), *sub;
1863     Py_buffer vsub;
1864     PyObject *list;
1865 
1866     if (maxsplit < 0)
1867         maxsplit = PY_SSIZE_T_MAX;
1868     if (sep == Py_None)
1869         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1870     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1871         return NULL;
1872     sub = vsub.buf;
1873     n = vsub.len;
1874 
1875     list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1876     PyBuffer_Release(&vsub);
1877     return list;
1878 }
1879 
1880 
1881 /*[clinic input]
1882 bytes.join
1883 
1884     iterable_of_bytes: object
1885     /
1886 
1887 Concatenate any number of bytes objects.
1888 
1889 The bytes whose method is called is inserted in between each pair.
1890 
1891 The result is returned as a new bytes object.
1892 
1893 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1894 [clinic start generated code]*/
1895 
1896 static PyObject *
bytes_join(PyBytesObject * self,PyObject * iterable_of_bytes)1897 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1898 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1899 {
1900     return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1901 }
1902 
1903 PyObject *
_PyBytes_Join(PyObject * sep,PyObject * x)1904 _PyBytes_Join(PyObject *sep, PyObject *x)
1905 {
1906     assert(sep != NULL && PyBytes_Check(sep));
1907     assert(x != NULL);
1908     return bytes_join((PyBytesObject*)sep, x);
1909 }
1910 
1911 static PyObject *
bytes_find(PyBytesObject * self,PyObject * args)1912 bytes_find(PyBytesObject *self, PyObject *args)
1913 {
1914     return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1915 }
1916 
1917 static PyObject *
bytes_index(PyBytesObject * self,PyObject * args)1918 bytes_index(PyBytesObject *self, PyObject *args)
1919 {
1920     return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1921 }
1922 
1923 
1924 static PyObject *
bytes_rfind(PyBytesObject * self,PyObject * args)1925 bytes_rfind(PyBytesObject *self, PyObject *args)
1926 {
1927     return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1928 }
1929 
1930 
1931 static PyObject *
bytes_rindex(PyBytesObject * self,PyObject * args)1932 bytes_rindex(PyBytesObject *self, PyObject *args)
1933 {
1934     return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1935 }
1936 
1937 
1938 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject * self,int striptype,PyObject * sepobj)1939 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1940 {
1941     Py_buffer vsep;
1942     char *s = PyBytes_AS_STRING(self);
1943     Py_ssize_t len = PyBytes_GET_SIZE(self);
1944     char *sep;
1945     Py_ssize_t seplen;
1946     Py_ssize_t i, j;
1947 
1948     if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1949         return NULL;
1950     sep = vsep.buf;
1951     seplen = vsep.len;
1952 
1953     i = 0;
1954     if (striptype != RIGHTSTRIP) {
1955         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1956             i++;
1957         }
1958     }
1959 
1960     j = len;
1961     if (striptype != LEFTSTRIP) {
1962         do {
1963             j--;
1964         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1965         j++;
1966     }
1967 
1968     PyBuffer_Release(&vsep);
1969 
1970     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1971         Py_INCREF(self);
1972         return (PyObject*)self;
1973     }
1974     else
1975         return PyBytes_FromStringAndSize(s+i, j-i);
1976 }
1977 
1978 
1979 Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject * self,int striptype)1980 do_strip(PyBytesObject *self, int striptype)
1981 {
1982     char *s = PyBytes_AS_STRING(self);
1983     Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1984 
1985     i = 0;
1986     if (striptype != RIGHTSTRIP) {
1987         while (i < len && Py_ISSPACE(s[i])) {
1988             i++;
1989         }
1990     }
1991 
1992     j = len;
1993     if (striptype != LEFTSTRIP) {
1994         do {
1995             j--;
1996         } while (j >= i && Py_ISSPACE(s[j]));
1997         j++;
1998     }
1999 
2000     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2001         Py_INCREF(self);
2002         return (PyObject*)self;
2003     }
2004     else
2005         return PyBytes_FromStringAndSize(s+i, j-i);
2006 }
2007 
2008 
2009 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject * self,int striptype,PyObject * bytes)2010 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
2011 {
2012     if (bytes != NULL && bytes != Py_None) {
2013         return do_xstrip(self, striptype, bytes);
2014     }
2015     return do_strip(self, striptype);
2016 }
2017 
2018 /*[clinic input]
2019 bytes.strip
2020 
2021     bytes: object = None
2022     /
2023 
2024 Strip leading and trailing bytes contained in the argument.
2025 
2026 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2027 [clinic start generated code]*/
2028 
2029 static PyObject *
bytes_strip_impl(PyBytesObject * self,PyObject * bytes)2030 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2031 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
2032 {
2033     return do_argstrip(self, BOTHSTRIP, bytes);
2034 }
2035 
2036 /*[clinic input]
2037 bytes.lstrip
2038 
2039     bytes: object = None
2040     /
2041 
2042 Strip leading bytes contained in the argument.
2043 
2044 If the argument is omitted or None, strip leading  ASCII whitespace.
2045 [clinic start generated code]*/
2046 
2047 static PyObject *
bytes_lstrip_impl(PyBytesObject * self,PyObject * bytes)2048 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2049 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2050 {
2051     return do_argstrip(self, LEFTSTRIP, bytes);
2052 }
2053 
2054 /*[clinic input]
2055 bytes.rstrip
2056 
2057     bytes: object = None
2058     /
2059 
2060 Strip trailing bytes contained in the argument.
2061 
2062 If the argument is omitted or None, strip trailing ASCII whitespace.
2063 [clinic start generated code]*/
2064 
2065 static PyObject *
bytes_rstrip_impl(PyBytesObject * self,PyObject * bytes)2066 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2067 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2068 {
2069     return do_argstrip(self, RIGHTSTRIP, bytes);
2070 }
2071 
2072 
2073 static PyObject *
bytes_count(PyBytesObject * self,PyObject * args)2074 bytes_count(PyBytesObject *self, PyObject *args)
2075 {
2076     return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2077 }
2078 
2079 
2080 /*[clinic input]
2081 bytes.translate
2082 
2083     table: object
2084         Translation table, which must be a bytes object of length 256.
2085     /
2086     delete as deletechars: object(c_default="NULL") = b''
2087 
2088 Return a copy with each character mapped by the given translation table.
2089 
2090 All characters occurring in the optional argument delete are removed.
2091 The remaining characters are mapped through the given translation table.
2092 [clinic start generated code]*/
2093 
2094 static PyObject *
bytes_translate_impl(PyBytesObject * self,PyObject * table,PyObject * deletechars)2095 bytes_translate_impl(PyBytesObject *self, PyObject *table,
2096                      PyObject *deletechars)
2097 /*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2098 {
2099     char *input, *output;
2100     Py_buffer table_view = {NULL, NULL};
2101     Py_buffer del_table_view = {NULL, NULL};
2102     const char *table_chars;
2103     Py_ssize_t i, c, changed = 0;
2104     PyObject *input_obj = (PyObject*)self;
2105     const char *output_start, *del_table_chars=NULL;
2106     Py_ssize_t inlen, tablen, dellen = 0;
2107     PyObject *result;
2108     int trans_table[256];
2109 
2110     if (PyBytes_Check(table)) {
2111         table_chars = PyBytes_AS_STRING(table);
2112         tablen = PyBytes_GET_SIZE(table);
2113     }
2114     else if (table == Py_None) {
2115         table_chars = NULL;
2116         tablen = 256;
2117     }
2118     else {
2119         if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2120             return NULL;
2121         table_chars = table_view.buf;
2122         tablen = table_view.len;
2123     }
2124 
2125     if (tablen != 256) {
2126         PyErr_SetString(PyExc_ValueError,
2127           "translation table must be 256 characters long");
2128         PyBuffer_Release(&table_view);
2129         return NULL;
2130     }
2131 
2132     if (deletechars != NULL) {
2133         if (PyBytes_Check(deletechars)) {
2134             del_table_chars = PyBytes_AS_STRING(deletechars);
2135             dellen = PyBytes_GET_SIZE(deletechars);
2136         }
2137         else {
2138             if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2139                 PyBuffer_Release(&table_view);
2140                 return NULL;
2141             }
2142             del_table_chars = del_table_view.buf;
2143             dellen = del_table_view.len;
2144         }
2145     }
2146     else {
2147         del_table_chars = NULL;
2148         dellen = 0;
2149     }
2150 
2151     inlen = PyBytes_GET_SIZE(input_obj);
2152     result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2153     if (result == NULL) {
2154         PyBuffer_Release(&del_table_view);
2155         PyBuffer_Release(&table_view);
2156         return NULL;
2157     }
2158     output_start = output = PyBytes_AS_STRING(result);
2159     input = PyBytes_AS_STRING(input_obj);
2160 
2161     if (dellen == 0 && table_chars != NULL) {
2162         /* If no deletions are required, use faster code */
2163         for (i = inlen; --i >= 0; ) {
2164             c = Py_CHARMASK(*input++);
2165             if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2166                 changed = 1;
2167         }
2168         if (!changed && PyBytes_CheckExact(input_obj)) {
2169             Py_INCREF(input_obj);
2170             Py_DECREF(result);
2171             result = input_obj;
2172         }
2173         PyBuffer_Release(&del_table_view);
2174         PyBuffer_Release(&table_view);
2175         return result;
2176     }
2177 
2178     if (table_chars == NULL) {
2179         for (i = 0; i < 256; i++)
2180             trans_table[i] = Py_CHARMASK(i);
2181     } else {
2182         for (i = 0; i < 256; i++)
2183             trans_table[i] = Py_CHARMASK(table_chars[i]);
2184     }
2185     PyBuffer_Release(&table_view);
2186 
2187     for (i = 0; i < dellen; i++)
2188         trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2189     PyBuffer_Release(&del_table_view);
2190 
2191     for (i = inlen; --i >= 0; ) {
2192         c = Py_CHARMASK(*input++);
2193         if (trans_table[c] != -1)
2194             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2195                 continue;
2196         changed = 1;
2197     }
2198     if (!changed && PyBytes_CheckExact(input_obj)) {
2199         Py_DECREF(result);
2200         Py_INCREF(input_obj);
2201         return input_obj;
2202     }
2203     /* Fix the size of the resulting string */
2204     if (inlen > 0)
2205         _PyBytes_Resize(&result, output - output_start);
2206     return result;
2207 }
2208 
2209 
2210 /*[clinic input]
2211 
2212 @staticmethod
2213 bytes.maketrans
2214 
2215     frm: Py_buffer
2216     to: Py_buffer
2217     /
2218 
2219 Return a translation table useable for the bytes or bytearray translate method.
2220 
2221 The returned table will be one where each byte in frm is mapped to the byte at
2222 the same position in to.
2223 
2224 The bytes objects frm and to must be of the same length.
2225 [clinic start generated code]*/
2226 
2227 static PyObject *
bytes_maketrans_impl(Py_buffer * frm,Py_buffer * to)2228 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2229 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2230 {
2231     return _Py_bytes_maketrans(frm, to);
2232 }
2233 
2234 
2235 /*[clinic input]
2236 bytes.replace
2237 
2238     old: Py_buffer
2239     new: Py_buffer
2240     count: Py_ssize_t = -1
2241         Maximum number of occurrences to replace.
2242         -1 (the default value) means replace all occurrences.
2243     /
2244 
2245 Return a copy with all occurrences of substring old replaced by new.
2246 
2247 If the optional argument count is given, only the first count occurrences are
2248 replaced.
2249 [clinic start generated code]*/
2250 
2251 static PyObject *
bytes_replace_impl(PyBytesObject * self,Py_buffer * old,Py_buffer * new,Py_ssize_t count)2252 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2253                    Py_ssize_t count)
2254 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2255 {
2256     return stringlib_replace((PyObject *)self,
2257                              (const char *)old->buf, old->len,
2258                              (const char *)new->buf, new->len, count);
2259 }
2260 
2261 /** End DALKE **/
2262 
2263 
2264 static PyObject *
bytes_startswith(PyBytesObject * self,PyObject * args)2265 bytes_startswith(PyBytesObject *self, PyObject *args)
2266 {
2267     return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2268 }
2269 
2270 static PyObject *
bytes_endswith(PyBytesObject * self,PyObject * args)2271 bytes_endswith(PyBytesObject *self, PyObject *args)
2272 {
2273     return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2274 }
2275 
2276 
2277 /*[clinic input]
2278 bytes.decode
2279 
2280     encoding: str(c_default="NULL") = 'utf-8'
2281         The encoding with which to decode the bytes.
2282     errors: str(c_default="NULL") = 'strict'
2283         The error handling scheme to use for the handling of decoding errors.
2284         The default is 'strict' meaning that decoding errors raise a
2285         UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2286         as well as any other name registered with codecs.register_error that
2287         can handle UnicodeDecodeErrors.
2288 
2289 Decode the bytes using the codec registered for encoding.
2290 [clinic start generated code]*/
2291 
2292 static PyObject *
bytes_decode_impl(PyBytesObject * self,const char * encoding,const char * errors)2293 bytes_decode_impl(PyBytesObject *self, const char *encoding,
2294                   const char *errors)
2295 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2296 {
2297     return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2298 }
2299 
2300 
2301 /*[clinic input]
2302 bytes.splitlines
2303 
2304     keepends: bool(accept={int}) = False
2305 
2306 Return a list of the lines in the bytes, breaking at line boundaries.
2307 
2308 Line breaks are not included in the resulting list unless keepends is given and
2309 true.
2310 [clinic start generated code]*/
2311 
2312 static PyObject *
bytes_splitlines_impl(PyBytesObject * self,int keepends)2313 bytes_splitlines_impl(PyBytesObject *self, int keepends)
2314 /*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2315 {
2316     return stringlib_splitlines(
2317         (PyObject*) self, PyBytes_AS_STRING(self),
2318         PyBytes_GET_SIZE(self), keepends
2319         );
2320 }
2321 
2322 /*[clinic input]
2323 @classmethod
2324 bytes.fromhex
2325 
2326     string: unicode
2327     /
2328 
2329 Create a bytes object from a string of hexadecimal numbers.
2330 
2331 Spaces between two numbers are accepted.
2332 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2333 [clinic start generated code]*/
2334 
2335 static PyObject *
bytes_fromhex_impl(PyTypeObject * type,PyObject * string)2336 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2337 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2338 {
2339     PyObject *result = _PyBytes_FromHex(string, 0);
2340     if (type != &PyBytes_Type && result != NULL) {
2341         Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2342                                                        result, NULL));
2343     }
2344     return result;
2345 }
2346 
2347 PyObject*
_PyBytes_FromHex(PyObject * string,int use_bytearray)2348 _PyBytes_FromHex(PyObject *string, int use_bytearray)
2349 {
2350     char *buf;
2351     Py_ssize_t hexlen, invalid_char;
2352     unsigned int top, bot;
2353     Py_UCS1 *str, *end;
2354     _PyBytesWriter writer;
2355 
2356     _PyBytesWriter_Init(&writer);
2357     writer.use_bytearray = use_bytearray;
2358 
2359     assert(PyUnicode_Check(string));
2360     if (PyUnicode_READY(string))
2361         return NULL;
2362     hexlen = PyUnicode_GET_LENGTH(string);
2363 
2364     if (!PyUnicode_IS_ASCII(string)) {
2365         void *data = PyUnicode_DATA(string);
2366         unsigned int kind = PyUnicode_KIND(string);
2367         Py_ssize_t i;
2368 
2369         /* search for the first non-ASCII character */
2370         for (i = 0; i < hexlen; i++) {
2371             if (PyUnicode_READ(kind, data, i) >= 128)
2372                 break;
2373         }
2374         invalid_char = i;
2375         goto error;
2376     }
2377 
2378     assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2379     str = PyUnicode_1BYTE_DATA(string);
2380 
2381     /* This overestimates if there are spaces */
2382     buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2383     if (buf == NULL)
2384         return NULL;
2385 
2386     end = str + hexlen;
2387     while (str < end) {
2388         /* skip over spaces in the input */
2389         if (Py_ISSPACE(*str)) {
2390             do {
2391                 str++;
2392             } while (Py_ISSPACE(*str));
2393             if (str >= end)
2394                 break;
2395         }
2396 
2397         top = _PyLong_DigitValue[*str];
2398         if (top >= 16) {
2399             invalid_char = str - PyUnicode_1BYTE_DATA(string);
2400             goto error;
2401         }
2402         str++;
2403 
2404         bot = _PyLong_DigitValue[*str];
2405         if (bot >= 16) {
2406             invalid_char = str - PyUnicode_1BYTE_DATA(string);
2407             goto error;
2408         }
2409         str++;
2410 
2411         *buf++ = (unsigned char)((top << 4) + bot);
2412     }
2413 
2414     return _PyBytesWriter_Finish(&writer, buf);
2415 
2416   error:
2417     PyErr_Format(PyExc_ValueError,
2418                  "non-hexadecimal number found in "
2419                  "fromhex() arg at position %zd", invalid_char);
2420     _PyBytesWriter_Dealloc(&writer);
2421     return NULL;
2422 }
2423 
2424 PyDoc_STRVAR(hex__doc__,
2425 "B.hex() -> string\n\
2426 \n\
2427 Create a string of hexadecimal numbers from a bytes object.\n\
2428 Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2429 
2430 static PyObject *
bytes_hex(PyBytesObject * self)2431 bytes_hex(PyBytesObject *self)
2432 {
2433     char* argbuf = PyBytes_AS_STRING(self);
2434     Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2435     return _Py_strhex(argbuf, arglen);
2436 }
2437 
2438 static PyObject *
bytes_getnewargs(PyBytesObject * v)2439 bytes_getnewargs(PyBytesObject *v)
2440 {
2441     return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2442 }
2443 
2444 
2445 static PyMethodDef
2446 bytes_methods[] = {
2447     {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
2448     {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2449      _Py_capitalize__doc__},
2450     {"center", (PyCFunction)stringlib_center, METH_VARARGS,
2451      _Py_center__doc__},
2452     {"count", (PyCFunction)bytes_count, METH_VARARGS,
2453      _Py_count__doc__},
2454     BYTES_DECODE_METHODDEF
2455     {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2456      _Py_endswith__doc__},
2457     {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
2458      _Py_expandtabs__doc__},
2459     {"find", (PyCFunction)bytes_find, METH_VARARGS,
2460      _Py_find__doc__},
2461     BYTES_FROMHEX_METHODDEF
2462     {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
2463     {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
2464     {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2465      _Py_isalnum__doc__},
2466     {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2467      _Py_isalpha__doc__},
2468     {"isascii", (PyCFunction)stringlib_isascii, METH_NOARGS,
2469      _Py_isascii__doc__},
2470     {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2471      _Py_isdigit__doc__},
2472     {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2473      _Py_islower__doc__},
2474     {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2475      _Py_isspace__doc__},
2476     {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2477      _Py_istitle__doc__},
2478     {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2479      _Py_isupper__doc__},
2480     BYTES_JOIN_METHODDEF
2481     {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
2482     {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2483     BYTES_LSTRIP_METHODDEF
2484     BYTES_MAKETRANS_METHODDEF
2485     BYTES_PARTITION_METHODDEF
2486     BYTES_REPLACE_METHODDEF
2487     {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2488     {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
2489     {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
2490     BYTES_RPARTITION_METHODDEF
2491     BYTES_RSPLIT_METHODDEF
2492     BYTES_RSTRIP_METHODDEF
2493     BYTES_SPLIT_METHODDEF
2494     BYTES_SPLITLINES_METHODDEF
2495     {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2496      _Py_startswith__doc__},
2497     BYTES_STRIP_METHODDEF
2498     {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2499      _Py_swapcase__doc__},
2500     {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2501     BYTES_TRANSLATE_METHODDEF
2502     {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2503     {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
2504     {NULL,     NULL}                         /* sentinel */
2505 };
2506 
2507 static PyObject *
bytes_mod(PyObject * self,PyObject * arg)2508 bytes_mod(PyObject *self, PyObject *arg)
2509 {
2510     if (!PyBytes_Check(self)) {
2511         Py_RETURN_NOTIMPLEMENTED;
2512     }
2513     return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2514                              arg, 0);
2515 }
2516 
2517 static PyNumberMethods bytes_as_number = {
2518     0,              /*nb_add*/
2519     0,              /*nb_subtract*/
2520     0,              /*nb_multiply*/
2521     bytes_mod,      /*nb_remainder*/
2522 };
2523 
2524 static PyObject *
2525 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2526 
2527 static PyObject *
bytes_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2528 bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2529 {
2530     PyObject *x = NULL;
2531     const char *encoding = NULL;
2532     const char *errors = NULL;
2533     PyObject *new = NULL;
2534     PyObject *func;
2535     Py_ssize_t size;
2536     static char *kwlist[] = {"source", "encoding", "errors", 0};
2537     _Py_IDENTIFIER(__bytes__);
2538 
2539     if (type != &PyBytes_Type)
2540         return bytes_subtype_new(type, args, kwds);
2541     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2542                                      &encoding, &errors))
2543         return NULL;
2544     if (x == NULL) {
2545         if (encoding != NULL || errors != NULL) {
2546             PyErr_SetString(PyExc_TypeError,
2547                             "encoding or errors without sequence "
2548                             "argument");
2549             return NULL;
2550         }
2551         return PyBytes_FromStringAndSize(NULL, 0);
2552     }
2553 
2554     if (encoding != NULL) {
2555         /* Encode via the codec registry */
2556         if (!PyUnicode_Check(x)) {
2557             PyErr_SetString(PyExc_TypeError,
2558                             "encoding without a string argument");
2559             return NULL;
2560         }
2561         new = PyUnicode_AsEncodedString(x, encoding, errors);
2562         if (new == NULL)
2563             return NULL;
2564         assert(PyBytes_Check(new));
2565         return new;
2566     }
2567 
2568     if (errors != NULL) {
2569         PyErr_SetString(PyExc_TypeError,
2570                         PyUnicode_Check(x) ?
2571                         "string argument without an encoding" :
2572                         "errors without a string argument");
2573         return NULL;
2574     }
2575 
2576     /* We'd like to call PyObject_Bytes here, but we need to check for an
2577        integer argument before deferring to PyBytes_FromObject, something
2578        PyObject_Bytes doesn't do. */
2579     func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2580     if (func != NULL) {
2581         new = _PyObject_CallNoArg(func);
2582         Py_DECREF(func);
2583         if (new == NULL)
2584             return NULL;
2585         if (!PyBytes_Check(new)) {
2586             PyErr_Format(PyExc_TypeError,
2587                          "__bytes__ returned non-bytes (type %.200s)",
2588                          Py_TYPE(new)->tp_name);
2589             Py_DECREF(new);
2590             return NULL;
2591         }
2592         return new;
2593     }
2594     else if (PyErr_Occurred())
2595         return NULL;
2596 
2597     if (PyUnicode_Check(x)) {
2598         PyErr_SetString(PyExc_TypeError,
2599                         "string argument without an encoding");
2600         return NULL;
2601     }
2602     /* Is it an integer? */
2603     if (PyIndex_Check(x)) {
2604         size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2605         if (size == -1 && PyErr_Occurred()) {
2606             if (!PyErr_ExceptionMatches(PyExc_TypeError))
2607                 return NULL;
2608             PyErr_Clear();  /* fall through */
2609         }
2610         else {
2611             if (size < 0) {
2612                 PyErr_SetString(PyExc_ValueError, "negative count");
2613                 return NULL;
2614             }
2615             new = _PyBytes_FromSize(size, 1);
2616             if (new == NULL)
2617                 return NULL;
2618             return new;
2619         }
2620     }
2621 
2622     return PyBytes_FromObject(x);
2623 }
2624 
2625 static PyObject*
_PyBytes_FromBuffer(PyObject * x)2626 _PyBytes_FromBuffer(PyObject *x)
2627 {
2628     PyObject *new;
2629     Py_buffer view;
2630 
2631     if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2632         return NULL;
2633 
2634     new = PyBytes_FromStringAndSize(NULL, view.len);
2635     if (!new)
2636         goto fail;
2637     if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2638                 &view, view.len, 'C') < 0)
2639         goto fail;
2640     PyBuffer_Release(&view);
2641     return new;
2642 
2643 fail:
2644     Py_XDECREF(new);
2645     PyBuffer_Release(&view);
2646     return NULL;
2647 }
2648 
2649 static PyObject*
_PyBytes_FromList(PyObject * x)2650 _PyBytes_FromList(PyObject *x)
2651 {
2652     Py_ssize_t i, size = PyList_GET_SIZE(x);
2653     Py_ssize_t value;
2654     char *str;
2655     PyObject *item;
2656     _PyBytesWriter writer;
2657 
2658     _PyBytesWriter_Init(&writer);
2659     str = _PyBytesWriter_Alloc(&writer, size);
2660     if (str == NULL)
2661         return NULL;
2662     writer.overallocate = 1;
2663     size = writer.allocated;
2664 
2665     for (i = 0; i < PyList_GET_SIZE(x); i++) {
2666         item = PyList_GET_ITEM(x, i);
2667         Py_INCREF(item);
2668         value = PyNumber_AsSsize_t(item, NULL);
2669         Py_DECREF(item);
2670         if (value == -1 && PyErr_Occurred())
2671             goto error;
2672 
2673         if (value < 0 || value >= 256) {
2674             PyErr_SetString(PyExc_ValueError,
2675                             "bytes must be in range(0, 256)");
2676             goto error;
2677         }
2678 
2679         if (i >= size) {
2680             str = _PyBytesWriter_Resize(&writer, str, size+1);
2681             if (str == NULL)
2682                 return NULL;
2683             size = writer.allocated;
2684         }
2685         *str++ = (char) value;
2686     }
2687     return _PyBytesWriter_Finish(&writer, str);
2688 
2689   error:
2690     _PyBytesWriter_Dealloc(&writer);
2691     return NULL;
2692 }
2693 
2694 static PyObject*
_PyBytes_FromTuple(PyObject * x)2695 _PyBytes_FromTuple(PyObject *x)
2696 {
2697     PyObject *bytes;
2698     Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2699     Py_ssize_t value;
2700     char *str;
2701     PyObject *item;
2702 
2703     bytes = PyBytes_FromStringAndSize(NULL, size);
2704     if (bytes == NULL)
2705         return NULL;
2706     str = ((PyBytesObject *)bytes)->ob_sval;
2707 
2708     for (i = 0; i < size; i++) {
2709         item = PyTuple_GET_ITEM(x, i);
2710         value = PyNumber_AsSsize_t(item, NULL);
2711         if (value == -1 && PyErr_Occurred())
2712             goto error;
2713 
2714         if (value < 0 || value >= 256) {
2715             PyErr_SetString(PyExc_ValueError,
2716                             "bytes must be in range(0, 256)");
2717             goto error;
2718         }
2719         *str++ = (char) value;
2720     }
2721     return bytes;
2722 
2723   error:
2724     Py_DECREF(bytes);
2725     return NULL;
2726 }
2727 
2728 static PyObject *
_PyBytes_FromIterator(PyObject * it,PyObject * x)2729 _PyBytes_FromIterator(PyObject *it, PyObject *x)
2730 {
2731     char *str;
2732     Py_ssize_t i, size;
2733     _PyBytesWriter writer;
2734 
2735     /* For iterator version, create a string object and resize as needed */
2736     size = PyObject_LengthHint(x, 64);
2737     if (size == -1 && PyErr_Occurred())
2738         return NULL;
2739 
2740     _PyBytesWriter_Init(&writer);
2741     str = _PyBytesWriter_Alloc(&writer, size);
2742     if (str == NULL)
2743         return NULL;
2744     writer.overallocate = 1;
2745     size = writer.allocated;
2746 
2747     /* Run the iterator to exhaustion */
2748     for (i = 0; ; i++) {
2749         PyObject *item;
2750         Py_ssize_t value;
2751 
2752         /* Get the next item */
2753         item = PyIter_Next(it);
2754         if (item == NULL) {
2755             if (PyErr_Occurred())
2756                 goto error;
2757             break;
2758         }
2759 
2760         /* Interpret it as an int (__index__) */
2761         value = PyNumber_AsSsize_t(item, NULL);
2762         Py_DECREF(item);
2763         if (value == -1 && PyErr_Occurred())
2764             goto error;
2765 
2766         /* Range check */
2767         if (value < 0 || value >= 256) {
2768             PyErr_SetString(PyExc_ValueError,
2769                             "bytes must be in range(0, 256)");
2770             goto error;
2771         }
2772 
2773         /* Append the byte */
2774         if (i >= size) {
2775             str = _PyBytesWriter_Resize(&writer, str, size+1);
2776             if (str == NULL)
2777                 return NULL;
2778             size = writer.allocated;
2779         }
2780         *str++ = (char) value;
2781     }
2782 
2783     return _PyBytesWriter_Finish(&writer, str);
2784 
2785   error:
2786     _PyBytesWriter_Dealloc(&writer);
2787     return NULL;
2788 }
2789 
2790 PyObject *
PyBytes_FromObject(PyObject * x)2791 PyBytes_FromObject(PyObject *x)
2792 {
2793     PyObject *it, *result;
2794 
2795     if (x == NULL) {
2796         PyErr_BadInternalCall();
2797         return NULL;
2798     }
2799 
2800     if (PyBytes_CheckExact(x)) {
2801         Py_INCREF(x);
2802         return x;
2803     }
2804 
2805     /* Use the modern buffer interface */
2806     if (PyObject_CheckBuffer(x))
2807         return _PyBytes_FromBuffer(x);
2808 
2809     if (PyList_CheckExact(x))
2810         return _PyBytes_FromList(x);
2811 
2812     if (PyTuple_CheckExact(x))
2813         return _PyBytes_FromTuple(x);
2814 
2815     if (!PyUnicode_Check(x)) {
2816         it = PyObject_GetIter(x);
2817         if (it != NULL) {
2818             result = _PyBytes_FromIterator(it, x);
2819             Py_DECREF(it);
2820             return result;
2821         }
2822         if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2823             return NULL;
2824         }
2825     }
2826 
2827     PyErr_Format(PyExc_TypeError,
2828                  "cannot convert '%.200s' object to bytes",
2829                  x->ob_type->tp_name);
2830     return NULL;
2831 }
2832 
2833 static PyObject *
bytes_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2834 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2835 {
2836     PyObject *tmp, *pnew;
2837     Py_ssize_t n;
2838 
2839     assert(PyType_IsSubtype(type, &PyBytes_Type));
2840     tmp = bytes_new(&PyBytes_Type, args, kwds);
2841     if (tmp == NULL)
2842         return NULL;
2843     assert(PyBytes_Check(tmp));
2844     n = PyBytes_GET_SIZE(tmp);
2845     pnew = type->tp_alloc(type, n);
2846     if (pnew != NULL) {
2847         memcpy(PyBytes_AS_STRING(pnew),
2848                   PyBytes_AS_STRING(tmp), n+1);
2849         ((PyBytesObject *)pnew)->ob_shash =
2850             ((PyBytesObject *)tmp)->ob_shash;
2851     }
2852     Py_DECREF(tmp);
2853     return pnew;
2854 }
2855 
/* Docstring of the bytes type; installed as tp_doc of PyBytes_Type. */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
  - an iterable yielding integers in range(256)\n\
  - a text string encoded using the specified encoding\n\
  - any object implementing the buffer API.\n\
  - an integer");
2868 
2869 static PyObject *bytes_iter(PyObject *seq);
2870 
/* Type object of the built-in bytes type.  Instances are variable-sized:
   PyBytesObject_SIZE is the basic size and each item is one char. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    bytes_dealloc,                              /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    &bytes_as_number,                           /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
2913 
2914 void
PyBytes_Concat(PyObject ** pv,PyObject * w)2915 PyBytes_Concat(PyObject **pv, PyObject *w)
2916 {
2917     assert(pv != NULL);
2918     if (*pv == NULL)
2919         return;
2920     if (w == NULL) {
2921         Py_CLEAR(*pv);
2922         return;
2923     }
2924 
2925     if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2926         /* Only one reference, so we can resize in place */
2927         Py_ssize_t oldsize;
2928         Py_buffer wb;
2929 
2930         wb.len = -1;
2931         if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2932             PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2933                          Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2934             Py_CLEAR(*pv);
2935             return;
2936         }
2937 
2938         oldsize = PyBytes_GET_SIZE(*pv);
2939         if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2940             PyErr_NoMemory();
2941             goto error;
2942         }
2943         if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2944             goto error;
2945 
2946         memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2947         PyBuffer_Release(&wb);
2948         return;
2949 
2950       error:
2951         PyBuffer_Release(&wb);
2952         Py_CLEAR(*pv);
2953         return;
2954     }
2955 
2956     else {
2957         /* Multiple references, need to create new object */
2958         PyObject *v;
2959         v = bytes_concat(*pv, w);
2960         Py_SETREF(*pv, v);
2961     }
2962 }
2963 
2964 void
PyBytes_ConcatAndDel(PyObject ** pv,PyObject * w)2965 PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
2966 {
2967     PyBytes_Concat(pv, w);
2968     Py_XDECREF(w);
2969 }
2970 
2971 
2972 /* The following function breaks the notion that bytes are immutable:
2973    it changes the size of a bytes object.  We get away with this only if there
2974    is only one module referencing the object.  You can also think of it
2975    as creating a new bytes object and destroying the old one, only
2976    more efficiently.  In any case, don't use this if the bytes object may
2977    already be known to some other part of the code...
2978    Note that if there's not enough memory to resize the bytes object, the
2979    original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
2980    memory" exception is set, and -1 is returned.  Else (on success) 0 is
2981    returned, and the value in *pv may or may not be the same as on input.
2982    As always, an extra byte is allocated for a trailing \0 byte (newsize
2983    does *not* include that), and a trailing \0 byte is stored.
2984 */
2985 
2986 int
_PyBytes_Resize(PyObject ** pv,Py_ssize_t newsize)2987 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2988 {
2989     PyObject *v;
2990     PyBytesObject *sv;
2991     v = *pv;
2992     if (!PyBytes_Check(v) || newsize < 0) {
2993         goto error;
2994     }
2995     if (Py_SIZE(v) == newsize) {
2996         /* return early if newsize equals to v->ob_size */
2997         return 0;
2998     }
2999     if (Py_SIZE(v) == 0) {
3000         if (newsize == 0) {
3001             return 0;
3002         }
3003         *pv = _PyBytes_FromSize(newsize, 0);
3004         Py_DECREF(v);
3005         return (*pv == NULL) ? -1 : 0;
3006     }
3007     if (Py_REFCNT(v) != 1) {
3008         goto error;
3009     }
3010     if (newsize == 0) {
3011         *pv = _PyBytes_FromSize(0, 0);
3012         Py_DECREF(v);
3013         return (*pv == NULL) ? -1 : 0;
3014     }
3015     /* XXX UNREF/NEWREF interface should be more symmetrical */
3016     _Py_DEC_REFTOTAL;
3017     _Py_ForgetReference(v);
3018     *pv = (PyObject *)
3019         PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
3020     if (*pv == NULL) {
3021         PyObject_Del(v);
3022         PyErr_NoMemory();
3023         return -1;
3024     }
3025     _Py_NewReference(*pv);
3026     sv = (PyBytesObject *) *pv;
3027     Py_SIZE(sv) = newsize;
3028     sv->ob_sval[newsize] = '\0';
3029     sv->ob_shash = -1;          /* invalidate cached hash value */
3030     return 0;
3031 error:
3032     *pv = 0;
3033     Py_DECREF(v);
3034     PyErr_BadInternalCall();
3035     return -1;
3036 }
3037 
3038 void
PyBytes_Fini(void)3039 PyBytes_Fini(void)
3040 {
3041     int i;
3042     for (i = 0; i < UCHAR_MAX + 1; i++)
3043         Py_CLEAR(characters[i]);
3044     Py_CLEAR(nullstring);
3045 }
3046 
3047 /*********************** Bytes Iterator ****************************/
3048 
/* State of an iterator over a bytes object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;   /* Index of the next byte returned by striter_next() */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
3054 
3055 static void
striter_dealloc(striterobject * it)3056 striter_dealloc(striterobject *it)
3057 {
3058     _PyObject_GC_UNTRACK(it);
3059     Py_XDECREF(it->it_seq);
3060     PyObject_GC_Del(it);
3061 }
3062 
3063 static int
striter_traverse(striterobject * it,visitproc visit,void * arg)3064 striter_traverse(striterobject *it, visitproc visit, void *arg)
3065 {
3066     Py_VISIT(it->it_seq);
3067     return 0;
3068 }
3069 
3070 static PyObject *
striter_next(striterobject * it)3071 striter_next(striterobject *it)
3072 {
3073     PyBytesObject *seq;
3074     PyObject *item;
3075 
3076     assert(it != NULL);
3077     seq = it->it_seq;
3078     if (seq == NULL)
3079         return NULL;
3080     assert(PyBytes_Check(seq));
3081 
3082     if (it->it_index < PyBytes_GET_SIZE(seq)) {
3083         item = PyLong_FromLong(
3084             (unsigned char)seq->ob_sval[it->it_index]);
3085         if (item != NULL)
3086             ++it->it_index;
3087         return item;
3088     }
3089 
3090     it->it_seq = NULL;
3091     Py_DECREF(seq);
3092     return NULL;
3093 }
3094 
3095 static PyObject *
striter_len(striterobject * it)3096 striter_len(striterobject *it)
3097 {
3098     Py_ssize_t len = 0;
3099     if (it->it_seq)
3100         len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3101     return PyLong_FromSsize_t(len);
3102 }
3103 
3104 PyDoc_STRVAR(length_hint_doc,
3105              "Private method returning an estimate of len(list(it)).");
3106 
3107 static PyObject *
striter_reduce(striterobject * it)3108 striter_reduce(striterobject *it)
3109 {
3110     if (it->it_seq != NULL) {
3111         return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
3112                              it->it_seq, it->it_index);
3113     } else {
3114         return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
3115     }
3116 }
3117 
3118 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3119 
3120 static PyObject *
striter_setstate(striterobject * it,PyObject * state)3121 striter_setstate(striterobject *it, PyObject *state)
3122 {
3123     Py_ssize_t index = PyLong_AsSsize_t(state);
3124     if (index == -1 && PyErr_Occurred())
3125         return NULL;
3126     if (it->it_seq != NULL) {
3127         if (index < 0)
3128             index = 0;
3129         else if (index > PyBytes_GET_SIZE(it->it_seq))
3130             index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3131         it->it_index = index;
3132     }
3133     Py_RETURN_NONE;
3134 }
3135 
3136 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3137 
/* Method table of the bytes iterator (tp_methods of PyBytesIter_Type). */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
     reduce_doc},
    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
     setstate_doc},
    {NULL,              NULL}           /* sentinel */
};
3147 
/* Type object of the iterator created by bytes_iter(). */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,             /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3180 
3181 static PyObject *
bytes_iter(PyObject * seq)3182 bytes_iter(PyObject *seq)
3183 {
3184     striterobject *it;
3185 
3186     if (!PyBytes_Check(seq)) {
3187         PyErr_BadInternalCall();
3188         return NULL;
3189     }
3190     it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3191     if (it == NULL)
3192         return NULL;
3193     it->it_index = 0;
3194     Py_INCREF(seq);
3195     it->it_seq = (PyBytesObject *)seq;
3196     _PyObject_GC_TRACK(it);
3197     return (PyObject *)it;
3198 }
3199 
3200 
3201 /* _PyBytesWriter API */
3202 
/* Divisor used by _PyBytesWriter_Resize(): when overallocation is enabled,
   the requested size is increased by size / OVERALLOCATE_FACTOR. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3210 
3211 void
_PyBytesWriter_Init(_PyBytesWriter * writer)3212 _PyBytesWriter_Init(_PyBytesWriter *writer)
3213 {
3214     /* Set all attributes before small_buffer to 0 */
3215     memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3216 #ifdef Py_DEBUG
3217     memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
3218 #endif
3219 }
3220 
3221 void
_PyBytesWriter_Dealloc(_PyBytesWriter * writer)3222 _PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3223 {
3224     Py_CLEAR(writer->buffer);
3225 }
3226 
3227 Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter * writer)3228 _PyBytesWriter_AsString(_PyBytesWriter *writer)
3229 {
3230     if (writer->use_small_buffer) {
3231         assert(writer->buffer == NULL);
3232         return writer->small_buffer;
3233     }
3234     else if (writer->use_bytearray) {
3235         assert(writer->buffer != NULL);
3236         return PyByteArray_AS_STRING(writer->buffer);
3237     }
3238     else {
3239         assert(writer->buffer != NULL);
3240         return PyBytes_AS_STRING(writer->buffer);
3241     }
3242 }
3243 
3244 Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter * writer,char * str)3245 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3246 {
3247     char *start = _PyBytesWriter_AsString(writer);
3248     assert(str != NULL);
3249     assert(str >= start);
3250     assert(str - start <= writer->allocated);
3251     return str - start;
3252 }
3253 
3254 Py_LOCAL_INLINE(void)
_PyBytesWriter_CheckConsistency(_PyBytesWriter * writer,char * str)3255 _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3256 {
3257 #ifdef Py_DEBUG
3258     char *start, *end;
3259 
3260     if (writer->use_small_buffer) {
3261         assert(writer->buffer == NULL);
3262     }
3263     else {
3264         assert(writer->buffer != NULL);
3265         if (writer->use_bytearray)
3266             assert(PyByteArray_CheckExact(writer->buffer));
3267         else
3268             assert(PyBytes_CheckExact(writer->buffer));
3269         assert(Py_REFCNT(writer->buffer) == 1);
3270     }
3271 
3272     if (writer->use_bytearray) {
3273         /* bytearray has its own overallocation algorithm,
3274            writer overallocation must be disabled */
3275         assert(!writer->overallocate);
3276     }
3277 
3278     assert(0 <= writer->allocated);
3279     assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3280     /* the last byte must always be null */
3281     start = _PyBytesWriter_AsString(writer);
3282     assert(start[writer->allocated] == 0);
3283 
3284     end = start + writer->allocated;
3285     assert(str != NULL);
3286     assert(start <= str && str <= end);
3287 #endif
3288 }
3289 
3290 void*
_PyBytesWriter_Resize(_PyBytesWriter * writer,void * str,Py_ssize_t size)3291 _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3292 {
3293     Py_ssize_t allocated, pos;
3294 
3295     _PyBytesWriter_CheckConsistency(writer, str);
3296     assert(writer->allocated < size);
3297 
3298     allocated = size;
3299     if (writer->overallocate
3300         && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3301         /* overallocate to limit the number of realloc() */
3302         allocated += allocated / OVERALLOCATE_FACTOR;
3303     }
3304 
3305     pos = _PyBytesWriter_GetSize(writer, str);
3306     if (!writer->use_small_buffer) {
3307         if (writer->use_bytearray) {
3308             if (PyByteArray_Resize(writer->buffer, allocated))
3309                 goto error;
3310             /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3311                but we cannot use ob_alloc because bytes may need to be moved
3312                to use the whole buffer. bytearray uses an internal optimization
3313                to avoid moving or copying bytes when bytes are removed at the
3314                beginning (ex: del bytearray[:1]). */
3315         }
3316         else {
3317             if (_PyBytes_Resize(&writer->buffer, allocated))
3318                 goto error;
3319         }
3320     }
3321     else {
3322         /* convert from stack buffer to bytes object buffer */
3323         assert(writer->buffer == NULL);
3324 
3325         if (writer->use_bytearray)
3326             writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3327         else
3328             writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3329         if (writer->buffer == NULL)
3330             goto error;
3331 
3332         if (pos != 0) {
3333             char *dest;
3334             if (writer->use_bytearray)
3335                 dest = PyByteArray_AS_STRING(writer->buffer);
3336             else
3337                 dest = PyBytes_AS_STRING(writer->buffer);
3338             memcpy(dest,
3339                       writer->small_buffer,
3340                       pos);
3341         }
3342 
3343         writer->use_small_buffer = 0;
3344 #ifdef Py_DEBUG
3345         memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
3346 #endif
3347     }
3348     writer->allocated = allocated;
3349 
3350     str = _PyBytesWriter_AsString(writer) + pos;
3351     _PyBytesWriter_CheckConsistency(writer, str);
3352     return str;
3353 
3354 error:
3355     _PyBytesWriter_Dealloc(writer);
3356     return NULL;
3357 }
3358 
3359 void*
_PyBytesWriter_Prepare(_PyBytesWriter * writer,void * str,Py_ssize_t size)3360 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3361 {
3362     Py_ssize_t new_min_size;
3363 
3364     _PyBytesWriter_CheckConsistency(writer, str);
3365     assert(size >= 0);
3366 
3367     if (size == 0) {
3368         /* nothing to do */
3369         return str;
3370     }
3371 
3372     if (writer->min_size > PY_SSIZE_T_MAX - size) {
3373         PyErr_NoMemory();
3374         _PyBytesWriter_Dealloc(writer);
3375         return NULL;
3376     }
3377     new_min_size = writer->min_size + size;
3378 
3379     if (new_min_size > writer->allocated)
3380         str = _PyBytesWriter_Resize(writer, str, new_min_size);
3381 
3382     writer->min_size = new_min_size;
3383     return str;
3384 }
3385 
3386 /* Allocate the buffer to write size bytes.
3387    Return the pointer to the beginning of buffer data.
3388    Raise an exception and return NULL on error. */
3389 void*
_PyBytesWriter_Alloc(_PyBytesWriter * writer,Py_ssize_t size)3390 _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3391 {
3392     /* ensure that _PyBytesWriter_Alloc() is only called once */
3393     assert(writer->min_size == 0 && writer->buffer == NULL);
3394     assert(size >= 0);
3395 
3396     writer->use_small_buffer = 1;
3397 #ifdef Py_DEBUG
3398     writer->allocated = sizeof(writer->small_buffer) - 1;
3399     /* In debug mode, don't use the full small buffer because it is less
3400        efficient than bytes and bytearray objects to detect buffer underflow
3401        and buffer overflow. Use 10 bytes of the small buffer to test also
3402        code using the smaller buffer in debug mode.
3403 
3404        Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3405        in debug mode to also be able to detect stack overflow when running
3406        tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3407        if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3408        stack overflow. */
3409     writer->allocated = Py_MIN(writer->allocated, 10);
3410     /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3411        to detect buffer overflow */
3412     writer->small_buffer[writer->allocated] = 0;
3413 #else
3414     writer->allocated = sizeof(writer->small_buffer);
3415 #endif
3416     return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3417 }
3418 
3419 PyObject *
_PyBytesWriter_Finish(_PyBytesWriter * writer,void * str)3420 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3421 {
3422     Py_ssize_t size;
3423     PyObject *result;
3424 
3425     _PyBytesWriter_CheckConsistency(writer, str);
3426 
3427     size = _PyBytesWriter_GetSize(writer, str);
3428     if (size == 0 && !writer->use_bytearray) {
3429         Py_CLEAR(writer->buffer);
3430         /* Get the empty byte string singleton */
3431         result = PyBytes_FromStringAndSize(NULL, 0);
3432     }
3433     else if (writer->use_small_buffer) {
3434         if (writer->use_bytearray) {
3435             result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3436         }
3437         else {
3438             result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3439         }
3440     }
3441     else {
3442         result = writer->buffer;
3443         writer->buffer = NULL;
3444 
3445         if (size != writer->allocated) {
3446             if (writer->use_bytearray) {
3447                 if (PyByteArray_Resize(result, size)) {
3448                     Py_DECREF(result);
3449                     return NULL;
3450                 }
3451             }
3452             else {
3453                 if (_PyBytes_Resize(&result, size)) {
3454                     assert(result == NULL);
3455                     return NULL;
3456                 }
3457             }
3458         }
3459     }
3460     return result;
3461 }
3462 
3463 void*
_PyBytesWriter_WriteBytes(_PyBytesWriter * writer,void * ptr,const void * bytes,Py_ssize_t size)3464 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3465                           const void *bytes, Py_ssize_t size)
3466 {
3467     char *str = (char *)ptr;
3468 
3469     str = _PyBytesWriter_Prepare(writer, str, size);
3470     if (str == NULL)
3471         return NULL;
3472 
3473     memcpy(str, bytes, size);
3474     str += size;
3475 
3476     return str;
3477 }
3478