1 /* bytes object implementation */
2
3 #define PY_SSIZE_T_CLEAN
4
5 #include "Python.h"
6 #include "internal/mem.h"
7 #include "internal/pystate.h"
8
9 #include "bytes_methods.h"
10 #include "pystrhex.h"
11 #include <stddef.h>
12
13 /*[clinic input]
14 class bytes "PyBytesObject *" "&PyBytes_Type"
15 [clinic start generated code]*/
16 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
17
18 #include "clinic/bytesobject.c.h"
19
#ifdef COUNT_ALLOCS
/* Allocation statistics: how many times the shared empty bytes object
   (null_strings) or a cached one-byte object (one_strings) was handed out
   instead of allocating a new object. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by byte value.  An entry is
   filled in the first time a one-byte object for that value is created
   from caller-supplied data. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Shared empty bytes object; set the first time an empty object is
   created. */
static PyBytesObject *nullstring;
26
/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyBytesObject_SIZE + n bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.

   The "+ 1" leaves room for the terminating null byte that the
   constructors in this file store at ob_sval[n].
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34
35 /* Forward declaration */
36 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37 char *str);
38
39 /*
40 For PyBytes_FromString(), the parameter `str' points to a null-terminated
41 string containing exactly `size' bytes.
42
43 For PyBytes_FromStringAndSize(), the parameter `str' is
44 either NULL or else points to a string containing at least `size' bytes.
45 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
46 not have to be null-terminated. (Therefore it is safe to construct a
47 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
48 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
49 bytes (setting the last byte to the null terminating character) and you can
50 fill in the data yourself. If `str' is non-NULL then the resulting
51 PyBytes object must be treated as immutable and you must not fill in nor
52 alter the data yourself, since the strings may be shared.
53
54 The PyObject member `op->ob_size', which denotes the number of "extra
55 items" in a variable-size object, will contain the number of bytes
56 allocated for string data, not counting the null terminating character.
57 It is therefore equal to the `size' parameter (for
58 PyBytes_FromStringAndSize()) or the length of the string in the `str'
59 parameter (for PyBytes_FromString()).
60 */
61 static PyObject *
_PyBytes_FromSize(Py_ssize_t size,int use_calloc)62 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
63 {
64 PyBytesObject *op;
65 assert(size >= 0);
66
67 if (size == 0 && (op = nullstring) != NULL) {
68 #ifdef COUNT_ALLOCS
69 null_strings++;
70 #endif
71 Py_INCREF(op);
72 return (PyObject *)op;
73 }
74
75 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
76 PyErr_SetString(PyExc_OverflowError,
77 "byte string is too large");
78 return NULL;
79 }
80
81 /* Inline PyObject_NewVar */
82 if (use_calloc)
83 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
84 else
85 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
86 if (op == NULL)
87 return PyErr_NoMemory();
88 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
89 op->ob_shash = -1;
90 if (!use_calloc)
91 op->ob_sval[size] = '\0';
92 /* empty byte string singleton */
93 if (size == 0) {
94 nullstring = op;
95 Py_INCREF(op);
96 }
97 return (PyObject *) op;
98 }
99
100 PyObject *
PyBytes_FromStringAndSize(const char * str,Py_ssize_t size)101 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
102 {
103 PyBytesObject *op;
104 if (size < 0) {
105 PyErr_SetString(PyExc_SystemError,
106 "Negative size passed to PyBytes_FromStringAndSize");
107 return NULL;
108 }
109 if (size == 1 && str != NULL &&
110 (op = characters[*str & UCHAR_MAX]) != NULL)
111 {
112 #ifdef COUNT_ALLOCS
113 one_strings++;
114 #endif
115 Py_INCREF(op);
116 return (PyObject *)op;
117 }
118
119 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
120 if (op == NULL)
121 return NULL;
122 if (str == NULL)
123 return (PyObject *) op;
124
125 memcpy(op->ob_sval, str, size);
126 /* share short strings */
127 if (size == 1) {
128 characters[*str & UCHAR_MAX] = op;
129 Py_INCREF(op);
130 }
131 return (PyObject *) op;
132 }
133
134 PyObject *
PyBytes_FromString(const char * str)135 PyBytes_FromString(const char *str)
136 {
137 size_t size;
138 PyBytesObject *op;
139
140 assert(str != NULL);
141 size = strlen(str);
142 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
143 PyErr_SetString(PyExc_OverflowError,
144 "byte string is too long");
145 return NULL;
146 }
147 if (size == 0 && (op = nullstring) != NULL) {
148 #ifdef COUNT_ALLOCS
149 null_strings++;
150 #endif
151 Py_INCREF(op);
152 return (PyObject *)op;
153 }
154 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
155 #ifdef COUNT_ALLOCS
156 one_strings++;
157 #endif
158 Py_INCREF(op);
159 return (PyObject *)op;
160 }
161
162 /* Inline PyObject_NewVar */
163 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
164 if (op == NULL)
165 return PyErr_NoMemory();
166 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
167 op->ob_shash = -1;
168 memcpy(op->ob_sval, str, size+1);
169 /* share short strings */
170 if (size == 0) {
171 nullstring = op;
172 Py_INCREF(op);
173 } else if (size == 1) {
174 characters[*str & UCHAR_MAX] = op;
175 Py_INCREF(op);
176 }
177 return (PyObject *) op;
178 }
179
/* Build a bytes object from a printf-style `format` and a va_list.

   Conversions handled by the switch below: %c, %d, %u, %i, %x, %s, %p and
   %%.  The 'l' and 'z' length flags are recognised only directly in front
   of 'd' or 'u'.  A field width is parsed but ignored.  A precision is
   parsed for every conversion but only used by %s, where a positive value
   caps the number of bytes copied (a precision of 0 copies the whole
   string).  An unrecognised conversion copies the rest of the format,
   starting at its '%', verbatim and ends processing.

   Returns the result of _PyBytesWriter_Finish(), or NULL on failure. */
PyObject *
PyBytes_FromFormatV(const char *format, va_list vargs)
{
    char *s;
    const char *f;
    const char *p;
    Py_ssize_t prec;
    int longflag;
    int size_tflag;
    /* Longest 64-bit formatted numbers:
       - "18446744073709551615\0" (21 bytes)
       - "-9223372036854775808\0" (21 bytes)
       Decimal takes the most space (it isn't enough for octal.)

       Longest 64-bit pointer representation:
       "0xffffffffffffffff\0" (19 bytes). */
    char buffer[21];
    _PyBytesWriter writer;

    _PyBytesWriter_Init(&writer);

    /* Start with one output byte per format byte; every conversion below
       corrects writer.min_size for the bytes its directive occupied. */
    s = _PyBytesWriter_Alloc(&writer, strlen(format));
    if (s == NULL)
        return NULL;
    writer.overallocate = 1;

/* Append the null-terminated string `str` to the output, jumping to the
   error label if the writer fails. */
#define WRITE_BYTES(str) \
    do { \
        s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
        if (s == NULL) \
            goto error; \
    } while (0)

    for (f = format; *f; f++) {
        /* Literal byte: copy it through unchanged. */
        if (*f != '%') {
            *s++ = *f;
            continue;
        }

        /* p remembers where the directive starts (the '%'). */
        p = f++;

        /* ignore the width (ex: 10 in "%10s") */
        while (Py_ISDIGIT(*f))
            f++;

        /* parse the precision (ex: 10 in "%.10s") */
        prec = 0;
        if (*f == '.') {
            f++;
            for (; Py_ISDIGIT(*f); f++) {
                prec = (prec * 10) + (*f - '0');
            }
        }

        /* Skip anything that is neither a letter, a '%' nor the end of
           the format. */
        while (*f && *f != '%' && !Py_ISALPHA(*f))
            f++;

        /* handle the long flag ('l'), but only for %ld and %lu.
           others can be added when necessary. */
        longflag = 0;
        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
            longflag = 1;
            ++f;
        }

        /* handle the size_t flag ('z'). */
        size_tflag = 0;
        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
            size_tflag = 1;
            ++f;
        }

        /* subtract bytes preallocated for the format string
           (ex: 2 for "%s") */
        writer.min_size -= (f - p + 1);

        switch (*f) {
        case 'c':
        {
            /* A single byte, passed as int; must be in [0; 255]. */
            int c = va_arg(vargs, int);
            if (c < 0 || c > 255) {
                PyErr_SetString(PyExc_OverflowError,
                                "PyBytes_FromFormatV(): %c format "
                                "expects an integer in range [0; 255]");
                goto error;
            }
            writer.min_size++;
            *s++ = (unsigned char)c;
            break;
        }

        case 'd':
            /* Signed decimal: long, Py_ssize_t or int depending on the
               length flag. */
            if (longflag)
                sprintf(buffer, "%ld", va_arg(vargs, long));
            else if (size_tflag)
                sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
                    va_arg(vargs, Py_ssize_t));
            else
                sprintf(buffer, "%d", va_arg(vargs, int));
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 'u':
            /* Unsigned decimal: unsigned long, size_t or unsigned int
               depending on the length flag. */
            if (longflag)
                sprintf(buffer, "%lu",
                    va_arg(vargs, unsigned long));
            else if (size_tflag)
                sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
                    va_arg(vargs, size_t));
            else
                sprintf(buffer, "%u",
                    va_arg(vargs, unsigned int));
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 'i':
            sprintf(buffer, "%i", va_arg(vargs, int));
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 'x':
            /* Hexadecimal; the argument is fetched as an int. */
            sprintf(buffer, "%x", va_arg(vargs, int));
            assert(strlen(buffer) < sizeof(buffer));
            WRITE_BYTES(buffer);
            break;

        case 's':
        {
            Py_ssize_t i;

            /* p is reused here for the string argument; the directive
               start is no longer needed. */
            p = va_arg(vargs, const char*);
            if (prec <= 0) {
                /* No (or zero) precision: copy the whole string. */
                i = strlen(p);
            }
            else {
                /* Copy at most prec bytes, stopping at the null byte. */
                i = 0;
                while (i < prec && p[i]) {
                    i++;
                }
            }
            s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
            if (s == NULL)
                goto error;
            break;
        }

        case 'p':
            sprintf(buffer, "%p", va_arg(vargs, void*));
            assert(strlen(buffer) < sizeof(buffer));
            /* %p is ill-defined:  ensure leading 0x. */
            if (buffer[1] == 'X')
                buffer[1] = 'x';
            else if (buffer[1] != 'x') {
                /* Shift the digits (and the null byte) right by two to
                   make room for the prefix. */
                memmove(buffer+2, buffer, strlen(buffer)+1);
                buffer[0] = '0';
                buffer[1] = 'x';
            }
            WRITE_BYTES(buffer);
            break;

        case '%':
            writer.min_size++;
            *s++ = '%';
            break;

        default:
            if (*f == 0) {
                /* fix min_size if we reached the end of the format string */
                writer.min_size++;
            }

            /* invalid format string: copy unformatted string and exit */
            WRITE_BYTES(p);
            return _PyBytesWriter_Finish(&writer, s);
        }
    }

#undef WRITE_BYTES

    return _PyBytesWriter_Finish(&writer, s);

 error:
    /* Discard the partially built output. */
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
368
369 PyObject *
PyBytes_FromFormat(const char * format,...)370 PyBytes_FromFormat(const char *format, ...)
371 {
372 PyObject* ret;
373 va_list vargs;
374
375 #ifdef HAVE_STDARG_PROTOTYPES
376 va_start(vargs, format);
377 #else
378 va_start(vargs);
379 #endif
380 ret = PyBytes_FromFormatV(format, vargs);
381 va_end(vargs);
382 return ret;
383 }
384
385 /* Helpers for formatstring */
386
387 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)388 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
389 {
390 Py_ssize_t argidx = *p_argidx;
391 if (argidx < arglen) {
392 (*p_argidx)++;
393 if (arglen < 0)
394 return args;
395 else
396 return PyTuple_GetItem(args, argidx);
397 }
398 PyErr_SetString(PyExc_TypeError,
399 "not enough arguments for format string");
400 return NULL;
401 }
402
/* Format codes
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 *
 * Bit flags OR-ed into `flags` while _PyBytes_FormatEx() parses the flag
 * characters of a conversion; each flag corresponds to the character
 * listed next to it above.
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
415
416 /* Returns a new reference to a PyBytes object, or NULL on failure. */
417
418 static char*
formatfloat(PyObject * v,int flags,int prec,int type,PyObject ** p_result,_PyBytesWriter * writer,char * str)419 formatfloat(PyObject *v, int flags, int prec, int type,
420 PyObject **p_result, _PyBytesWriter *writer, char *str)
421 {
422 char *p;
423 PyObject *result;
424 double x;
425 size_t len;
426
427 x = PyFloat_AsDouble(v);
428 if (x == -1.0 && PyErr_Occurred()) {
429 PyErr_Format(PyExc_TypeError, "float argument required, "
430 "not %.200s", Py_TYPE(v)->tp_name);
431 return NULL;
432 }
433
434 if (prec < 0)
435 prec = 6;
436
437 p = PyOS_double_to_string(x, type, prec,
438 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
439
440 if (p == NULL)
441 return NULL;
442
443 len = strlen(p);
444 if (writer != NULL) {
445 str = _PyBytesWriter_Prepare(writer, str, len);
446 if (str == NULL)
447 return NULL;
448 memcpy(str, p, len);
449 PyMem_Free(p);
450 str += len;
451 return str;
452 }
453
454 result = PyBytes_FromStringAndSize(p, len);
455 PyMem_Free(p);
456 *p_result = result;
457 return result != NULL ? str : NULL;
458 }
459
460 static PyObject *
formatlong(PyObject * v,int flags,int prec,int type)461 formatlong(PyObject *v, int flags, int prec, int type)
462 {
463 PyObject *result, *iobj;
464 if (type == 'i')
465 type = 'd';
466 if (PyLong_Check(v))
467 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
468 if (PyNumber_Check(v)) {
469 /* make sure number is a type of integer for o, x, and X */
470 if (type == 'o' || type == 'x' || type == 'X')
471 iobj = PyNumber_Index(v);
472 else
473 iobj = PyNumber_Long(v);
474 if (iobj == NULL) {
475 if (!PyErr_ExceptionMatches(PyExc_TypeError))
476 return NULL;
477 }
478 else if (!PyLong_Check(iobj))
479 Py_CLEAR(iobj);
480 if (iobj != NULL) {
481 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
482 Py_DECREF(iobj);
483 return result;
484 }
485 }
486 PyErr_Format(PyExc_TypeError,
487 "%%%c format: %s is required, not %.200s", type,
488 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
489 : "a number",
490 Py_TYPE(v)->tp_name);
491 return NULL;
492 }
493
494 static int
byte_converter(PyObject * arg,char * p)495 byte_converter(PyObject *arg, char *p)
496 {
497 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
498 *p = PyBytes_AS_STRING(arg)[0];
499 return 1;
500 }
501 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
502 *p = PyByteArray_AS_STRING(arg)[0];
503 return 1;
504 }
505 else {
506 PyObject *iobj;
507 long ival;
508 int overflow;
509 /* make sure number is a type of integer */
510 if (PyLong_Check(arg)) {
511 ival = PyLong_AsLongAndOverflow(arg, &overflow);
512 }
513 else {
514 iobj = PyNumber_Index(arg);
515 if (iobj == NULL) {
516 if (!PyErr_ExceptionMatches(PyExc_TypeError))
517 return 0;
518 goto onError;
519 }
520 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
521 Py_DECREF(iobj);
522 }
523 if (!overflow && ival == -1 && PyErr_Occurred())
524 goto onError;
525 if (overflow || !(0 <= ival && ival <= 255)) {
526 PyErr_SetString(PyExc_OverflowError,
527 "%c arg not in range(256)");
528 return 0;
529 }
530 *p = (char)ival;
531 return 1;
532 }
533 onError:
534 PyErr_SetString(PyExc_TypeError,
535 "%c requires an integer in range(256) or a single byte");
536 return 0;
537 }
538
539 static PyObject *_PyBytes_FromBuffer(PyObject *x);
540
541 static PyObject *
format_obj(PyObject * v,const char ** pbuf,Py_ssize_t * plen)542 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
543 {
544 PyObject *func, *result;
545 _Py_IDENTIFIER(__bytes__);
546 /* is it a bytes object? */
547 if (PyBytes_Check(v)) {
548 *pbuf = PyBytes_AS_STRING(v);
549 *plen = PyBytes_GET_SIZE(v);
550 Py_INCREF(v);
551 return v;
552 }
553 if (PyByteArray_Check(v)) {
554 *pbuf = PyByteArray_AS_STRING(v);
555 *plen = PyByteArray_GET_SIZE(v);
556 Py_INCREF(v);
557 return v;
558 }
559 /* does it support __bytes__? */
560 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
561 if (func != NULL) {
562 result = _PyObject_CallNoArg(func);
563 Py_DECREF(func);
564 if (result == NULL)
565 return NULL;
566 if (!PyBytes_Check(result)) {
567 PyErr_Format(PyExc_TypeError,
568 "__bytes__ returned non-bytes (type %.200s)",
569 Py_TYPE(result)->tp_name);
570 Py_DECREF(result);
571 return NULL;
572 }
573 *pbuf = PyBytes_AS_STRING(result);
574 *plen = PyBytes_GET_SIZE(result);
575 return result;
576 }
577 /* does it support buffer protocol? */
578 if (PyObject_CheckBuffer(v)) {
579 /* maybe we can avoid making a copy of the buffer object here? */
580 result = _PyBytes_FromBuffer(v);
581 if (result == NULL)
582 return NULL;
583 *pbuf = PyBytes_AS_STRING(result);
584 *plen = PyBytes_GET_SIZE(result);
585 return result;
586 }
587 PyErr_Format(PyExc_TypeError,
588 "%%b requires a bytes-like object, "
589 "or an object that implements __bytes__, not '%.100s'",
590 Py_TYPE(v)->tp_name);
591 return NULL;
592 }
593
594 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
595
/* Implement printf-style formatting of `format` (format_len bytes) with
   the arguments in `args`.

   `args` is either a tuple of values, a single value, or a mapping used
   with "%(key)..." conversions.  `use_bytearray` is copied into
   writer.use_bytearray (presumably it selects a bytearray result; confirm
   against _PyBytesWriter).

   Returns the result of _PyBytesWriter_Finish(), or NULL on error.
   `args` must not be NULL. */
PyObject *
_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
                  PyObject *args, int use_bytearray)
{
    const char *fmt;
    char *res;
    Py_ssize_t arglen, argidx;
    Py_ssize_t fmtcnt;
    /* Non-zero when `args` was replaced by a value looked up in `dict`,
       in which case this function owns a reference to it. */
    int args_owned = 0;
    PyObject *dict = NULL;
    _PyBytesWriter writer;

    if (args == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }
    fmt = format;
    fmtcnt = format_len;

    _PyBytesWriter_Init(&writer);
    writer.use_bytearray = use_bytearray;

    res = _PyBytesWriter_Alloc(&writer, fmtcnt);
    if (res == NULL)
        return NULL;
    if (!use_bytearray)
        writer.overallocate = 1;

    if (PyTuple_Check(args)) {
        arglen = PyTuple_GET_SIZE(args);
        argidx = 0;
    }
    else {
        /* Single argument: arglen < 0 tells getnextarg() to return `args`
           itself, and argidx == -2 keeps "argidx < arglen" true until
           that argument has been consumed. */
        arglen = -1;
        argidx = -2;
    }
    /* Treat `args` as a mapping when its type has mp_subscript and it is
       not a tuple, bytes, str or bytearray object. */
    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
        !PyByteArray_Check(args)) {
            dict = args;
    }

    /* fmtcnt counts the format bytes that remain after the current one. */
    while (--fmtcnt >= 0) {
        if (*fmt != '%') {
            Py_ssize_t len;
            char *pos;

            /* Copy the whole run of literal bytes up to the next '%' (or
               to the end of the format) in one go. */
            pos = (char *)memchr(fmt + 1, '%', fmtcnt);
            if (pos != NULL)
                len = pos - fmt;
            else
                len = fmtcnt + 1;
            assert(len != 0);

            memcpy(res, fmt, len);
            res += len;
            fmt += len;
            fmtcnt -= (len - 1);
        }
        else {
            /* Got a format specifier */
            int flags = 0;
            Py_ssize_t width = -1;
            int prec = -1;
            int c = '\0';
            int fill;
            PyObject *v = NULL;
            PyObject *temp = NULL;
            const char *pbuf = NULL;
            int sign;
            Py_ssize_t len = 0;
            char onechar; /* For byte_converter() */
            Py_ssize_t alloc;
#ifdef Py_DEBUG
            char *before;
#endif

            fmt++;
            /* "%%" produces a literal '%'. */
            if (*fmt == '%') {
                *res++ = '%';
                fmt++;
                fmtcnt--;
                continue;
            }
            if (*fmt == '(') {
                /* "%(key)...": look the argument up in the mapping. */
                const char *keystart;
                Py_ssize_t keylen;
                PyObject *key;
                int pcount = 1;

                if (dict == NULL) {
                    PyErr_SetString(PyExc_TypeError,
                               "format requires a mapping");
                    goto error;
                }
                ++fmt;
                --fmtcnt;
                keystart = fmt;
                /* Skip over balanced parentheses */
                while (pcount > 0 && --fmtcnt >= 0) {
                    if (*fmt == ')')
                        --pcount;
                    else if (*fmt == '(')
                        ++pcount;
                    fmt++;
                }
                /* fmt is now one past the closing ')'. */
                keylen = fmt - keystart - 1;
                if (fmtcnt < 0 || pcount > 0) {
                    PyErr_SetString(PyExc_ValueError,
                               "incomplete format key");
                    goto error;
                }
                key = PyBytes_FromStringAndSize(keystart,
                                                 keylen);
                if (key == NULL)
                    goto error;
                /* Release the value fetched for a previous key. */
                if (args_owned) {
                    Py_DECREF(args);
                    args_owned = 0;
                }
                args = PyObject_GetItem(dict, key);
                Py_DECREF(key);
                if (args == NULL) {
                    goto error;
                }
                /* The looked-up value becomes the single argument. */
                args_owned = 1;
                arglen = -1;
                argidx = -2;
            }

            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
            while (--fmtcnt >= 0) {
                switch (c = *fmt++) {
                case '-': flags |= F_LJUST; continue;
                case '+': flags |= F_SIGN; continue;
                case ' ': flags |= F_BLANK; continue;
                case '#': flags |= F_ALT; continue;
                case '0': flags |= F_ZERO; continue;
                }
                break;
            }

            /* Parse width. Example: "%10s" => width=10 */
            if (c == '*') {
                /* Width taken from the next argument; a negative value
                   means left-justify with the absolute value. */
                v = getnextarg(args, arglen, &argidx);
                if (v == NULL)
                    goto error;
                if (!PyLong_Check(v)) {
                    PyErr_SetString(PyExc_TypeError,
                                    "* wants int");
                    goto error;
                }
                width = PyLong_AsSsize_t(v);
                if (width == -1 && PyErr_Occurred())
                    goto error;
                if (width < 0) {
                    flags |= F_LJUST;
                    width = -width;
                }
                if (--fmtcnt >= 0)
                    c = *fmt++;
            }
            else if (c >= 0 && isdigit(c)) {
                width = c - '0';
                while (--fmtcnt >= 0) {
                    c = Py_CHARMASK(*fmt++);
                    if (!isdigit(c))
                        break;
                    /* Reject a width whose next digit would overflow
                       Py_ssize_t. */
                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
                        PyErr_SetString(
                            PyExc_ValueError,
                            "width too big");
                        goto error;
                    }
                    width = width*10 + (c - '0');
                }
            }

            /* Parse precision. Example: "%.3f" => prec=3 */
            if (c == '.') {
                prec = 0;
                if (--fmtcnt >= 0)
                    c = *fmt++;
                if (c == '*') {
                    /* Precision taken from the next argument; negative
                       values are clamped to 0. */
                    v = getnextarg(args, arglen, &argidx);
                    if (v == NULL)
                        goto error;
                    if (!PyLong_Check(v)) {
                        PyErr_SetString(
                            PyExc_TypeError,
                            "* wants int");
                        goto error;
                    }
                    prec = _PyLong_AsInt(v);
                    if (prec == -1 && PyErr_Occurred())
                        goto error;
                    if (prec < 0)
                        prec = 0;
                    if (--fmtcnt >= 0)
                        c = *fmt++;
                }
                else if (c >= 0 && isdigit(c)) {
                    prec = c - '0';
                    while (--fmtcnt >= 0) {
                        c = Py_CHARMASK(*fmt++);
                        if (!isdigit(c))
                            break;
                        /* Reject a precision whose next digit would
                           overflow int. */
                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
                            PyErr_SetString(
                                PyExc_ValueError,
                                "prec too big");
                            goto error;
                        }
                        prec = prec*10 + (c - '0');
                    }
                }
            } /* prec */
            /* A length modifier ('h', 'l' or 'L') is accepted and
               skipped. */
            if (fmtcnt >= 0) {
                if (c == 'h' || c == 'l' || c == 'L') {
                    if (--fmtcnt >= 0)
                        c = *fmt++;
                }
            }
            if (fmtcnt < 0) {
                PyErr_SetString(PyExc_ValueError,
                                "incomplete format");
                goto error;
            }
            /* Fetch the value to be converted. */
            v = getnextarg(args, arglen, &argidx);
            if (v == NULL)
                goto error;

            if (fmtcnt == 0) {
                /* last write: disable writer overallocation */
                writer.overallocate = 0;
            }

            /* `sign` is first used as a flag ("this conversion may carry
               a sign") and later holds the sign character to write, or 0
               for none. */
            sign = 0;
            fill = ' ';
            switch (c) {
            case 'r':
                // %r is only for 2/3 code; 3 only code should use %a
            case 'a':
                temp = PyObject_ASCII(v);
                if (temp == NULL)
                    goto error;
                assert(PyUnicode_IS_ASCII(temp));
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
                len = PyUnicode_GET_LENGTH(temp);
                /* A precision truncates the text. */
                if (prec >= 0 && len > prec)
                    len = prec;
                break;

            case 's':
                // %s is only for 2/3 code; 3 only code should use %b
            case 'b':
                temp = format_obj(v, &pbuf, &len);
                if (temp == NULL)
                    goto error;
                /* A precision truncates the data. */
                if (prec >= 0 && len > prec)
                    len = prec;
                break;

            case 'i':
            case 'd':
            case 'u':
            case 'o':
            case 'x':
            case 'X':
                if (PyLong_CheckExact(v)
                    && width == -1 && prec == -1
                    && !(flags & (F_SIGN | F_BLANK))
                    && c != 'X')
                {
                    /* Fast path */
                    int alternate = flags & F_ALT;
                    int base;

                    switch(c)
                    {
                        default:
                            Py_UNREACHABLE();
                        case 'd':
                        case 'i':
                        case 'u':
                            base = 10;
                            break;
                        case 'o':
                            base = 8;
                            break;
                        case 'x':
                        case 'X':
                            base = 16;
                            break;
                    }

                    /* Fast path */
                    writer.min_size -= 2; /* size preallocated for "%d" */
                    res = _PyLong_FormatBytesWriter(&writer, res,
                                                    v, base, alternate);
                    if (res == NULL)
                        goto error;
                    continue;
                }

                /* General path: format into a temporary ASCII str object
                   and let the padding code below copy it. */
                temp = formatlong(v, flags, prec, c);
                if (!temp)
                    goto error;
                assert(PyUnicode_IS_ASCII(temp));
                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
                len = PyUnicode_GET_LENGTH(temp);
                sign = 1;
                if (flags & F_ZERO)
                    fill = '0';
                break;

            case 'e':
            case 'E':
            case 'f':
            case 'F':
            case 'g':
            case 'G':
                if (width == -1 && prec == -1
                    && !(flags & (F_SIGN | F_BLANK)))
                {
                    /* Fast path */
                    writer.min_size -= 2; /* size preallocated for "%f" */
                    res = formatfloat(v, flags, prec, c, NULL, &writer, res);
                    if (res == NULL)
                        goto error;
                    continue;
                }

                /* General path: format into a temporary bytes object. */
                if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
                    goto error;
                pbuf = PyBytes_AS_STRING(temp);
                len = PyBytes_GET_SIZE(temp);
                sign = 1;
                if (flags & F_ZERO)
                    fill = '0';
                break;

            case 'c':
                /* byte_converter() returns 1 on success, which is also
                   the length of the converted data. */
                pbuf = &onechar;
                len = byte_converter(v, &onechar);
                if (!len)
                    goto error;
                if (width == -1) {
                    /* Fast path */
                    *res++ = onechar;
                    continue;
                }
                break;

            default:
                PyErr_Format(PyExc_ValueError,
                  "unsupported format character '%c' (0x%x) "
                  "at index %zd",
                  c, c,
                  (Py_ssize_t)(fmt - 1 - format));
                goto error;
            }

            if (sign) {
                /* Split an explicit sign off the formatted number, or
                   derive one from the '+' / ' ' flags. */
                if (*pbuf == '-' || *pbuf == '+') {
                    sign = *pbuf++;
                    len--;
                }
                else if (flags & F_SIGN)
                    sign = '+';
                else if (flags & F_BLANK)
                    sign = ' ';
                else
                    sign = 0;
            }
            if (width < len)
                width = len;

            /* Number of bytes this conversion will write: the field
               width, plus one for the sign when the digits alone already
               fill the field. */
            alloc = width;
            if (sign != 0 && len == width)
                alloc++;
            /* 2: size preallocated for %s */
            if (alloc > 2) {
                res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
                if (res == NULL)
                    goto error;
            }
#ifdef Py_DEBUG
            before = res;
#endif

            /* Write the sign if needed */
            if (sign) {
                if (fill != ' ')
                    *res++ = sign;
                if (width > len)
                    width--;
            }

            /* Write the numeric prefix for "x", "X" and "o" formats
               if the alternate form is used.
               For example, write "0x" for the "%#x" format. */
            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
                assert(pbuf[0] == '0');
                assert(pbuf[1] == c);
                if (fill != ' ') {
                    *res++ = *pbuf++;
                    *res++ = *pbuf++;
                }
                width -= 2;
                if (width < 0)
                    width = 0;
                len -= 2;
            }

            /* Pad left with the fill character if needed */
            if (width > len && !(flags & F_LJUST)) {
                memset(res, fill, width - len);
                res += (width - len);
                width = len;
            }

            /* If padding with spaces: write sign if needed and/or numeric
               prefix if the alternate form is used */
            if (fill == ' ') {
                if (sign)
                    *res++ = sign;
                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
                    assert(pbuf[0] == '0');
                    assert(pbuf[1] == c);
                    *res++ = *pbuf++;
                    *res++ = *pbuf++;
                }
            }

            /* Copy bytes */
            memcpy(res, pbuf, len);
            res += len;

            /* Pad right with the fill character if needed */
            if (width > len) {
                memset(res, ' ', width - len);
                res += (width - len);
            }

            /* Mapping argument: fail right away if the current argument
               is still unconsumed. */
            if (dict && (argidx < arglen)) {
                PyErr_SetString(PyExc_TypeError,
                           "not all arguments converted during bytes formatting");
                Py_XDECREF(temp);
                goto error;
            }
            Py_XDECREF(temp);

#ifdef Py_DEBUG
            /* check that we computed the exact size for this write */
            assert((res - before) == alloc);
#endif
        } /* '%' */

        /* If overallocation was disabled, ensure that it was the last
           write. Otherwise, we missed an optimization */
        assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
    } /* until end */

    /* Tuple or single argument: every argument must have been used. */
    if (argidx < arglen && !dict) {
        PyErr_SetString(PyExc_TypeError,
                        "not all arguments converted during bytes formatting");
        goto error;
    }

    if (args_owned) {
        Py_DECREF(args);
    }
    return _PyBytesWriter_Finish(&writer, res);

 error:
    /* Discard the partial output and release the looked-up argument, if
       any. */
    _PyBytesWriter_Dealloc(&writer);
    if (args_owned) {
        Py_DECREF(args);
    }
    return NULL;
}
1078
1079 /* =-= */
1080
1081 static void
bytes_dealloc(PyObject * op)1082 bytes_dealloc(PyObject *op)
1083 {
1084 Py_TYPE(op)->tp_free(op);
1085 }
1086
1087 /* Unescape a backslash-escaped string. If unicode is non-zero,
1088 the string is a u-literal. If recode_encoding is non-zero,
1089 the string is UTF-8 encoded and should be re-encoded in the
1090 specified encoding. */
1091
1092 static char *
_PyBytes_DecodeEscapeRecode(const char ** s,const char * end,const char * errors,const char * recode_encoding,_PyBytesWriter * writer,char * p)1093 _PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1094 const char *errors, const char *recode_encoding,
1095 _PyBytesWriter *writer, char *p)
1096 {
1097 PyObject *u, *w;
1098 const char* t;
1099
1100 t = *s;
1101 /* Decode non-ASCII bytes as UTF-8. */
1102 while (t < end && (*t & 0x80))
1103 t++;
1104 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1105 if (u == NULL)
1106 return NULL;
1107
1108 /* Recode them in target encoding. */
1109 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1110 Py_DECREF(u);
1111 if (w == NULL)
1112 return NULL;
1113 assert(PyBytes_Check(w));
1114
1115 /* Append bytes to output buffer. */
1116 writer->min_size--; /* subtract 1 preallocated byte */
1117 p = _PyBytesWriter_WriteBytes(writer, p,
1118 PyBytes_AS_STRING(w),
1119 PyBytes_GET_SIZE(w));
1120 Py_DECREF(w);
1121 if (p == NULL)
1122 return NULL;
1123
1124 *s = t;
1125 return p;
1126 }
1127
_PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding,const char ** first_invalid_escape)1128 PyObject *_PyBytes_DecodeEscape(const char *s,
1129 Py_ssize_t len,
1130 const char *errors,
1131 Py_ssize_t unicode,
1132 const char *recode_encoding,
1133 const char **first_invalid_escape)
1134 {
1135 int c;
1136 char *p;
1137 const char *end;
1138 _PyBytesWriter writer;
1139
1140 _PyBytesWriter_Init(&writer);
1141
1142 p = _PyBytesWriter_Alloc(&writer, len);
1143 if (p == NULL)
1144 return NULL;
1145 writer.overallocate = 1;
1146
1147 *first_invalid_escape = NULL;
1148
1149 end = s + len;
1150 while (s < end) {
1151 if (*s != '\\') {
1152 non_esc:
1153 if (!(recode_encoding && (*s & 0x80))) {
1154 *p++ = *s++;
1155 }
1156 else {
1157 /* non-ASCII character and need to recode */
1158 p = _PyBytes_DecodeEscapeRecode(&s, end,
1159 errors, recode_encoding,
1160 &writer, p);
1161 if (p == NULL)
1162 goto failed;
1163 }
1164 continue;
1165 }
1166
1167 s++;
1168 if (s == end) {
1169 PyErr_SetString(PyExc_ValueError,
1170 "Trailing \\ in string");
1171 goto failed;
1172 }
1173
1174 switch (*s++) {
1175 /* XXX This assumes ASCII! */
1176 case '\n': break;
1177 case '\\': *p++ = '\\'; break;
1178 case '\'': *p++ = '\''; break;
1179 case '\"': *p++ = '\"'; break;
1180 case 'b': *p++ = '\b'; break;
1181 case 'f': *p++ = '\014'; break; /* FF */
1182 case 't': *p++ = '\t'; break;
1183 case 'n': *p++ = '\n'; break;
1184 case 'r': *p++ = '\r'; break;
1185 case 'v': *p++ = '\013'; break; /* VT */
1186 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1187 case '0': case '1': case '2': case '3':
1188 case '4': case '5': case '6': case '7':
1189 c = s[-1] - '0';
1190 if (s < end && '0' <= *s && *s <= '7') {
1191 c = (c<<3) + *s++ - '0';
1192 if (s < end && '0' <= *s && *s <= '7')
1193 c = (c<<3) + *s++ - '0';
1194 }
1195 *p++ = c;
1196 break;
1197 case 'x':
1198 if (s+1 < end) {
1199 int digit1, digit2;
1200 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1201 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1202 if (digit1 < 16 && digit2 < 16) {
1203 *p++ = (unsigned char)((digit1 << 4) + digit2);
1204 s += 2;
1205 break;
1206 }
1207 }
1208 /* invalid hexadecimal digits */
1209
1210 if (!errors || strcmp(errors, "strict") == 0) {
1211 PyErr_Format(PyExc_ValueError,
1212 "invalid \\x escape at position %d",
1213 s - 2 - (end - len));
1214 goto failed;
1215 }
1216 if (strcmp(errors, "replace") == 0) {
1217 *p++ = '?';
1218 } else if (strcmp(errors, "ignore") == 0)
1219 /* do nothing */;
1220 else {
1221 PyErr_Format(PyExc_ValueError,
1222 "decoding error; unknown "
1223 "error handling code: %.400s",
1224 errors);
1225 goto failed;
1226 }
1227 /* skip \x */
1228 if (s < end && Py_ISXDIGIT(s[0]))
1229 s++; /* and a hexdigit */
1230 break;
1231
1232 default:
1233 if (*first_invalid_escape == NULL) {
1234 *first_invalid_escape = s-1; /* Back up one char, since we've
1235 already incremented s. */
1236 }
1237 *p++ = '\\';
1238 s--;
1239 goto non_esc; /* an arbitrary number of unescaped
1240 UTF-8 bytes may follow. */
1241 }
1242 }
1243
1244 return _PyBytesWriter_Finish(&writer, p);
1245
1246 failed:
1247 _PyBytesWriter_Dealloc(&writer);
1248 return NULL;
1249 }
1250
PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding)1251 PyObject *PyBytes_DecodeEscape(const char *s,
1252 Py_ssize_t len,
1253 const char *errors,
1254 Py_ssize_t unicode,
1255 const char *recode_encoding)
1256 {
1257 const char* first_invalid_escape;
1258 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1259 recode_encoding,
1260 &first_invalid_escape);
1261 if (result == NULL)
1262 return NULL;
1263 if (first_invalid_escape != NULL) {
1264 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1265 "invalid escape sequence '\\%c'",
1266 (unsigned char)*first_invalid_escape) < 0) {
1267 Py_DECREF(result);
1268 return NULL;
1269 }
1270 }
1271 return result;
1272
1273 }
1274 /* -------------------------------------------------------------------- */
1275 /* object api */
1276
1277 Py_ssize_t
PyBytes_Size(PyObject * op)1278 PyBytes_Size(PyObject *op)
1279 {
1280 if (!PyBytes_Check(op)) {
1281 PyErr_Format(PyExc_TypeError,
1282 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1283 return -1;
1284 }
1285 return Py_SIZE(op);
1286 }
1287
1288 char *
PyBytes_AsString(PyObject * op)1289 PyBytes_AsString(PyObject *op)
1290 {
1291 if (!PyBytes_Check(op)) {
1292 PyErr_Format(PyExc_TypeError,
1293 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1294 return NULL;
1295 }
1296 return ((PyBytesObject *)op)->ob_sval;
1297 }
1298
1299 int
PyBytes_AsStringAndSize(PyObject * obj,char ** s,Py_ssize_t * len)1300 PyBytes_AsStringAndSize(PyObject *obj,
1301 char **s,
1302 Py_ssize_t *len)
1303 {
1304 if (s == NULL) {
1305 PyErr_BadInternalCall();
1306 return -1;
1307 }
1308
1309 if (!PyBytes_Check(obj)) {
1310 PyErr_Format(PyExc_TypeError,
1311 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1312 return -1;
1313 }
1314
1315 *s = PyBytes_AS_STRING(obj);
1316 if (len != NULL)
1317 *len = PyBytes_GET_SIZE(obj);
1318 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1319 PyErr_SetString(PyExc_ValueError,
1320 "embedded null byte");
1321 return -1;
1322 }
1323 return 0;
1324 }
1325
1326 /* -------------------------------------------------------------------- */
1327 /* Methods */
1328
1329 #include "stringlib/stringdefs.h"
1330
1331 #include "stringlib/fastsearch.h"
1332 #include "stringlib/count.h"
1333 #include "stringlib/find.h"
1334 #include "stringlib/join.h"
1335 #include "stringlib/partition.h"
1336 #include "stringlib/split.h"
1337 #include "stringlib/ctype.h"
1338
1339 #include "stringlib/transmogrify.h"
1340
1341 PyObject *
PyBytes_Repr(PyObject * obj,int smartquotes)1342 PyBytes_Repr(PyObject *obj, int smartquotes)
1343 {
1344 PyBytesObject* op = (PyBytesObject*) obj;
1345 Py_ssize_t i, length = Py_SIZE(op);
1346 Py_ssize_t newsize, squotes, dquotes;
1347 PyObject *v;
1348 unsigned char quote, *s, *p;
1349
1350 /* Compute size of output string */
1351 squotes = dquotes = 0;
1352 newsize = 3; /* b'' */
1353 s = (unsigned char*)op->ob_sval;
1354 for (i = 0; i < length; i++) {
1355 Py_ssize_t incr = 1;
1356 switch(s[i]) {
1357 case '\'': squotes++; break;
1358 case '"': dquotes++; break;
1359 case '\\': case '\t': case '\n': case '\r':
1360 incr = 2; break; /* \C */
1361 default:
1362 if (s[i] < ' ' || s[i] >= 0x7f)
1363 incr = 4; /* \xHH */
1364 }
1365 if (newsize > PY_SSIZE_T_MAX - incr)
1366 goto overflow;
1367 newsize += incr;
1368 }
1369 quote = '\'';
1370 if (smartquotes && squotes && !dquotes)
1371 quote = '"';
1372 if (squotes && quote == '\'') {
1373 if (newsize > PY_SSIZE_T_MAX - squotes)
1374 goto overflow;
1375 newsize += squotes;
1376 }
1377
1378 v = PyUnicode_New(newsize, 127);
1379 if (v == NULL) {
1380 return NULL;
1381 }
1382 p = PyUnicode_1BYTE_DATA(v);
1383
1384 *p++ = 'b', *p++ = quote;
1385 for (i = 0; i < length; i++) {
1386 unsigned char c = op->ob_sval[i];
1387 if (c == quote || c == '\\')
1388 *p++ = '\\', *p++ = c;
1389 else if (c == '\t')
1390 *p++ = '\\', *p++ = 't';
1391 else if (c == '\n')
1392 *p++ = '\\', *p++ = 'n';
1393 else if (c == '\r')
1394 *p++ = '\\', *p++ = 'r';
1395 else if (c < ' ' || c >= 0x7f) {
1396 *p++ = '\\';
1397 *p++ = 'x';
1398 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1399 *p++ = Py_hexdigits[c & 0xf];
1400 }
1401 else
1402 *p++ = c;
1403 }
1404 *p++ = quote;
1405 assert(_PyUnicode_CheckConsistency(v, 1));
1406 return v;
1407
1408 overflow:
1409 PyErr_SetString(PyExc_OverflowError,
1410 "bytes object is too large to make repr");
1411 return NULL;
1412 }
1413
/* repr() handler: calls PyBytes_Repr() with smartquotes set to 1. */
static PyObject *
bytes_repr(PyObject *op)
{
    return PyBytes_Repr(op, 1);
}
1419
1420 static PyObject *
bytes_str(PyObject * op)1421 bytes_str(PyObject *op)
1422 {
1423 if (Py_BytesWarningFlag) {
1424 if (PyErr_WarnEx(PyExc_BytesWarning,
1425 "str() on a bytes instance", 1))
1426 return NULL;
1427 }
1428 return bytes_repr(op);
1429 }
1430
/* Length handler: returns Py_SIZE(a). */
static Py_ssize_t
bytes_length(PyBytesObject *a)
{
    return Py_SIZE(a);
}
1436
1437 /* This is also used by PyBytes_Concat() */
1438 static PyObject *
bytes_concat(PyObject * a,PyObject * b)1439 bytes_concat(PyObject *a, PyObject *b)
1440 {
1441 Py_buffer va, vb;
1442 PyObject *result = NULL;
1443
1444 va.len = -1;
1445 vb.len = -1;
1446 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1447 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1448 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1449 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1450 goto done;
1451 }
1452
1453 /* Optimize end cases */
1454 if (va.len == 0 && PyBytes_CheckExact(b)) {
1455 result = b;
1456 Py_INCREF(result);
1457 goto done;
1458 }
1459 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1460 result = a;
1461 Py_INCREF(result);
1462 goto done;
1463 }
1464
1465 if (va.len > PY_SSIZE_T_MAX - vb.len) {
1466 PyErr_NoMemory();
1467 goto done;
1468 }
1469
1470 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1471 if (result != NULL) {
1472 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1473 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1474 }
1475
1476 done:
1477 if (va.len != -1)
1478 PyBuffer_Release(&va);
1479 if (vb.len != -1)
1480 PyBuffer_Release(&vb);
1481 return result;
1482 }
1483
1484 static PyObject *
bytes_repeat(PyBytesObject * a,Py_ssize_t n)1485 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1486 {
1487 Py_ssize_t i;
1488 Py_ssize_t j;
1489 Py_ssize_t size;
1490 PyBytesObject *op;
1491 size_t nbytes;
1492 if (n < 0)
1493 n = 0;
1494 /* watch out for overflows: the size can overflow int,
1495 * and the # of bytes needed can overflow size_t
1496 */
1497 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1498 PyErr_SetString(PyExc_OverflowError,
1499 "repeated bytes are too long");
1500 return NULL;
1501 }
1502 size = Py_SIZE(a) * n;
1503 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1504 Py_INCREF(a);
1505 return (PyObject *)a;
1506 }
1507 nbytes = (size_t)size;
1508 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1509 PyErr_SetString(PyExc_OverflowError,
1510 "repeated bytes are too long");
1511 return NULL;
1512 }
1513 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1514 if (op == NULL)
1515 return PyErr_NoMemory();
1516 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
1517 op->ob_shash = -1;
1518 op->ob_sval[size] = '\0';
1519 if (Py_SIZE(a) == 1 && n > 0) {
1520 memset(op->ob_sval, a->ob_sval[0] , n);
1521 return (PyObject *) op;
1522 }
1523 i = 0;
1524 if (i < size) {
1525 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1526 i = Py_SIZE(a);
1527 }
1528 while (i < size) {
1529 j = (i <= size-i) ? i : size-i;
1530 memcpy(op->ob_sval+i, op->ob_sval, j);
1531 i += j;
1532 }
1533 return (PyObject *) op;
1534 }
1535
/* Containment handler: forwards this object's data pointer and size,
   together with `arg`, to _Py_bytes_contains() and returns its result. */
static int
bytes_contains(PyObject *self, PyObject *arg)
{
    return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
}
1541
1542 static PyObject *
bytes_item(PyBytesObject * a,Py_ssize_t i)1543 bytes_item(PyBytesObject *a, Py_ssize_t i)
1544 {
1545 if (i < 0 || i >= Py_SIZE(a)) {
1546 PyErr_SetString(PyExc_IndexError, "index out of range");
1547 return NULL;
1548 }
1549 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1550 }
1551
1552 static int
bytes_compare_eq(PyBytesObject * a,PyBytesObject * b)1553 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1554 {
1555 int cmp;
1556 Py_ssize_t len;
1557
1558 len = Py_SIZE(a);
1559 if (Py_SIZE(b) != len)
1560 return 0;
1561
1562 if (a->ob_sval[0] != b->ob_sval[0])
1563 return 0;
1564
1565 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1566 return (cmp == 0);
1567 }
1568
1569 static PyObject*
bytes_richcompare(PyBytesObject * a,PyBytesObject * b,int op)1570 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1571 {
1572 int c;
1573 Py_ssize_t len_a, len_b;
1574 Py_ssize_t min_len;
1575 int rc;
1576
1577 /* Make sure both arguments are strings. */
1578 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1579 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
1580 rc = PyObject_IsInstance((PyObject*)a,
1581 (PyObject*)&PyUnicode_Type);
1582 if (!rc)
1583 rc = PyObject_IsInstance((PyObject*)b,
1584 (PyObject*)&PyUnicode_Type);
1585 if (rc < 0)
1586 return NULL;
1587 if (rc) {
1588 if (PyErr_WarnEx(PyExc_BytesWarning,
1589 "Comparison between bytes and string", 1))
1590 return NULL;
1591 }
1592 else {
1593 rc = PyObject_IsInstance((PyObject*)a,
1594 (PyObject*)&PyLong_Type);
1595 if (!rc)
1596 rc = PyObject_IsInstance((PyObject*)b,
1597 (PyObject*)&PyLong_Type);
1598 if (rc < 0)
1599 return NULL;
1600 if (rc) {
1601 if (PyErr_WarnEx(PyExc_BytesWarning,
1602 "Comparison between bytes and int", 1))
1603 return NULL;
1604 }
1605 }
1606 }
1607 Py_RETURN_NOTIMPLEMENTED;
1608 }
1609 else if (a == b) {
1610 switch (op) {
1611 case Py_EQ:
1612 case Py_LE:
1613 case Py_GE:
1614 /* a string is equal to itself */
1615 Py_RETURN_TRUE;
1616 break;
1617 case Py_NE:
1618 case Py_LT:
1619 case Py_GT:
1620 Py_RETURN_FALSE;
1621 break;
1622 default:
1623 PyErr_BadArgument();
1624 return NULL;
1625 }
1626 }
1627 else if (op == Py_EQ || op == Py_NE) {
1628 int eq = bytes_compare_eq(a, b);
1629 eq ^= (op == Py_NE);
1630 return PyBool_FromLong(eq);
1631 }
1632 else {
1633 len_a = Py_SIZE(a);
1634 len_b = Py_SIZE(b);
1635 min_len = Py_MIN(len_a, len_b);
1636 if (min_len > 0) {
1637 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1638 if (c == 0)
1639 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1640 }
1641 else
1642 c = 0;
1643 if (c != 0)
1644 Py_RETURN_RICHCOMPARE(c, 0, op);
1645 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1646 }
1647 }
1648
1649 static Py_hash_t
bytes_hash(PyBytesObject * a)1650 bytes_hash(PyBytesObject *a)
1651 {
1652 if (a->ob_shash == -1) {
1653 /* Can't fail */
1654 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1655 }
1656 return a->ob_shash;
1657 }
1658
1659 static PyObject*
bytes_subscript(PyBytesObject * self,PyObject * item)1660 bytes_subscript(PyBytesObject* self, PyObject* item)
1661 {
1662 if (PyIndex_Check(item)) {
1663 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1664 if (i == -1 && PyErr_Occurred())
1665 return NULL;
1666 if (i < 0)
1667 i += PyBytes_GET_SIZE(self);
1668 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1669 PyErr_SetString(PyExc_IndexError,
1670 "index out of range");
1671 return NULL;
1672 }
1673 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1674 }
1675 else if (PySlice_Check(item)) {
1676 Py_ssize_t start, stop, step, slicelength, cur, i;
1677 char* source_buf;
1678 char* result_buf;
1679 PyObject* result;
1680
1681 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1682 return NULL;
1683 }
1684 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1685 &stop, step);
1686
1687 if (slicelength <= 0) {
1688 return PyBytes_FromStringAndSize("", 0);
1689 }
1690 else if (start == 0 && step == 1 &&
1691 slicelength == PyBytes_GET_SIZE(self) &&
1692 PyBytes_CheckExact(self)) {
1693 Py_INCREF(self);
1694 return (PyObject *)self;
1695 }
1696 else if (step == 1) {
1697 return PyBytes_FromStringAndSize(
1698 PyBytes_AS_STRING(self) + start,
1699 slicelength);
1700 }
1701 else {
1702 source_buf = PyBytes_AS_STRING(self);
1703 result = PyBytes_FromStringAndSize(NULL, slicelength);
1704 if (result == NULL)
1705 return NULL;
1706
1707 result_buf = PyBytes_AS_STRING(result);
1708 for (cur = start, i = 0; i < slicelength;
1709 cur += step, i++) {
1710 result_buf[i] = source_buf[cur];
1711 }
1712
1713 return result;
1714 }
1715 }
1716 else {
1717 PyErr_Format(PyExc_TypeError,
1718 "byte indices must be integers or slices, not %.200s",
1719 Py_TYPE(item)->tp_name);
1720 return NULL;
1721 }
1722 }
1723
/* Buffer acquisition handler: fills `view` through PyBuffer_FillInfo()
   with this object's ob_sval pointer and size, passing `flags` through.
   NOTE(review): the literal 1 is presumably the read-only argument of
   PyBuffer_FillInfo() -- confirm against its declaration. */
static int
bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
{
    return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
                             1, flags);
}
1730
/* Sequence slot table for bytes.  The slots for slicing and for item or
   slice assignment are left 0; the trailing comments name each slot. */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length, /*sq_length*/
    (binaryfunc)bytes_concat, /*sq_concat*/
    (ssizeargfunc)bytes_repeat, /*sq_repeat*/
    (ssizeargfunc)bytes_item, /*sq_item*/
    0,                  /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)bytes_contains /*sq_contains*/
};
1741
/* Mapping slot table for bytes: a length handler and a subscript handler
   (indexes and slices); the third slot is left 0. */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,
    (binaryfunc)bytes_subscript,
    0,
};
1747
/* Buffer slot table for bytes: only the acquisition handler is provided;
   the second slot is NULL. */
static PyBufferProcs bytes_as_buffer = {
    (getbufferproc)bytes_buffer_getbuffer,
    NULL,
};
1752
1753
/* Values for the `striptype` argument of do_strip(), do_xstrip() and
   do_argstrip(): which end(s) of the data to strip. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1757
1758 /*[clinic input]
1759 bytes.split
1760
1761 sep: object = None
1762 The delimiter according which to split the bytes.
1763 None (the default value) means split on ASCII whitespace characters
1764 (space, tab, return, newline, formfeed, vertical tab).
1765 maxsplit: Py_ssize_t = -1
1766 Maximum number of splits to do.
1767 -1 (the default value) means no limit.
1768
1769 Return a list of the sections in the bytes, using sep as the delimiter.
1770 [clinic start generated code]*/
1771
1772 static PyObject *
bytes_split_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1773 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1774 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1775 {
1776 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1777 const char *s = PyBytes_AS_STRING(self), *sub;
1778 Py_buffer vsub;
1779 PyObject *list;
1780
1781 if (maxsplit < 0)
1782 maxsplit = PY_SSIZE_T_MAX;
1783 if (sep == Py_None)
1784 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1785 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1786 return NULL;
1787 sub = vsub.buf;
1788 n = vsub.len;
1789
1790 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1791 PyBuffer_Release(&vsub);
1792 return list;
1793 }
1794
1795 /*[clinic input]
1796 bytes.partition
1797
1798 sep: Py_buffer
1799 /
1800
1801 Partition the bytes into three parts using the given separator.
1802
1803 This will search for the separator sep in the bytes. If the separator is found,
1804 returns a 3-tuple containing the part before the separator, the separator
1805 itself, and the part after it.
1806
1807 If the separator is not found, returns a 3-tuple containing the original bytes
1808 object and two empty bytes objects.
1809 [clinic start generated code]*/
1810
static PyObject *
bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
{
    /* Forward this object, its data pointer and size, and the separator's
       exporting object, buffer and length to stringlib_partition(). */
    return stringlib_partition(
        (PyObject*) self,
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
        sep->obj, (const char *)sep->buf, sep->len
        );
}
1821
1822 /*[clinic input]
1823 bytes.rpartition
1824
1825 sep: Py_buffer
1826 /
1827
1828 Partition the bytes into three parts using the given separator.
1829
1830 This will search for the separator sep in the bytes, starting at the end. If
1831 the separator is found, returns a 3-tuple containing the part before the
1832 separator, the separator itself, and the part after it.
1833
1834 If the separator is not found, returns a 3-tuple containing two empty bytes
1835 objects and the original bytes object.
1836 [clinic start generated code]*/
1837
static PyObject *
bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
{
    /* Forward this object, its data pointer and size, and the separator's
       exporting object, buffer and length to stringlib_rpartition(). */
    return stringlib_rpartition(
        (PyObject*) self,
        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
        sep->obj, (const char *)sep->buf, sep->len
        );
}
1848
1849 /*[clinic input]
1850 bytes.rsplit = bytes.split
1851
1852 Return a list of the sections in the bytes, using sep as the delimiter.
1853
1854 Splitting is done starting at the end of the bytes and working to the front.
1855 [clinic start generated code]*/
1856
1857 static PyObject *
bytes_rsplit_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1858 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1859 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1860 {
1861 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1862 const char *s = PyBytes_AS_STRING(self), *sub;
1863 Py_buffer vsub;
1864 PyObject *list;
1865
1866 if (maxsplit < 0)
1867 maxsplit = PY_SSIZE_T_MAX;
1868 if (sep == Py_None)
1869 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1870 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1871 return NULL;
1872 sub = vsub.buf;
1873 n = vsub.len;
1874
1875 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1876 PyBuffer_Release(&vsub);
1877 return list;
1878 }
1879
1880
1881 /*[clinic input]
1882 bytes.join
1883
1884 iterable_of_bytes: object
1885 /
1886
1887 Concatenate any number of bytes objects.
1888
1889 The bytes whose method is called is inserted in between each pair.
1890
1891 The result is returned as a new bytes object.
1892
1893 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1894 [clinic start generated code]*/
1895
static PyObject *
bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
{
    /* `self` is the separator; the work is done by stringlib_bytes_join(). */
    return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
}
1902
1903 PyObject *
_PyBytes_Join(PyObject * sep,PyObject * x)1904 _PyBytes_Join(PyObject *sep, PyObject *x)
1905 {
1906 assert(sep != NULL && PyBytes_Check(sep));
1907 assert(x != NULL);
1908 return bytes_join((PyBytesObject*)sep, x);
1909 }
1910
/* bytes.find(): forwards this object's data pointer and size, together
   with the unparsed `args`, to _Py_bytes_find(). */
static PyObject *
bytes_find(PyBytesObject *self, PyObject *args)
{
    return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
1916
/* bytes.index(): forwards this object's data pointer and size, together
   with the unparsed `args`, to _Py_bytes_index(). */
static PyObject *
bytes_index(PyBytesObject *self, PyObject *args)
{
    return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
1922
1923
/* bytes.rfind(): forwards this object's data pointer and size, together
   with the unparsed `args`, to _Py_bytes_rfind(). */
static PyObject *
bytes_rfind(PyBytesObject *self, PyObject *args)
{
    return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
1929
1930
/* bytes.rindex(): forwards this object's data pointer and size, together
   with the unparsed `args`, to _Py_bytes_rindex(). */
static PyObject *
bytes_rindex(PyBytesObject *self, PyObject *args)
{
    return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
1936
1937
1938 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject * self,int striptype,PyObject * sepobj)1939 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1940 {
1941 Py_buffer vsep;
1942 char *s = PyBytes_AS_STRING(self);
1943 Py_ssize_t len = PyBytes_GET_SIZE(self);
1944 char *sep;
1945 Py_ssize_t seplen;
1946 Py_ssize_t i, j;
1947
1948 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1949 return NULL;
1950 sep = vsep.buf;
1951 seplen = vsep.len;
1952
1953 i = 0;
1954 if (striptype != RIGHTSTRIP) {
1955 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1956 i++;
1957 }
1958 }
1959
1960 j = len;
1961 if (striptype != LEFTSTRIP) {
1962 do {
1963 j--;
1964 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1965 j++;
1966 }
1967
1968 PyBuffer_Release(&vsep);
1969
1970 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1971 Py_INCREF(self);
1972 return (PyObject*)self;
1973 }
1974 else
1975 return PyBytes_FromStringAndSize(s+i, j-i);
1976 }
1977
1978
1979 Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject * self,int striptype)1980 do_strip(PyBytesObject *self, int striptype)
1981 {
1982 char *s = PyBytes_AS_STRING(self);
1983 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1984
1985 i = 0;
1986 if (striptype != RIGHTSTRIP) {
1987 while (i < len && Py_ISSPACE(s[i])) {
1988 i++;
1989 }
1990 }
1991
1992 j = len;
1993 if (striptype != LEFTSTRIP) {
1994 do {
1995 j--;
1996 } while (j >= i && Py_ISSPACE(s[j]));
1997 j++;
1998 }
1999
2000 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2001 Py_INCREF(self);
2002 return (PyObject*)self;
2003 }
2004 else
2005 return PyBytes_FromStringAndSize(s+i, j-i);
2006 }
2007
2008
2009 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject * self,int striptype,PyObject * bytes)2010 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
2011 {
2012 if (bytes != NULL && bytes != Py_None) {
2013 return do_xstrip(self, striptype, bytes);
2014 }
2015 return do_strip(self, striptype);
2016 }
2017
2018 /*[clinic input]
2019 bytes.strip
2020
2021 bytes: object = None
2022 /
2023
2024 Strip leading and trailing bytes contained in the argument.
2025
2026 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2027 [clinic start generated code]*/
2028
static PyObject *
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
{
    /* Strip both ends; do_argstrip() picks the whitespace or the
       explicit-set variant depending on `bytes`. */
    return do_argstrip(self, BOTHSTRIP, bytes);
}
2035
2036 /*[clinic input]
2037 bytes.lstrip
2038
2039 bytes: object = None
2040 /
2041
2042 Strip leading bytes contained in the argument.
2043
2044 If the argument is omitted or None, strip leading ASCII whitespace.
2045 [clinic start generated code]*/
2046
static PyObject *
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
{
    /* Strip the leading end only; do_argstrip() picks the whitespace or
       the explicit-set variant depending on `bytes`. */
    return do_argstrip(self, LEFTSTRIP, bytes);
}
2053
2054 /*[clinic input]
2055 bytes.rstrip
2056
2057 bytes: object = None
2058 /
2059
2060 Strip trailing bytes contained in the argument.
2061
2062 If the argument is omitted or None, strip trailing ASCII whitespace.
2063 [clinic start generated code]*/
2064
static PyObject *
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
{
    /* Strip the trailing end only; do_argstrip() picks the whitespace or
       the explicit-set variant depending on `bytes`. */
    return do_argstrip(self, RIGHTSTRIP, bytes);
}
2071
2072
/* bytes.count(): forwards this object's data pointer and size, together
   with the unparsed `args`, to _Py_bytes_count(). */
static PyObject *
bytes_count(PyBytesObject *self, PyObject *args)
{
    return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
2078
2079
2080 /*[clinic input]
2081 bytes.translate
2082
2083 table: object
2084 Translation table, which must be a bytes object of length 256.
2085 /
2086 delete as deletechars: object(c_default="NULL") = b''
2087
2088 Return a copy with each character mapped by the given translation table.
2089
2090 All characters occurring in the optional argument delete are removed.
2091 The remaining characters are mapped through the given translation table.
2092 [clinic start generated code]*/
2093
2094 static PyObject *
bytes_translate_impl(PyBytesObject * self,PyObject * table,PyObject * deletechars)2095 bytes_translate_impl(PyBytesObject *self, PyObject *table,
2096 PyObject *deletechars)
2097 /*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2098 {
2099 char *input, *output;
2100 Py_buffer table_view = {NULL, NULL};
2101 Py_buffer del_table_view = {NULL, NULL};
2102 const char *table_chars;
2103 Py_ssize_t i, c, changed = 0;
2104 PyObject *input_obj = (PyObject*)self;
2105 const char *output_start, *del_table_chars=NULL;
2106 Py_ssize_t inlen, tablen, dellen = 0;
2107 PyObject *result;
2108 int trans_table[256];
2109
2110 if (PyBytes_Check(table)) {
2111 table_chars = PyBytes_AS_STRING(table);
2112 tablen = PyBytes_GET_SIZE(table);
2113 }
2114 else if (table == Py_None) {
2115 table_chars = NULL;
2116 tablen = 256;
2117 }
2118 else {
2119 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2120 return NULL;
2121 table_chars = table_view.buf;
2122 tablen = table_view.len;
2123 }
2124
2125 if (tablen != 256) {
2126 PyErr_SetString(PyExc_ValueError,
2127 "translation table must be 256 characters long");
2128 PyBuffer_Release(&table_view);
2129 return NULL;
2130 }
2131
2132 if (deletechars != NULL) {
2133 if (PyBytes_Check(deletechars)) {
2134 del_table_chars = PyBytes_AS_STRING(deletechars);
2135 dellen = PyBytes_GET_SIZE(deletechars);
2136 }
2137 else {
2138 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2139 PyBuffer_Release(&table_view);
2140 return NULL;
2141 }
2142 del_table_chars = del_table_view.buf;
2143 dellen = del_table_view.len;
2144 }
2145 }
2146 else {
2147 del_table_chars = NULL;
2148 dellen = 0;
2149 }
2150
2151 inlen = PyBytes_GET_SIZE(input_obj);
2152 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2153 if (result == NULL) {
2154 PyBuffer_Release(&del_table_view);
2155 PyBuffer_Release(&table_view);
2156 return NULL;
2157 }
2158 output_start = output = PyBytes_AS_STRING(result);
2159 input = PyBytes_AS_STRING(input_obj);
2160
2161 if (dellen == 0 && table_chars != NULL) {
2162 /* If no deletions are required, use faster code */
2163 for (i = inlen; --i >= 0; ) {
2164 c = Py_CHARMASK(*input++);
2165 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2166 changed = 1;
2167 }
2168 if (!changed && PyBytes_CheckExact(input_obj)) {
2169 Py_INCREF(input_obj);
2170 Py_DECREF(result);
2171 result = input_obj;
2172 }
2173 PyBuffer_Release(&del_table_view);
2174 PyBuffer_Release(&table_view);
2175 return result;
2176 }
2177
2178 if (table_chars == NULL) {
2179 for (i = 0; i < 256; i++)
2180 trans_table[i] = Py_CHARMASK(i);
2181 } else {
2182 for (i = 0; i < 256; i++)
2183 trans_table[i] = Py_CHARMASK(table_chars[i]);
2184 }
2185 PyBuffer_Release(&table_view);
2186
2187 for (i = 0; i < dellen; i++)
2188 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2189 PyBuffer_Release(&del_table_view);
2190
2191 for (i = inlen; --i >= 0; ) {
2192 c = Py_CHARMASK(*input++);
2193 if (trans_table[c] != -1)
2194 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2195 continue;
2196 changed = 1;
2197 }
2198 if (!changed && PyBytes_CheckExact(input_obj)) {
2199 Py_DECREF(result);
2200 Py_INCREF(input_obj);
2201 return input_obj;
2202 }
2203 /* Fix the size of the resulting string */
2204 if (inlen > 0)
2205 _PyBytes_Resize(&result, output - output_start);
2206 return result;
2207 }
2208
2209
2210 /*[clinic input]
2211
2212 @staticmethod
2213 bytes.maketrans
2214
2215 frm: Py_buffer
2216 to: Py_buffer
2217 /
2218
2219 Return a translation table useable for the bytes or bytearray translate method.
2220
2221 The returned table will be one where each byte in frm is mapped to the byte at
2222 the same position in to.
2223
2224 The bytes objects frm and to must be of the same length.
2225 [clinic start generated code]*/
2226
static PyObject *
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
{
    /* All work is delegated to _Py_bytes_maketrans(). */
    return _Py_bytes_maketrans(frm, to);
}
2233
2234
2235 /*[clinic input]
2236 bytes.replace
2237
2238 old: Py_buffer
2239 new: Py_buffer
2240 count: Py_ssize_t = -1
2241 Maximum number of occurrences to replace.
2242 -1 (the default value) means replace all occurrences.
2243 /
2244
2245 Return a copy with all occurrences of substring old replaced by new.
2246
2247 If the optional argument count is given, only the first count occurrences are
2248 replaced.
2249 [clinic start generated code]*/
2250
static PyObject *
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
                   Py_ssize_t count)
/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
{
    /* Pass the buffer pointer and length of both `old` and `new`, plus
       `count`, to stringlib_replace(). */
    return stringlib_replace((PyObject *)self,
                             (const char *)old->buf, old->len,
                             (const char *)new->buf, new->len, count);
}
2260
2261 /** End DALKE **/
2262
2263
/* bytes.startswith(): forwards this object's data pointer and size,
   together with the unparsed `args`, to _Py_bytes_startswith(). */
static PyObject *
bytes_startswith(PyBytesObject *self, PyObject *args)
{
    return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
2269
/* bytes.endswith(): forwards this object's data pointer and size,
   together with the unparsed `args`, to _Py_bytes_endswith(). */
static PyObject *
bytes_endswith(PyBytesObject *self, PyObject *args)
{
    return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
2275
2276
2277 /*[clinic input]
2278 bytes.decode
2279
2280 encoding: str(c_default="NULL") = 'utf-8'
2281 The encoding with which to decode the bytes.
2282 errors: str(c_default="NULL") = 'strict'
2283 The error handling scheme to use for the handling of decoding errors.
2284 The default is 'strict' meaning that decoding errors raise a
2285 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2286 as well as any other name registered with codecs.register_error that
2287 can handle UnicodeDecodeErrors.
2288
2289 Decode the bytes using the codec registered for encoding.
2290 [clinic start generated code]*/
2291
static PyObject *
bytes_decode_impl(PyBytesObject *self, const char *encoding,
                  const char *errors)
/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
{
    /* `encoding` and `errors` are passed through unchanged (the clinic
       declaration above gives both a C default of NULL). */
    return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
}
2299
2300
2301 /*[clinic input]
2302 bytes.splitlines
2303
2304 keepends: bool(accept={int}) = False
2305
2306 Return a list of the lines in the bytes, breaking at line boundaries.
2307
2308 Line breaks are not included in the resulting list unless keepends is given and
2309 true.
2310 [clinic start generated code]*/
2311
static PyObject *
bytes_splitlines_impl(PyBytesObject *self, int keepends)
/*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
{
    /* Forward this object, its data pointer and size, and `keepends` to
       stringlib_splitlines(). */
    return stringlib_splitlines(
        (PyObject*) self, PyBytes_AS_STRING(self),
        PyBytes_GET_SIZE(self), keepends
        );
}
2321
2322 /*[clinic input]
2323 @classmethod
2324 bytes.fromhex
2325
2326 string: unicode
2327 /
2328
2329 Create a bytes object from a string of hexadecimal numbers.
2330
2331 Spaces between two numbers are accepted.
2332 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2333 [clinic start generated code]*/
2334
2335 static PyObject *
bytes_fromhex_impl(PyTypeObject * type,PyObject * string)2336 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2337 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2338 {
2339 PyObject *result = _PyBytes_FromHex(string, 0);
2340 if (type != &PyBytes_Type && result != NULL) {
2341 Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2342 result, NULL));
2343 }
2344 return result;
2345 }
2346
2347 PyObject*
_PyBytes_FromHex(PyObject * string,int use_bytearray)2348 _PyBytes_FromHex(PyObject *string, int use_bytearray)
2349 {
2350 char *buf;
2351 Py_ssize_t hexlen, invalid_char;
2352 unsigned int top, bot;
2353 Py_UCS1 *str, *end;
2354 _PyBytesWriter writer;
2355
2356 _PyBytesWriter_Init(&writer);
2357 writer.use_bytearray = use_bytearray;
2358
2359 assert(PyUnicode_Check(string));
2360 if (PyUnicode_READY(string))
2361 return NULL;
2362 hexlen = PyUnicode_GET_LENGTH(string);
2363
2364 if (!PyUnicode_IS_ASCII(string)) {
2365 void *data = PyUnicode_DATA(string);
2366 unsigned int kind = PyUnicode_KIND(string);
2367 Py_ssize_t i;
2368
2369 /* search for the first non-ASCII character */
2370 for (i = 0; i < hexlen; i++) {
2371 if (PyUnicode_READ(kind, data, i) >= 128)
2372 break;
2373 }
2374 invalid_char = i;
2375 goto error;
2376 }
2377
2378 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2379 str = PyUnicode_1BYTE_DATA(string);
2380
2381 /* This overestimates if there are spaces */
2382 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2383 if (buf == NULL)
2384 return NULL;
2385
2386 end = str + hexlen;
2387 while (str < end) {
2388 /* skip over spaces in the input */
2389 if (Py_ISSPACE(*str)) {
2390 do {
2391 str++;
2392 } while (Py_ISSPACE(*str));
2393 if (str >= end)
2394 break;
2395 }
2396
2397 top = _PyLong_DigitValue[*str];
2398 if (top >= 16) {
2399 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2400 goto error;
2401 }
2402 str++;
2403
2404 bot = _PyLong_DigitValue[*str];
2405 if (bot >= 16) {
2406 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2407 goto error;
2408 }
2409 str++;
2410
2411 *buf++ = (unsigned char)((top << 4) + bot);
2412 }
2413
2414 return _PyBytesWriter_Finish(&writer, buf);
2415
2416 error:
2417 PyErr_Format(PyExc_ValueError,
2418 "non-hexadecimal number found in "
2419 "fromhex() arg at position %zd", invalid_char);
2420 _PyBytesWriter_Dealloc(&writer);
2421 return NULL;
2422 }
2423
2424 PyDoc_STRVAR(hex__doc__,
2425 "B.hex() -> string\n\
2426 \n\
2427 Create a string of hexadecimal numbers from a bytes object.\n\
2428 Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2429
2430 static PyObject *
bytes_hex(PyBytesObject * self)2431 bytes_hex(PyBytesObject *self)
2432 {
2433 char* argbuf = PyBytes_AS_STRING(self);
2434 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2435 return _Py_strhex(argbuf, arglen);
2436 }
2437
2438 static PyObject *
bytes_getnewargs(PyBytesObject * v)2439 bytes_getnewargs(PyBytesObject *v)
2440 {
2441 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2442 }
2443
2444
2445 static PyMethodDef
2446 bytes_methods[] = {
2447 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2448 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2449 _Py_capitalize__doc__},
2450 {"center", (PyCFunction)stringlib_center, METH_VARARGS,
2451 _Py_center__doc__},
2452 {"count", (PyCFunction)bytes_count, METH_VARARGS,
2453 _Py_count__doc__},
2454 BYTES_DECODE_METHODDEF
2455 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2456 _Py_endswith__doc__},
2457 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
2458 _Py_expandtabs__doc__},
2459 {"find", (PyCFunction)bytes_find, METH_VARARGS,
2460 _Py_find__doc__},
2461 BYTES_FROMHEX_METHODDEF
2462 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
2463 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
2464 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2465 _Py_isalnum__doc__},
2466 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2467 _Py_isalpha__doc__},
2468 {"isascii", (PyCFunction)stringlib_isascii, METH_NOARGS,
2469 _Py_isascii__doc__},
2470 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2471 _Py_isdigit__doc__},
2472 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2473 _Py_islower__doc__},
2474 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2475 _Py_isspace__doc__},
2476 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2477 _Py_istitle__doc__},
2478 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2479 _Py_isupper__doc__},
2480 BYTES_JOIN_METHODDEF
2481 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
2482 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2483 BYTES_LSTRIP_METHODDEF
2484 BYTES_MAKETRANS_METHODDEF
2485 BYTES_PARTITION_METHODDEF
2486 BYTES_REPLACE_METHODDEF
2487 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2488 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
2489 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
2490 BYTES_RPARTITION_METHODDEF
2491 BYTES_RSPLIT_METHODDEF
2492 BYTES_RSTRIP_METHODDEF
2493 BYTES_SPLIT_METHODDEF
2494 BYTES_SPLITLINES_METHODDEF
2495 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2496 _Py_startswith__doc__},
2497 BYTES_STRIP_METHODDEF
2498 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2499 _Py_swapcase__doc__},
2500 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2501 BYTES_TRANSLATE_METHODDEF
2502 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2503 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
2504 {NULL, NULL} /* sentinel */
2505 };
2506
2507 static PyObject *
bytes_mod(PyObject * self,PyObject * arg)2508 bytes_mod(PyObject *self, PyObject *arg)
2509 {
2510 if (!PyBytes_Check(self)) {
2511 Py_RETURN_NOTIMPLEMENTED;
2512 }
2513 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2514 arg, 0);
2515 }
2516
/* Number protocol table of the bytes type: only the remainder slot is
   filled in (with bytes_mod); the initializer stops after that slot, so
   all later slots are implicitly zero. */
static PyNumberMethods bytes_as_number = {
    0,              /*nb_add*/
    0,              /*nb_subtract*/
    0,              /*nb_multiply*/
    bytes_mod,      /*nb_remainder*/
};
2523
2524 static PyObject *
2525 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2526
2527 static PyObject *
bytes_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2528 bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2529 {
2530 PyObject *x = NULL;
2531 const char *encoding = NULL;
2532 const char *errors = NULL;
2533 PyObject *new = NULL;
2534 PyObject *func;
2535 Py_ssize_t size;
2536 static char *kwlist[] = {"source", "encoding", "errors", 0};
2537 _Py_IDENTIFIER(__bytes__);
2538
2539 if (type != &PyBytes_Type)
2540 return bytes_subtype_new(type, args, kwds);
2541 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2542 &encoding, &errors))
2543 return NULL;
2544 if (x == NULL) {
2545 if (encoding != NULL || errors != NULL) {
2546 PyErr_SetString(PyExc_TypeError,
2547 "encoding or errors without sequence "
2548 "argument");
2549 return NULL;
2550 }
2551 return PyBytes_FromStringAndSize(NULL, 0);
2552 }
2553
2554 if (encoding != NULL) {
2555 /* Encode via the codec registry */
2556 if (!PyUnicode_Check(x)) {
2557 PyErr_SetString(PyExc_TypeError,
2558 "encoding without a string argument");
2559 return NULL;
2560 }
2561 new = PyUnicode_AsEncodedString(x, encoding, errors);
2562 if (new == NULL)
2563 return NULL;
2564 assert(PyBytes_Check(new));
2565 return new;
2566 }
2567
2568 if (errors != NULL) {
2569 PyErr_SetString(PyExc_TypeError,
2570 PyUnicode_Check(x) ?
2571 "string argument without an encoding" :
2572 "errors without a string argument");
2573 return NULL;
2574 }
2575
2576 /* We'd like to call PyObject_Bytes here, but we need to check for an
2577 integer argument before deferring to PyBytes_FromObject, something
2578 PyObject_Bytes doesn't do. */
2579 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2580 if (func != NULL) {
2581 new = _PyObject_CallNoArg(func);
2582 Py_DECREF(func);
2583 if (new == NULL)
2584 return NULL;
2585 if (!PyBytes_Check(new)) {
2586 PyErr_Format(PyExc_TypeError,
2587 "__bytes__ returned non-bytes (type %.200s)",
2588 Py_TYPE(new)->tp_name);
2589 Py_DECREF(new);
2590 return NULL;
2591 }
2592 return new;
2593 }
2594 else if (PyErr_Occurred())
2595 return NULL;
2596
2597 if (PyUnicode_Check(x)) {
2598 PyErr_SetString(PyExc_TypeError,
2599 "string argument without an encoding");
2600 return NULL;
2601 }
2602 /* Is it an integer? */
2603 if (PyIndex_Check(x)) {
2604 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2605 if (size == -1 && PyErr_Occurred()) {
2606 if (!PyErr_ExceptionMatches(PyExc_TypeError))
2607 return NULL;
2608 PyErr_Clear(); /* fall through */
2609 }
2610 else {
2611 if (size < 0) {
2612 PyErr_SetString(PyExc_ValueError, "negative count");
2613 return NULL;
2614 }
2615 new = _PyBytes_FromSize(size, 1);
2616 if (new == NULL)
2617 return NULL;
2618 return new;
2619 }
2620 }
2621
2622 return PyBytes_FromObject(x);
2623 }
2624
2625 static PyObject*
_PyBytes_FromBuffer(PyObject * x)2626 _PyBytes_FromBuffer(PyObject *x)
2627 {
2628 PyObject *new;
2629 Py_buffer view;
2630
2631 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2632 return NULL;
2633
2634 new = PyBytes_FromStringAndSize(NULL, view.len);
2635 if (!new)
2636 goto fail;
2637 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2638 &view, view.len, 'C') < 0)
2639 goto fail;
2640 PyBuffer_Release(&view);
2641 return new;
2642
2643 fail:
2644 Py_XDECREF(new);
2645 PyBuffer_Release(&view);
2646 return NULL;
2647 }
2648
2649 static PyObject*
_PyBytes_FromList(PyObject * x)2650 _PyBytes_FromList(PyObject *x)
2651 {
2652 Py_ssize_t i, size = PyList_GET_SIZE(x);
2653 Py_ssize_t value;
2654 char *str;
2655 PyObject *item;
2656 _PyBytesWriter writer;
2657
2658 _PyBytesWriter_Init(&writer);
2659 str = _PyBytesWriter_Alloc(&writer, size);
2660 if (str == NULL)
2661 return NULL;
2662 writer.overallocate = 1;
2663 size = writer.allocated;
2664
2665 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2666 item = PyList_GET_ITEM(x, i);
2667 Py_INCREF(item);
2668 value = PyNumber_AsSsize_t(item, NULL);
2669 Py_DECREF(item);
2670 if (value == -1 && PyErr_Occurred())
2671 goto error;
2672
2673 if (value < 0 || value >= 256) {
2674 PyErr_SetString(PyExc_ValueError,
2675 "bytes must be in range(0, 256)");
2676 goto error;
2677 }
2678
2679 if (i >= size) {
2680 str = _PyBytesWriter_Resize(&writer, str, size+1);
2681 if (str == NULL)
2682 return NULL;
2683 size = writer.allocated;
2684 }
2685 *str++ = (char) value;
2686 }
2687 return _PyBytesWriter_Finish(&writer, str);
2688
2689 error:
2690 _PyBytesWriter_Dealloc(&writer);
2691 return NULL;
2692 }
2693
2694 static PyObject*
_PyBytes_FromTuple(PyObject * x)2695 _PyBytes_FromTuple(PyObject *x)
2696 {
2697 PyObject *bytes;
2698 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2699 Py_ssize_t value;
2700 char *str;
2701 PyObject *item;
2702
2703 bytes = PyBytes_FromStringAndSize(NULL, size);
2704 if (bytes == NULL)
2705 return NULL;
2706 str = ((PyBytesObject *)bytes)->ob_sval;
2707
2708 for (i = 0; i < size; i++) {
2709 item = PyTuple_GET_ITEM(x, i);
2710 value = PyNumber_AsSsize_t(item, NULL);
2711 if (value == -1 && PyErr_Occurred())
2712 goto error;
2713
2714 if (value < 0 || value >= 256) {
2715 PyErr_SetString(PyExc_ValueError,
2716 "bytes must be in range(0, 256)");
2717 goto error;
2718 }
2719 *str++ = (char) value;
2720 }
2721 return bytes;
2722
2723 error:
2724 Py_DECREF(bytes);
2725 return NULL;
2726 }
2727
2728 static PyObject *
_PyBytes_FromIterator(PyObject * it,PyObject * x)2729 _PyBytes_FromIterator(PyObject *it, PyObject *x)
2730 {
2731 char *str;
2732 Py_ssize_t i, size;
2733 _PyBytesWriter writer;
2734
2735 /* For iterator version, create a string object and resize as needed */
2736 size = PyObject_LengthHint(x, 64);
2737 if (size == -1 && PyErr_Occurred())
2738 return NULL;
2739
2740 _PyBytesWriter_Init(&writer);
2741 str = _PyBytesWriter_Alloc(&writer, size);
2742 if (str == NULL)
2743 return NULL;
2744 writer.overallocate = 1;
2745 size = writer.allocated;
2746
2747 /* Run the iterator to exhaustion */
2748 for (i = 0; ; i++) {
2749 PyObject *item;
2750 Py_ssize_t value;
2751
2752 /* Get the next item */
2753 item = PyIter_Next(it);
2754 if (item == NULL) {
2755 if (PyErr_Occurred())
2756 goto error;
2757 break;
2758 }
2759
2760 /* Interpret it as an int (__index__) */
2761 value = PyNumber_AsSsize_t(item, NULL);
2762 Py_DECREF(item);
2763 if (value == -1 && PyErr_Occurred())
2764 goto error;
2765
2766 /* Range check */
2767 if (value < 0 || value >= 256) {
2768 PyErr_SetString(PyExc_ValueError,
2769 "bytes must be in range(0, 256)");
2770 goto error;
2771 }
2772
2773 /* Append the byte */
2774 if (i >= size) {
2775 str = _PyBytesWriter_Resize(&writer, str, size+1);
2776 if (str == NULL)
2777 return NULL;
2778 size = writer.allocated;
2779 }
2780 *str++ = (char) value;
2781 }
2782
2783 return _PyBytesWriter_Finish(&writer, str);
2784
2785 error:
2786 _PyBytesWriter_Dealloc(&writer);
2787 return NULL;
2788 }
2789
2790 PyObject *
PyBytes_FromObject(PyObject * x)2791 PyBytes_FromObject(PyObject *x)
2792 {
2793 PyObject *it, *result;
2794
2795 if (x == NULL) {
2796 PyErr_BadInternalCall();
2797 return NULL;
2798 }
2799
2800 if (PyBytes_CheckExact(x)) {
2801 Py_INCREF(x);
2802 return x;
2803 }
2804
2805 /* Use the modern buffer interface */
2806 if (PyObject_CheckBuffer(x))
2807 return _PyBytes_FromBuffer(x);
2808
2809 if (PyList_CheckExact(x))
2810 return _PyBytes_FromList(x);
2811
2812 if (PyTuple_CheckExact(x))
2813 return _PyBytes_FromTuple(x);
2814
2815 if (!PyUnicode_Check(x)) {
2816 it = PyObject_GetIter(x);
2817 if (it != NULL) {
2818 result = _PyBytes_FromIterator(it, x);
2819 Py_DECREF(it);
2820 return result;
2821 }
2822 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2823 return NULL;
2824 }
2825 }
2826
2827 PyErr_Format(PyExc_TypeError,
2828 "cannot convert '%.200s' object to bytes",
2829 x->ob_type->tp_name);
2830 return NULL;
2831 }
2832
2833 static PyObject *
bytes_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2834 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2835 {
2836 PyObject *tmp, *pnew;
2837 Py_ssize_t n;
2838
2839 assert(PyType_IsSubtype(type, &PyBytes_Type));
2840 tmp = bytes_new(&PyBytes_Type, args, kwds);
2841 if (tmp == NULL)
2842 return NULL;
2843 assert(PyBytes_Check(tmp));
2844 n = PyBytes_GET_SIZE(tmp);
2845 pnew = type->tp_alloc(type, n);
2846 if (pnew != NULL) {
2847 memcpy(PyBytes_AS_STRING(pnew),
2848 PyBytes_AS_STRING(tmp), n+1);
2849 ((PyBytesObject *)pnew)->ob_shash =
2850 ((PyBytesObject *)tmp)->ob_shash;
2851 }
2852 Py_DECREF(tmp);
2853 return pnew;
2854 }
2855
2856 PyDoc_STRVAR(bytes_doc,
2857 "bytes(iterable_of_ints) -> bytes\n\
2858 bytes(string, encoding[, errors]) -> bytes\n\
2859 bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2860 bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2861 bytes() -> empty bytes object\n\
2862 \n\
2863 Construct an immutable array of bytes from:\n\
2864 - an iterable yielding integers in range(256)\n\
2865 - a text string encoded using the specified encoding\n\
2866 - any object implementing the buffer API.\n\
2867 - an integer");
2868
2869 static PyObject *bytes_iter(PyObject *seq);
2870
/* Type object of the built-in bytes type.  The initializer is positional,
   so the order of the entries must not be changed. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    bytes_dealloc,                              /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    &bytes_as_number,                           /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
2913
2914 void
PyBytes_Concat(PyObject ** pv,PyObject * w)2915 PyBytes_Concat(PyObject **pv, PyObject *w)
2916 {
2917 assert(pv != NULL);
2918 if (*pv == NULL)
2919 return;
2920 if (w == NULL) {
2921 Py_CLEAR(*pv);
2922 return;
2923 }
2924
2925 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2926 /* Only one reference, so we can resize in place */
2927 Py_ssize_t oldsize;
2928 Py_buffer wb;
2929
2930 wb.len = -1;
2931 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2932 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2933 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2934 Py_CLEAR(*pv);
2935 return;
2936 }
2937
2938 oldsize = PyBytes_GET_SIZE(*pv);
2939 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2940 PyErr_NoMemory();
2941 goto error;
2942 }
2943 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2944 goto error;
2945
2946 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2947 PyBuffer_Release(&wb);
2948 return;
2949
2950 error:
2951 PyBuffer_Release(&wb);
2952 Py_CLEAR(*pv);
2953 return;
2954 }
2955
2956 else {
2957 /* Multiple references, need to create new object */
2958 PyObject *v;
2959 v = bytes_concat(*pv, w);
2960 Py_SETREF(*pv, v);
2961 }
2962 }
2963
2964 void
PyBytes_ConcatAndDel(PyObject ** pv,PyObject * w)2965 PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
2966 {
2967 PyBytes_Concat(pv, w);
2968 Py_XDECREF(w);
2969 }
2970
2971
2972 /* The following function breaks the notion that bytes are immutable:
2973 it changes the size of a bytes object. We get away with this only if there
2974 is only one module referencing the object. You can also think of it
2975 as creating a new bytes object and destroying the old one, only
2976 more efficiently. In any case, don't use this if the bytes object may
2977 already be known to some other part of the code...
2978 Note that if there's not enough memory to resize the bytes object, the
2979 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
2980 memory" exception is set, and -1 is returned. Else (on success) 0 is
2981 returned, and the value in *pv may or may not be the same as on input.
2982 As always, an extra byte is allocated for a trailing \0 byte (newsize
2983 does *not* include that), and a trailing \0 byte is stored.
2984 */
2985
2986 int
_PyBytes_Resize(PyObject ** pv,Py_ssize_t newsize)2987 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2988 {
2989 PyObject *v;
2990 PyBytesObject *sv;
2991 v = *pv;
2992 if (!PyBytes_Check(v) || newsize < 0) {
2993 goto error;
2994 }
2995 if (Py_SIZE(v) == newsize) {
2996 /* return early if newsize equals to v->ob_size */
2997 return 0;
2998 }
2999 if (Py_SIZE(v) == 0) {
3000 if (newsize == 0) {
3001 return 0;
3002 }
3003 *pv = _PyBytes_FromSize(newsize, 0);
3004 Py_DECREF(v);
3005 return (*pv == NULL) ? -1 : 0;
3006 }
3007 if (Py_REFCNT(v) != 1) {
3008 goto error;
3009 }
3010 if (newsize == 0) {
3011 *pv = _PyBytes_FromSize(0, 0);
3012 Py_DECREF(v);
3013 return (*pv == NULL) ? -1 : 0;
3014 }
3015 /* XXX UNREF/NEWREF interface should be more symmetrical */
3016 _Py_DEC_REFTOTAL;
3017 _Py_ForgetReference(v);
3018 *pv = (PyObject *)
3019 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
3020 if (*pv == NULL) {
3021 PyObject_Del(v);
3022 PyErr_NoMemory();
3023 return -1;
3024 }
3025 _Py_NewReference(*pv);
3026 sv = (PyBytesObject *) *pv;
3027 Py_SIZE(sv) = newsize;
3028 sv->ob_sval[newsize] = '\0';
3029 sv->ob_shash = -1; /* invalidate cached hash value */
3030 return 0;
3031 error:
3032 *pv = 0;
3033 Py_DECREF(v);
3034 PyErr_BadInternalCall();
3035 return -1;
3036 }
3037
3038 void
PyBytes_Fini(void)3039 PyBytes_Fini(void)
3040 {
3041 int i;
3042 for (i = 0; i < UCHAR_MAX + 1; i++)
3043 Py_CLEAR(characters[i]);
3044 Py_CLEAR(nullstring);
3045 }
3046
3047 /*********************** Bytes Iterator ****************************/
3048
/* Instance layout of the bytes iterator type (PyBytesIter_Type). */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* Index of the next byte to be returned */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
3054
3055 static void
striter_dealloc(striterobject * it)3056 striter_dealloc(striterobject *it)
3057 {
3058 _PyObject_GC_UNTRACK(it);
3059 Py_XDECREF(it->it_seq);
3060 PyObject_GC_Del(it);
3061 }
3062
3063 static int
striter_traverse(striterobject * it,visitproc visit,void * arg)3064 striter_traverse(striterobject *it, visitproc visit, void *arg)
3065 {
3066 Py_VISIT(it->it_seq);
3067 return 0;
3068 }
3069
3070 static PyObject *
striter_next(striterobject * it)3071 striter_next(striterobject *it)
3072 {
3073 PyBytesObject *seq;
3074 PyObject *item;
3075
3076 assert(it != NULL);
3077 seq = it->it_seq;
3078 if (seq == NULL)
3079 return NULL;
3080 assert(PyBytes_Check(seq));
3081
3082 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3083 item = PyLong_FromLong(
3084 (unsigned char)seq->ob_sval[it->it_index]);
3085 if (item != NULL)
3086 ++it->it_index;
3087 return item;
3088 }
3089
3090 it->it_seq = NULL;
3091 Py_DECREF(seq);
3092 return NULL;
3093 }
3094
3095 static PyObject *
striter_len(striterobject * it)3096 striter_len(striterobject *it)
3097 {
3098 Py_ssize_t len = 0;
3099 if (it->it_seq)
3100 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3101 return PyLong_FromSsize_t(len);
3102 }
3103
3104 PyDoc_STRVAR(length_hint_doc,
3105 "Private method returning an estimate of len(list(it)).");
3106
3107 static PyObject *
striter_reduce(striterobject * it)3108 striter_reduce(striterobject *it)
3109 {
3110 if (it->it_seq != NULL) {
3111 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
3112 it->it_seq, it->it_index);
3113 } else {
3114 return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
3115 }
3116 }
3117
3118 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3119
3120 static PyObject *
striter_setstate(striterobject * it,PyObject * state)3121 striter_setstate(striterobject *it, PyObject *state)
3122 {
3123 Py_ssize_t index = PyLong_AsSsize_t(state);
3124 if (index == -1 && PyErr_Occurred())
3125 return NULL;
3126 if (it->it_seq != NULL) {
3127 if (index < 0)
3128 index = 0;
3129 else if (index > PyBytes_GET_SIZE(it->it_seq))
3130 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3131 it->it_index = index;
3132 }
3133 Py_RETURN_NONE;
3134 }
3135
3136 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3137
3138 static PyMethodDef striter_methods[] = {
3139 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3140 length_hint_doc},
3141 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3142 reduce_doc},
3143 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3144 setstate_doc},
3145 {NULL, NULL} /* sentinel */
3146 };
3147
/* Type object of the iterator returned by bytes_iter().  The initializer
   is positional, so the order of the entries must not be changed; slots
   after the last listed one are implicitly zero. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3180
3181 static PyObject *
bytes_iter(PyObject * seq)3182 bytes_iter(PyObject *seq)
3183 {
3184 striterobject *it;
3185
3186 if (!PyBytes_Check(seq)) {
3187 PyErr_BadInternalCall();
3188 return NULL;
3189 }
3190 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3191 if (it == NULL)
3192 return NULL;
3193 it->it_index = 0;
3194 Py_INCREF(seq);
3195 it->it_seq = (PyBytesObject *)seq;
3196 _PyObject_GC_TRACK(it);
3197 return (PyObject *)it;
3198 }
3199
3200
3201 /* _PyBytesWriter API */
3202
/* Divisor used by _PyBytesWriter_Resize() when overallocation is enabled:
   the requested size is increased by size / OVERALLOCATE_FACTOR. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% (size / 2) is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% (size / 4) is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3210
3211 void
_PyBytesWriter_Init(_PyBytesWriter * writer)3212 _PyBytesWriter_Init(_PyBytesWriter *writer)
3213 {
3214 /* Set all attributes before small_buffer to 0 */
3215 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3216 #ifdef Py_DEBUG
3217 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
3218 #endif
3219 }
3220
3221 void
_PyBytesWriter_Dealloc(_PyBytesWriter * writer)3222 _PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3223 {
3224 Py_CLEAR(writer->buffer);
3225 }
3226
3227 Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter * writer)3228 _PyBytesWriter_AsString(_PyBytesWriter *writer)
3229 {
3230 if (writer->use_small_buffer) {
3231 assert(writer->buffer == NULL);
3232 return writer->small_buffer;
3233 }
3234 else if (writer->use_bytearray) {
3235 assert(writer->buffer != NULL);
3236 return PyByteArray_AS_STRING(writer->buffer);
3237 }
3238 else {
3239 assert(writer->buffer != NULL);
3240 return PyBytes_AS_STRING(writer->buffer);
3241 }
3242 }
3243
3244 Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter * writer,char * str)3245 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3246 {
3247 char *start = _PyBytesWriter_AsString(writer);
3248 assert(str != NULL);
3249 assert(str >= start);
3250 assert(str - start <= writer->allocated);
3251 return str - start;
3252 }
3253
3254 Py_LOCAL_INLINE(void)
_PyBytesWriter_CheckConsistency(_PyBytesWriter * writer,char * str)3255 _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3256 {
3257 #ifdef Py_DEBUG
3258 char *start, *end;
3259
3260 if (writer->use_small_buffer) {
3261 assert(writer->buffer == NULL);
3262 }
3263 else {
3264 assert(writer->buffer != NULL);
3265 if (writer->use_bytearray)
3266 assert(PyByteArray_CheckExact(writer->buffer));
3267 else
3268 assert(PyBytes_CheckExact(writer->buffer));
3269 assert(Py_REFCNT(writer->buffer) == 1);
3270 }
3271
3272 if (writer->use_bytearray) {
3273 /* bytearray has its own overallocation algorithm,
3274 writer overallocation must be disabled */
3275 assert(!writer->overallocate);
3276 }
3277
3278 assert(0 <= writer->allocated);
3279 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3280 /* the last byte must always be null */
3281 start = _PyBytesWriter_AsString(writer);
3282 assert(start[writer->allocated] == 0);
3283
3284 end = start + writer->allocated;
3285 assert(str != NULL);
3286 assert(start <= str && str <= end);
3287 #endif
3288 }
3289
3290 void*
_PyBytesWriter_Resize(_PyBytesWriter * writer,void * str,Py_ssize_t size)3291 _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3292 {
3293 Py_ssize_t allocated, pos;
3294
3295 _PyBytesWriter_CheckConsistency(writer, str);
3296 assert(writer->allocated < size);
3297
3298 allocated = size;
3299 if (writer->overallocate
3300 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3301 /* overallocate to limit the number of realloc() */
3302 allocated += allocated / OVERALLOCATE_FACTOR;
3303 }
3304
3305 pos = _PyBytesWriter_GetSize(writer, str);
3306 if (!writer->use_small_buffer) {
3307 if (writer->use_bytearray) {
3308 if (PyByteArray_Resize(writer->buffer, allocated))
3309 goto error;
3310 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3311 but we cannot use ob_alloc because bytes may need to be moved
3312 to use the whole buffer. bytearray uses an internal optimization
3313 to avoid moving or copying bytes when bytes are removed at the
3314 beginning (ex: del bytearray[:1]). */
3315 }
3316 else {
3317 if (_PyBytes_Resize(&writer->buffer, allocated))
3318 goto error;
3319 }
3320 }
3321 else {
3322 /* convert from stack buffer to bytes object buffer */
3323 assert(writer->buffer == NULL);
3324
3325 if (writer->use_bytearray)
3326 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3327 else
3328 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3329 if (writer->buffer == NULL)
3330 goto error;
3331
3332 if (pos != 0) {
3333 char *dest;
3334 if (writer->use_bytearray)
3335 dest = PyByteArray_AS_STRING(writer->buffer);
3336 else
3337 dest = PyBytes_AS_STRING(writer->buffer);
3338 memcpy(dest,
3339 writer->small_buffer,
3340 pos);
3341 }
3342
3343 writer->use_small_buffer = 0;
3344 #ifdef Py_DEBUG
3345 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
3346 #endif
3347 }
3348 writer->allocated = allocated;
3349
3350 str = _PyBytesWriter_AsString(writer) + pos;
3351 _PyBytesWriter_CheckConsistency(writer, str);
3352 return str;
3353
3354 error:
3355 _PyBytesWriter_Dealloc(writer);
3356 return NULL;
3357 }
3358
3359 void*
_PyBytesWriter_Prepare(_PyBytesWriter * writer,void * str,Py_ssize_t size)3360 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3361 {
3362 Py_ssize_t new_min_size;
3363
3364 _PyBytesWriter_CheckConsistency(writer, str);
3365 assert(size >= 0);
3366
3367 if (size == 0) {
3368 /* nothing to do */
3369 return str;
3370 }
3371
3372 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3373 PyErr_NoMemory();
3374 _PyBytesWriter_Dealloc(writer);
3375 return NULL;
3376 }
3377 new_min_size = writer->min_size + size;
3378
3379 if (new_min_size > writer->allocated)
3380 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3381
3382 writer->min_size = new_min_size;
3383 return str;
3384 }
3385
3386 /* Allocate the buffer to write size bytes.
3387 Return the pointer to the beginning of buffer data.
3388 Raise an exception and return NULL on error. */
3389 void*
_PyBytesWriter_Alloc(_PyBytesWriter * writer,Py_ssize_t size)3390 _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3391 {
3392 /* ensure that _PyBytesWriter_Alloc() is only called once */
3393 assert(writer->min_size == 0 && writer->buffer == NULL);
3394 assert(size >= 0);
3395
3396 writer->use_small_buffer = 1;
3397 #ifdef Py_DEBUG
3398 writer->allocated = sizeof(writer->small_buffer) - 1;
3399 /* In debug mode, don't use the full small buffer because it is less
3400 efficient than bytes and bytearray objects to detect buffer underflow
3401 and buffer overflow. Use 10 bytes of the small buffer to test also
3402 code using the smaller buffer in debug mode.
3403
3404 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3405 in debug mode to also be able to detect stack overflow when running
3406 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3407 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3408 stack overflow. */
3409 writer->allocated = Py_MIN(writer->allocated, 10);
3410 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3411 to detect buffer overflow */
3412 writer->small_buffer[writer->allocated] = 0;
3413 #else
3414 writer->allocated = sizeof(writer->small_buffer);
3415 #endif
3416 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3417 }
3418
3419 PyObject *
_PyBytesWriter_Finish(_PyBytesWriter * writer,void * str)3420 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3421 {
3422 Py_ssize_t size;
3423 PyObject *result;
3424
3425 _PyBytesWriter_CheckConsistency(writer, str);
3426
3427 size = _PyBytesWriter_GetSize(writer, str);
3428 if (size == 0 && !writer->use_bytearray) {
3429 Py_CLEAR(writer->buffer);
3430 /* Get the empty byte string singleton */
3431 result = PyBytes_FromStringAndSize(NULL, 0);
3432 }
3433 else if (writer->use_small_buffer) {
3434 if (writer->use_bytearray) {
3435 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3436 }
3437 else {
3438 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3439 }
3440 }
3441 else {
3442 result = writer->buffer;
3443 writer->buffer = NULL;
3444
3445 if (size != writer->allocated) {
3446 if (writer->use_bytearray) {
3447 if (PyByteArray_Resize(result, size)) {
3448 Py_DECREF(result);
3449 return NULL;
3450 }
3451 }
3452 else {
3453 if (_PyBytes_Resize(&result, size)) {
3454 assert(result == NULL);
3455 return NULL;
3456 }
3457 }
3458 }
3459 }
3460 return result;
3461 }
3462
3463 void*
_PyBytesWriter_WriteBytes(_PyBytesWriter * writer,void * ptr,const void * bytes,Py_ssize_t size)3464 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3465 const void *bytes, Py_ssize_t size)
3466 {
3467 char *str = (char *)ptr;
3468
3469 str = _PyBytesWriter_Prepare(writer, str, size);
3470 if (str == NULL)
3471 return NULL;
3472
3473 memcpy(str, bytes, size);
3474 str += size;
3475
3476 return str;
3477 }
3478