1 /* String (str/bytes) object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 
5 #include "Python.h"
6 #include <ctype.h>
7 #include <stddef.h>
8 
#ifdef COUNT_ALLOCS
/* Allocation statistics: number of times the cached empty string
   (null_strings) or a cached one-character string (one_strings) was
   returned instead of allocating a new object. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-character strings, indexed by the character's byte value. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* Cached empty string; stays NULL until the first empty string is created. */
static PyStringObject *nullstring;
15 
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;
25 
/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyStringObject_SIZE + n bytes.
   The "+ 1" reserves room for the terminating null byte that is stored
   after the n data bytes.

   Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33 
34 /*
35    For PyString_FromString(), the parameter `str' points to a null-terminated
36    string containing exactly `size' bytes.
37 
   For PyString_FromStringAndSize(), the parameter `str' is
39    either NULL or else points to a string containing at least `size' bytes.
40    For PyString_FromStringAndSize(), the string in the `str' parameter does
41    not have to be null-terminated.  (Therefore it is safe to construct a
42    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
44    bytes (setting the last byte to the null terminating character) and you can
45    fill in the data yourself.  If `str' is non-NULL then the resulting
46    PyString object must be treated as immutable and you must not fill in nor
47    alter the data yourself, since the strings may be shared.
48 
49    The PyObject member `op->ob_size', which denotes the number of "extra
50    items" in a variable-size object, will contain the number of bytes
51    allocated for string data, not counting the null terminating character.
52    It is therefore equal to the `size' parameter (for
53    PyString_FromStringAndSize()) or the length of the string in the `str'
54    parameter (for PyString_FromString()).
55 */
56 PyObject *
PyString_FromStringAndSize(const char * str,Py_ssize_t size)57 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
58 {
59     register PyStringObject *op;
60     if (size < 0) {
61         PyErr_SetString(PyExc_SystemError,
62             "Negative size passed to PyString_FromStringAndSize");
63         return NULL;
64     }
65     if (size == 0 && (op = nullstring) != NULL) {
66 #ifdef COUNT_ALLOCS
67         null_strings++;
68 #endif
69         Py_INCREF(op);
70         return (PyObject *)op;
71     }
72     if (size == 1 && str != NULL &&
73         (op = characters[*str & UCHAR_MAX]) != NULL)
74     {
75 #ifdef COUNT_ALLOCS
76         one_strings++;
77 #endif
78         Py_INCREF(op);
79         return (PyObject *)op;
80     }
81 
82     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83         PyErr_SetString(PyExc_OverflowError, "string is too large");
84         return NULL;
85     }
86 
87     /* Inline PyObject_NewVar */
88     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89     if (op == NULL)
90         return PyErr_NoMemory();
91     PyObject_INIT_VAR(op, &PyString_Type, size);
92     op->ob_shash = -1;
93     op->ob_sstate = SSTATE_NOT_INTERNED;
94     if (str != NULL)
95         Py_MEMCPY(op->ob_sval, str, size);
96     op->ob_sval[size] = '\0';
97     /* share short strings */
98     if (size == 0) {
99         PyObject *t = (PyObject *)op;
100         PyString_InternInPlace(&t);
101         op = (PyStringObject *)t;
102         nullstring = op;
103         Py_INCREF(op);
104     } else if (size == 1 && str != NULL) {
105         PyObject *t = (PyObject *)op;
106         PyString_InternInPlace(&t);
107         op = (PyStringObject *)t;
108         characters[*str & UCHAR_MAX] = op;
109         Py_INCREF(op);
110     }
111     return (PyObject *) op;
112 }
113 
114 PyObject *
PyString_FromString(const char * str)115 PyString_FromString(const char *str)
116 {
117     register size_t size;
118     register PyStringObject *op;
119 
120     assert(str != NULL);
121     size = strlen(str);
122     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123         PyErr_SetString(PyExc_OverflowError,
124             "string is too long for a Python string");
125         return NULL;
126     }
127     if (size == 0 && (op = nullstring) != NULL) {
128 #ifdef COUNT_ALLOCS
129         null_strings++;
130 #endif
131         Py_INCREF(op);
132         return (PyObject *)op;
133     }
134     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
135 #ifdef COUNT_ALLOCS
136         one_strings++;
137 #endif
138         Py_INCREF(op);
139         return (PyObject *)op;
140     }
141 
142     /* Inline PyObject_NewVar */
143     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144     if (op == NULL)
145         return PyErr_NoMemory();
146     PyObject_INIT_VAR(op, &PyString_Type, size);
147     op->ob_shash = -1;
148     op->ob_sstate = SSTATE_NOT_INTERNED;
149     Py_MEMCPY(op->ob_sval, str, size+1);
150     /* share short strings */
151     if (size == 0) {
152         PyObject *t = (PyObject *)op;
153         PyString_InternInPlace(&t);
154         op = (PyStringObject *)t;
155         nullstring = op;
156         Py_INCREF(op);
157     } else if (size == 1) {
158         PyObject *t = (PyObject *)op;
159         PyString_InternInPlace(&t);
160         op = (PyStringObject *)t;
161         characters[*str & UCHAR_MAX] = op;
162         Py_INCREF(op);
163     }
164     return (PyObject *) op;
165 }
166 
167 PyObject *
PyString_FromFormatV(const char * format,va_list vargs)168 PyString_FromFormatV(const char *format, va_list vargs)
169 {
170     va_list count;
171     Py_ssize_t n = 0;
172     const char* f;
173     char *s;
174     PyObject* string;
175 
176 #ifdef VA_LIST_IS_ARRAY
177     Py_MEMCPY(count, vargs, sizeof(va_list));
178 #else
179 #ifdef  __va_copy
180     __va_copy(count, vargs);
181 #else
182     count = vargs;
183 #endif
184 #endif
185     /* step 1: figure out how large a buffer we need */
186     for (f = format; *f; f++) {
187         if (*f == '%') {
188 #ifdef HAVE_LONG_LONG
189             int longlongflag = 0;
190 #endif
191             const char* p = f;
192             while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193                 ;
194 
195             /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196              * they don't affect the amount of space we reserve.
197              */
198             if (*f == 'l') {
199                 if (f[1] == 'd' || f[1] == 'u') {
200                     ++f;
201                 }
202 #ifdef HAVE_LONG_LONG
203                 else if (f[1] == 'l' &&
204                          (f[2] == 'd' || f[2] == 'u')) {
205                     longlongflag = 1;
206                     f += 2;
207                 }
208 #endif
209             }
210             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211                 ++f;
212             }
213 
214             switch (*f) {
215             case 'c':
216                 (void)va_arg(count, int);
217                 /* fall through... */
218             case '%':
219                 n++;
220                 break;
221             case 'd': case 'u': case 'i': case 'x':
222                 (void) va_arg(count, int);
223 #ifdef HAVE_LONG_LONG
224                 /* Need at most
225                    ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226                    plus 1 for the sign.  53/22 is an upper
227                    bound for log10(256). */
228                 if (longlongflag)
229                     n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230                 else
231 #endif
232                     /* 20 bytes is enough to hold a 64-bit
233                        integer.  Decimal takes the most
234                        space.  This isn't enough for
235                        octal. */
236                     n += 20;
237 
238                 break;
239             case 's':
240                 s = va_arg(count, char*);
241                 n += strlen(s);
242                 break;
243             case 'p':
244                 (void) va_arg(count, int);
245                 /* maximum 64-bit pointer representation:
246                  * 0xffffffffffffffff
247                  * so 19 characters is enough.
248                  * XXX I count 18 -- what's the extra for?
249                  */
250                 n += 19;
251                 break;
252             default:
253                 /* if we stumble upon an unknown
254                    formatting code, copy the rest of
255                    the format string to the output
256                    string. (we cannot just skip the
257                    code, since there's no way to know
258                    what's in the argument list) */
259                 n += strlen(p);
260                 goto expand;
261             }
262         } else
263             n++;
264     }
265  expand:
266     /* step 2: fill the buffer */
267     /* Since we've analyzed how much space we need for the worst case,
268        use sprintf directly instead of the slower PyOS_snprintf. */
269     string = PyString_FromStringAndSize(NULL, n);
270     if (!string)
271         return NULL;
272 
273     s = PyString_AsString(string);
274 
275     for (f = format; *f; f++) {
276         if (*f == '%') {
277             const char* p = f++;
278             Py_ssize_t i;
279             int longflag = 0;
280 #ifdef HAVE_LONG_LONG
281             int longlongflag = 0;
282 #endif
283             int size_tflag = 0;
284             /* parse the width.precision part (we're only
285                interested in the precision value, if any) */
286             n = 0;
287             while (isdigit(Py_CHARMASK(*f)))
288                 n = (n*10) + *f++ - '0';
289             if (*f == '.') {
290                 f++;
291                 n = 0;
292                 while (isdigit(Py_CHARMASK(*f)))
293                     n = (n*10) + *f++ - '0';
294             }
295             while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296                 f++;
297             /* Handle %ld, %lu, %lld and %llu. */
298             if (*f == 'l') {
299                 if (f[1] == 'd' || f[1] == 'u') {
300                     longflag = 1;
301                     ++f;
302                 }
303 #ifdef HAVE_LONG_LONG
304                 else if (f[1] == 'l' &&
305                          (f[2] == 'd' || f[2] == 'u')) {
306                     longlongflag = 1;
307                     f += 2;
308                 }
309 #endif
310             }
311             /* handle the size_t flag. */
312             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313                 size_tflag = 1;
314                 ++f;
315             }
316 
317             switch (*f) {
318             case 'c':
319                 *s++ = va_arg(vargs, int);
320                 break;
321             case 'd':
322                 if (longflag)
323                     sprintf(s, "%ld", va_arg(vargs, long));
324 #ifdef HAVE_LONG_LONG
325                 else if (longlongflag)
326                     sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327                         va_arg(vargs, PY_LONG_LONG));
328 #endif
329                 else if (size_tflag)
330                     sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331                         va_arg(vargs, Py_ssize_t));
332                 else
333                     sprintf(s, "%d", va_arg(vargs, int));
334                 s += strlen(s);
335                 break;
336             case 'u':
337                 if (longflag)
338                     sprintf(s, "%lu",
339                         va_arg(vargs, unsigned long));
340 #ifdef HAVE_LONG_LONG
341                 else if (longlongflag)
342                     sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343                         va_arg(vargs, PY_LONG_LONG));
344 #endif
345                 else if (size_tflag)
346                     sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347                         va_arg(vargs, size_t));
348                 else
349                     sprintf(s, "%u",
350                         va_arg(vargs, unsigned int));
351                 s += strlen(s);
352                 break;
353             case 'i':
354                 sprintf(s, "%i", va_arg(vargs, int));
355                 s += strlen(s);
356                 break;
357             case 'x':
358                 sprintf(s, "%x", va_arg(vargs, int));
359                 s += strlen(s);
360                 break;
361             case 's':
362                 p = va_arg(vargs, char*);
363                 i = strlen(p);
364                 if (n > 0 && i > n)
365                     i = n;
366                 Py_MEMCPY(s, p, i);
367                 s += i;
368                 break;
369             case 'p':
370                 sprintf(s, "%p", va_arg(vargs, void*));
371                 /* %p is ill-defined:  ensure leading 0x. */
372                 if (s[1] == 'X')
373                     s[1] = 'x';
374                 else if (s[1] != 'x') {
375                     memmove(s+2, s, strlen(s)+1);
376                     s[0] = '0';
377                     s[1] = 'x';
378                 }
379                 s += strlen(s);
380                 break;
381             case '%':
382                 *s++ = '%';
383                 break;
384             default:
385                 strcpy(s, p);
386                 s += strlen(s);
387                 goto end;
388             }
389         } else
390             *s++ = *f;
391     }
392 
393  end:
394     if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395         return NULL;
396     return string;
397 }
398 
399 PyObject *
PyString_FromFormat(const char * format,...)400 PyString_FromFormat(const char *format, ...)
401 {
402     PyObject* ret;
403     va_list vargs;
404 
405 #ifdef HAVE_STDARG_PROTOTYPES
406     va_start(vargs, format);
407 #else
408     va_start(vargs);
409 #endif
410     ret = PyString_FromFormatV(format, vargs);
411     va_end(vargs);
412     return ret;
413 }
414 
415 
PyString_Decode(const char * s,Py_ssize_t size,const char * encoding,const char * errors)416 PyObject *PyString_Decode(const char *s,
417                           Py_ssize_t size,
418                           const char *encoding,
419                           const char *errors)
420 {
421     PyObject *v, *str;
422 
423     str = PyString_FromStringAndSize(s, size);
424     if (str == NULL)
425         return NULL;
426     v = PyString_AsDecodedString(str, encoding, errors);
427     Py_DECREF(str);
428     return v;
429 }
430 
/* Decode the string object `str' through the codec registry and return
   whatever object the codec produces.  A NULL `encoding' selects the
   default encoding when Unicode support is compiled in, and is an error
   otherwise.  Returns NULL with an exception set on failure. */
PyObject *PyString_AsDecodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Decode via the codec registry; NULL propagates to the caller. */
    return PyCodec_Decode(str, encoding, errors);
}
461 
/* Like PyString_AsDecodedObject(), but the result must be a string
   object: a Unicode result is encoded back to a string, and any other
   type raises TypeError.  Returns NULL with an exception set on failure. */
PyObject *PyString_AsDecodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *result = PyString_AsDecodedObject(str, encoding, errors);

    if (result == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(result)) {
        PyObject *unicode = result;
        result = PyUnicode_AsEncodedString(unicode, NULL, NULL);
        Py_DECREF(unicode);
        if (result == NULL)
            return NULL;
    }
#endif

    if (PyString_Check(result))
        return result;

    PyErr_Format(PyExc_TypeError,
                 "decoder did not return a string object (type=%.400s)",
                 Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}
495 
PyString_Encode(const char * s,Py_ssize_t size,const char * encoding,const char * errors)496 PyObject *PyString_Encode(const char *s,
497                           Py_ssize_t size,
498                           const char *encoding,
499                           const char *errors)
500 {
501     PyObject *v, *str;
502 
503     str = PyString_FromStringAndSize(s, size);
504     if (str == NULL)
505         return NULL;
506     v = PyString_AsEncodedString(str, encoding, errors);
507     Py_DECREF(str);
508     return v;
509 }
510 
/* Encode the string object `str' through the codec registry and return
   whatever object the codec produces.  A NULL `encoding' selects the
   default encoding when Unicode support is compiled in, and is an error
   otherwise.  Returns NULL with an exception set on failure. */
PyObject *PyString_AsEncodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Encode via the codec registry; NULL propagates to the caller. */
    return PyCodec_Encode(str, encoding, errors);
}
541 
/* Like PyString_AsEncodedObject(), but the result must be a string
   object: a Unicode result is encoded to a string, and any other type
   raises TypeError.  Returns NULL with an exception set on failure. */
PyObject *PyString_AsEncodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *result = PyString_AsEncodedObject(str, encoding, errors);

    if (result == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(result)) {
        PyObject *unicode = result;
        result = PyUnicode_AsEncodedString(unicode, NULL, NULL);
        Py_DECREF(unicode);
        if (result == NULL)
            return NULL;
    }
#endif

    if (PyString_Check(result))
        return result;

    PyErr_Format(PyExc_TypeError,
                 "encoder did not return a string object (type=%.400s)",
                 Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}
575 
576 static void
string_dealloc(PyObject * op)577 string_dealloc(PyObject *op)
578 {
579     switch (PyString_CHECK_INTERNED(op)) {
580         case SSTATE_NOT_INTERNED:
581             break;
582 
583         case SSTATE_INTERNED_MORTAL:
584             /* revive dead object temporarily for DelItem */
585             Py_REFCNT(op) = 3;
586             if (PyDict_DelItem(interned, op) != 0)
587                 Py_FatalError(
588                     "deletion of interned string failed");
589             break;
590 
591         case SSTATE_INTERNED_IMMORTAL:
592             Py_FatalError("Immortal interned string died.");
593 
594         default:
595             Py_FatalError("Inconsistent interned string state.");
596     }
597     Py_TYPE(op)->tp_free(op);
598 }
599 
600 /* Unescape a backslash-escaped string. If unicode is non-zero,
601    the string is a u-literal. If recode_encoding is non-zero,
602    the string is UTF-8 encoded and should be re-encoded in the
603    specified encoding.  */
604 
PyString_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding)605 PyObject *PyString_DecodeEscape(const char *s,
606                                 Py_ssize_t len,
607                                 const char *errors,
608                                 Py_ssize_t unicode,
609                                 const char *recode_encoding)
610 {
611     int c;
612     char *p, *buf;
613     const char *end;
614     PyObject *v;
615     Py_ssize_t newlen = recode_encoding ? 4*len:len;
616     v = PyString_FromStringAndSize((char *)NULL, newlen);
617     if (v == NULL)
618         return NULL;
619     p = buf = PyString_AsString(v);
620     end = s + len;
621     while (s < end) {
622         if (*s != '\\') {
623           non_esc:
624 #ifdef Py_USING_UNICODE
625             if (recode_encoding && (*s & 0x80)) {
626                 PyObject *u, *w;
627                 char *r;
628                 const char* t;
629                 Py_ssize_t rn;
630                 t = s;
631                 /* Decode non-ASCII bytes as UTF-8. */
632                 while (t < end && (*t & 0x80)) t++;
633                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
634                 if(!u) goto failed;
635 
636                 /* Recode them in target encoding. */
637                 w = PyUnicode_AsEncodedString(
638                     u, recode_encoding, errors);
639                 Py_DECREF(u);
640                 if (!w)                 goto failed;
641 
642                 /* Append bytes to output buffer. */
643                 assert(PyString_Check(w));
644                 r = PyString_AS_STRING(w);
645                 rn = PyString_GET_SIZE(w);
646                 Py_MEMCPY(p, r, rn);
647                 p += rn;
648                 Py_DECREF(w);
649                 s = t;
650             } else {
651                 *p++ = *s++;
652             }
653 #else
654             *p++ = *s++;
655 #endif
656             continue;
657         }
658         s++;
659         if (s==end) {
660             PyErr_SetString(PyExc_ValueError,
661                             "Trailing \\ in string");
662             goto failed;
663         }
664         switch (*s++) {
665         /* XXX This assumes ASCII! */
666         case '\n': break;
667         case '\\': *p++ = '\\'; break;
668         case '\'': *p++ = '\''; break;
669         case '\"': *p++ = '\"'; break;
670         case 'b': *p++ = '\b'; break;
671         case 'f': *p++ = '\014'; break; /* FF */
672         case 't': *p++ = '\t'; break;
673         case 'n': *p++ = '\n'; break;
674         case 'r': *p++ = '\r'; break;
675         case 'v': *p++ = '\013'; break; /* VT */
676         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677         case '0': case '1': case '2': case '3':
678         case '4': case '5': case '6': case '7':
679             c = s[-1] - '0';
680             if (s < end && '0' <= *s && *s <= '7') {
681                 c = (c<<3) + *s++ - '0';
682                 if (s < end && '0' <= *s && *s <= '7')
683                     c = (c<<3) + *s++ - '0';
684             }
685             *p++ = c;
686             break;
687         case 'x':
688             if (s+1 < end &&
689                 isxdigit(Py_CHARMASK(s[0])) &&
690                 isxdigit(Py_CHARMASK(s[1])))
691             {
692                 unsigned int x = 0;
693                 c = Py_CHARMASK(*s);
694                 s++;
695                 if (isdigit(c))
696                     x = c - '0';
697                 else if (islower(c))
698                     x = 10 + c - 'a';
699                 else
700                     x = 10 + c - 'A';
701                 x = x << 4;
702                 c = Py_CHARMASK(*s);
703                 s++;
704                 if (isdigit(c))
705                     x += c - '0';
706                 else if (islower(c))
707                     x += 10 + c - 'a';
708                 else
709                     x += 10 + c - 'A';
710                 *p++ = x;
711                 break;
712             }
713             if (!errors || strcmp(errors, "strict") == 0) {
714                 PyErr_SetString(PyExc_ValueError,
715                                 "invalid \\x escape");
716                 goto failed;
717             }
718             if (strcmp(errors, "replace") == 0) {
719                 *p++ = '?';
720             } else if (strcmp(errors, "ignore") == 0)
721                 /* do nothing */;
722             else {
723                 PyErr_Format(PyExc_ValueError,
724                              "decoding error; "
725                              "unknown error handling code: %.400s",
726                              errors);
727                 goto failed;
728             }
729 #ifndef Py_USING_UNICODE
730         case 'u':
731         case 'U':
732         case 'N':
733             if (unicode) {
734                 PyErr_SetString(PyExc_ValueError,
735                           "Unicode escapes not legal "
736                           "when Unicode disabled");
737                 goto failed;
738             }
739 #endif
740         default:
741             *p++ = '\\';
742             s--;
743             goto non_esc; /* an arbitrary number of unescaped
744                              UTF-8 bytes may follow. */
745         }
746     }
747     if (p-buf < newlen && _PyString_Resize(&v, p - buf))
748         goto failed;
749     return v;
750   failed:
751     Py_DECREF(v);
752     return NULL;
753 }
754 
755 /* -------------------------------------------------------------------- */
756 /* object api */
757 
758 static Py_ssize_t
string_getsize(register PyObject * op)759 string_getsize(register PyObject *op)
760 {
761     char *s;
762     Py_ssize_t len;
763     if (PyString_AsStringAndSize(op, &s, &len))
764         return -1;
765     return len;
766 }
767 
768 static /*const*/ char *
string_getbuffer(register PyObject * op)769 string_getbuffer(register PyObject *op)
770 {
771     char *s;
772     Py_ssize_t len;
773     if (PyString_AsStringAndSize(op, &s, &len))
774         return NULL;
775     return s;
776 }
777 
778 Py_ssize_t
PyString_Size(register PyObject * op)779 PyString_Size(register PyObject *op)
780 {
781     if (!PyString_Check(op))
782         return string_getsize(op);
783     return Py_SIZE(op);
784 }
785 
786 /*const*/ char *
PyString_AsString(register PyObject * op)787 PyString_AsString(register PyObject *op)
788 {
789     if (!PyString_Check(op))
790         return string_getbuffer(op);
791     return ((PyStringObject *)op) -> ob_sval;
792 }
793 
794 int
PyString_AsStringAndSize(register PyObject * obj,register char ** s,register Py_ssize_t * len)795 PyString_AsStringAndSize(register PyObject *obj,
796                          register char **s,
797                          register Py_ssize_t *len)
798 {
799     if (s == NULL) {
800         PyErr_BadInternalCall();
801         return -1;
802     }
803 
804     if (!PyString_Check(obj)) {
805 #ifdef Py_USING_UNICODE
806         if (PyUnicode_Check(obj)) {
807             obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
808             if (obj == NULL)
809                 return -1;
810         }
811         else
812 #endif
813         {
814             PyErr_Format(PyExc_TypeError,
815                          "expected string or Unicode object, "
816                          "%.200s found", Py_TYPE(obj)->tp_name);
817             return -1;
818         }
819     }
820 
821     *s = PyString_AS_STRING(obj);
822     if (len != NULL)
823         *len = PyString_GET_SIZE(obj);
824     else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
825         PyErr_SetString(PyExc_TypeError,
826                         "expected string without null bytes");
827         return -1;
828     }
829     return 0;
830 }
831 
832 /* -------------------------------------------------------------------- */
833 /* Methods */
834 
835 #include "stringlib/stringdefs.h"
836 #include "stringlib/fastsearch.h"
837 
838 #include "stringlib/count.h"
839 #include "stringlib/find.h"
840 #include "stringlib/partition.h"
841 #include "stringlib/split.h"
842 
843 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
844 #include "stringlib/localeutil.h"
845 
846 
847 
848 static int
string_print(PyStringObject * op,FILE * fp,int flags)849 string_print(PyStringObject *op, FILE *fp, int flags)
850 {
851     Py_ssize_t i, str_len;
852     char c;
853     int quote;
854 
855     /* XXX Ought to check for interrupts when writing long strings */
856     if (! PyString_CheckExact(op)) {
857         int ret;
858         /* A str subclass may have its own __str__ method. */
859         op = (PyStringObject *) PyObject_Str((PyObject *)op);
860         if (op == NULL)
861             return -1;
862         ret = string_print(op, fp, flags);
863         Py_DECREF(op);
864         return ret;
865     }
866     if (flags & Py_PRINT_RAW) {
867         char *data = op->ob_sval;
868         Py_ssize_t size = Py_SIZE(op);
869         Py_BEGIN_ALLOW_THREADS
870         while (size > INT_MAX) {
871             /* Very long strings cannot be written atomically.
872              * But don't write exactly INT_MAX bytes at a time
873              * to avoid memory aligment issues.
874              */
875             const int chunk_size = INT_MAX & ~0x3FFF;
876             fwrite(data, 1, chunk_size, fp);
877             data += chunk_size;
878             size -= chunk_size;
879         }
880 #ifdef __VMS
881         if (size) fwrite(data, (int)size, 1, fp);
882 #else
883         fwrite(data, 1, (int)size, fp);
884 #endif
885         Py_END_ALLOW_THREADS
886         return 0;
887     }
888 
889     /* figure out which quote to use; single is preferred */
890     quote = '\'';
891     if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892         !memchr(op->ob_sval, '"', Py_SIZE(op)))
893         quote = '"';
894 
895     str_len = Py_SIZE(op);
896     Py_BEGIN_ALLOW_THREADS
897     fputc(quote, fp);
898     for (i = 0; i < str_len; i++) {
899         /* Since strings are immutable and the caller should have a
900         reference, accessing the interal buffer should not be an issue
901         with the GIL released. */
902         c = op->ob_sval[i];
903         if (c == quote || c == '\\')
904             fprintf(fp, "\\%c", c);
905         else if (c == '\t')
906             fprintf(fp, "\\t");
907         else if (c == '\n')
908             fprintf(fp, "\\n");
909         else if (c == '\r')
910             fprintf(fp, "\\r");
911         else if (c < ' ' || c >= 0x7f)
912             fprintf(fp, "\\x%02x", c & 0xff);
913         else
914             fputc(c, fp);
915     }
916     fputc(quote, fp);
917     Py_END_ALLOW_THREADS
918     return 0;
919 }
920 
921 PyObject *
PyString_Repr(PyObject * obj,int smartquotes)922 PyString_Repr(PyObject *obj, int smartquotes)
923 {
924     register PyStringObject* op = (PyStringObject*) obj;
925     size_t newsize = 2 + 4 * Py_SIZE(op);
926     PyObject *v;
927     if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
928         PyErr_SetString(PyExc_OverflowError,
929             "string is too large to make repr");
930         return NULL;
931     }
932     v = PyString_FromStringAndSize((char *)NULL, newsize);
933     if (v == NULL) {
934         return NULL;
935     }
936     else {
937         register Py_ssize_t i;
938         register char c;
939         register char *p;
940         int quote;
941 
942         /* figure out which quote to use; single is preferred */
943         quote = '\'';
944         if (smartquotes &&
945             memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
946             !memchr(op->ob_sval, '"', Py_SIZE(op)))
947             quote = '"';
948 
949         p = PyString_AS_STRING(v);
950         *p++ = quote;
951         for (i = 0; i < Py_SIZE(op); i++) {
952             /* There's at least enough room for a hex escape
953                and a closing quote. */
954             assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
955             c = op->ob_sval[i];
956             if (c == quote || c == '\\')
957                 *p++ = '\\', *p++ = c;
958             else if (c == '\t')
959                 *p++ = '\\', *p++ = 't';
960             else if (c == '\n')
961                 *p++ = '\\', *p++ = 'n';
962             else if (c == '\r')
963                 *p++ = '\\', *p++ = 'r';
964             else if (c < ' ' || c >= 0x7f) {
965                 /* For performance, we don't want to call
966                    PyOS_snprintf here (extra layers of
967                    function call). */
968                 sprintf(p, "\\x%02x", c & 0xff);
969                 p += 4;
970             }
971             else
972                 *p++ = c;
973         }
974         assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
975         *p++ = quote;
976         *p = '\0';
977         if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
978             return NULL;
979         return v;
980     }
981 }
982 
983 static PyObject *
string_repr(PyObject * op)984 string_repr(PyObject *op)
985 {
986     return PyString_Repr(op, 1);
987 }
988 
989 static PyObject *
string_str(PyObject * s)990 string_str(PyObject *s)
991 {
992     assert(PyString_Check(s));
993     if (PyString_CheckExact(s)) {
994         Py_INCREF(s);
995         return s;
996     }
997     else {
998         /* Subtype -- return genuine string with the same value. */
999         PyStringObject *t = (PyStringObject *) s;
1000         return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1001     }
1002 }
1003 
1004 static Py_ssize_t
string_length(PyStringObject * a)1005 string_length(PyStringObject *a)
1006 {
1007     return Py_SIZE(a);
1008 }
1009 
1010 static PyObject *
string_concat(register PyStringObject * a,register PyObject * bb)1011 string_concat(register PyStringObject *a, register PyObject *bb)
1012 {
1013     register Py_ssize_t size;
1014     register PyStringObject *op;
1015     if (!PyString_Check(bb)) {
1016 #ifdef Py_USING_UNICODE
1017         if (PyUnicode_Check(bb))
1018             return PyUnicode_Concat((PyObject *)a, bb);
1019 #endif
1020         if (PyByteArray_Check(bb))
1021             return PyByteArray_Concat((PyObject *)a, bb);
1022         PyErr_Format(PyExc_TypeError,
1023                      "cannot concatenate 'str' and '%.200s' objects",
1024                      Py_TYPE(bb)->tp_name);
1025         return NULL;
1026     }
1027 #define b ((PyStringObject *)bb)
1028     /* Optimize cases with empty left or right operand */
1029     if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1030         PyString_CheckExact(a) && PyString_CheckExact(b)) {
1031         if (Py_SIZE(a) == 0) {
1032             Py_INCREF(bb);
1033             return bb;
1034         }
1035         Py_INCREF(a);
1036         return (PyObject *)a;
1037     }
1038     size = Py_SIZE(a) + Py_SIZE(b);
1039     /* Check that string sizes are not negative, to prevent an
1040        overflow in cases where we are passed incorrectly-created
1041        strings with negative lengths (due to a bug in other code).
1042     */
1043     if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1044         Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1045         PyErr_SetString(PyExc_OverflowError,
1046                         "strings are too large to concat");
1047         return NULL;
1048     }
1049 
1050     /* Inline PyObject_NewVar */
1051     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1052         PyErr_SetString(PyExc_OverflowError,
1053                         "strings are too large to concat");
1054         return NULL;
1055     }
1056     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1057     if (op == NULL)
1058         return PyErr_NoMemory();
1059     PyObject_INIT_VAR(op, &PyString_Type, size);
1060     op->ob_shash = -1;
1061     op->ob_sstate = SSTATE_NOT_INTERNED;
1062     Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1063     Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1064     op->ob_sval[size] = '\0';
1065     return (PyObject *) op;
1066 #undef b
1067 }
1068 
1069 static PyObject *
string_repeat(register PyStringObject * a,register Py_ssize_t n)1070 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1071 {
1072     register Py_ssize_t i;
1073     register Py_ssize_t j;
1074     register Py_ssize_t size;
1075     register PyStringObject *op;
1076     size_t nbytes;
1077     if (n < 0)
1078         n = 0;
1079     /* watch out for overflows:  the size can overflow int,
1080      * and the # of bytes needed can overflow size_t
1081      */
1082     size = Py_SIZE(a) * n;
1083     if (n && size / n != Py_SIZE(a)) {
1084         PyErr_SetString(PyExc_OverflowError,
1085             "repeated string is too long");
1086         return NULL;
1087     }
1088     if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1089         Py_INCREF(a);
1090         return (PyObject *)a;
1091     }
1092     nbytes = (size_t)size;
1093     if (nbytes + PyStringObject_SIZE <= nbytes) {
1094         PyErr_SetString(PyExc_OverflowError,
1095             "repeated string is too long");
1096         return NULL;
1097     }
1098     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1099     if (op == NULL)
1100         return PyErr_NoMemory();
1101     PyObject_INIT_VAR(op, &PyString_Type, size);
1102     op->ob_shash = -1;
1103     op->ob_sstate = SSTATE_NOT_INTERNED;
1104     op->ob_sval[size] = '\0';
1105     if (Py_SIZE(a) == 1 && n > 0) {
1106         memset(op->ob_sval, a->ob_sval[0] , n);
1107         return (PyObject *) op;
1108     }
1109     i = 0;
1110     if (i < size) {
1111         Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1112         i = Py_SIZE(a);
1113     }
1114     while (i < size) {
1115         j = (i <= size-i)  ?  i  :  size-i;
1116         Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1117         i += j;
1118     }
1119     return (PyObject *) op;
1120 }
1121 
1122 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1123 
1124 static PyObject *
string_slice(register PyStringObject * a,register Py_ssize_t i,register Py_ssize_t j)1125 string_slice(register PyStringObject *a, register Py_ssize_t i,
1126              register Py_ssize_t j)
1127      /* j -- may be negative! */
1128 {
1129     if (i < 0)
1130         i = 0;
1131     if (j < 0)
1132         j = 0; /* Avoid signed/unsigned bug in next line */
1133     if (j > Py_SIZE(a))
1134         j = Py_SIZE(a);
1135     if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1136         /* It's the same as a */
1137         Py_INCREF(a);
1138         return (PyObject *)a;
1139     }
1140     if (j < i)
1141         j = i;
1142     return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1143 }
1144 
1145 static int
string_contains(PyObject * str_obj,PyObject * sub_obj)1146 string_contains(PyObject *str_obj, PyObject *sub_obj)
1147 {
1148     if (!PyString_CheckExact(sub_obj)) {
1149 #ifdef Py_USING_UNICODE
1150         if (PyUnicode_Check(sub_obj))
1151             return PyUnicode_Contains(str_obj, sub_obj);
1152 #endif
1153         if (!PyString_Check(sub_obj)) {
1154             PyErr_Format(PyExc_TypeError,
1155                 "'in <string>' requires string as left operand, "
1156                 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1157             return -1;
1158         }
1159     }
1160 
1161     return stringlib_contains_obj(str_obj, sub_obj);
1162 }
1163 
1164 static PyObject *
string_item(PyStringObject * a,register Py_ssize_t i)1165 string_item(PyStringObject *a, register Py_ssize_t i)
1166 {
1167     char pchar;
1168     PyObject *v;
1169     if (i < 0 || i >= Py_SIZE(a)) {
1170         PyErr_SetString(PyExc_IndexError, "string index out of range");
1171         return NULL;
1172     }
1173     pchar = a->ob_sval[i];
1174     v = (PyObject *)characters[pchar & UCHAR_MAX];
1175     if (v == NULL)
1176         v = PyString_FromStringAndSize(&pchar, 1);
1177     else {
1178 #ifdef COUNT_ALLOCS
1179         one_strings++;
1180 #endif
1181         Py_INCREF(v);
1182     }
1183     return v;
1184 }
1185 
1186 static PyObject*
string_richcompare(PyStringObject * a,PyStringObject * b,int op)1187 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1188 {
1189     int c;
1190     Py_ssize_t len_a, len_b;
1191     Py_ssize_t min_len;
1192     PyObject *result;
1193 
1194     /* Make sure both arguments are strings. */
1195     if (!(PyString_Check(a) && PyString_Check(b))) {
1196         result = Py_NotImplemented;
1197         goto out;
1198     }
1199     if (a == b) {
1200         switch (op) {
1201         case Py_EQ:case Py_LE:case Py_GE:
1202             result = Py_True;
1203             goto out;
1204         case Py_NE:case Py_LT:case Py_GT:
1205             result = Py_False;
1206             goto out;
1207         }
1208     }
1209     if (op == Py_EQ) {
1210         /* Supporting Py_NE here as well does not save
1211            much time, since Py_NE is rarely used.  */
1212         if (Py_SIZE(a) == Py_SIZE(b)
1213             && (a->ob_sval[0] == b->ob_sval[0]
1214             && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1215             result = Py_True;
1216         } else {
1217             result = Py_False;
1218         }
1219         goto out;
1220     }
1221     len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1222     min_len = (len_a < len_b) ? len_a : len_b;
1223     if (min_len > 0) {
1224         c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1225         if (c==0)
1226             c = memcmp(a->ob_sval, b->ob_sval, min_len);
1227     } else
1228         c = 0;
1229     if (c == 0)
1230         c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1231     switch (op) {
1232     case Py_LT: c = c <  0; break;
1233     case Py_LE: c = c <= 0; break;
1234     case Py_EQ: assert(0);  break; /* unreachable */
1235     case Py_NE: c = c != 0; break;
1236     case Py_GT: c = c >  0; break;
1237     case Py_GE: c = c >= 0; break;
1238     default:
1239         result = Py_NotImplemented;
1240         goto out;
1241     }
1242     result = c ? Py_True : Py_False;
1243   out:
1244     Py_INCREF(result);
1245     return result;
1246 }
1247 
1248 int
_PyString_Eq(PyObject * o1,PyObject * o2)1249 _PyString_Eq(PyObject *o1, PyObject *o2)
1250 {
1251     PyStringObject *a = (PyStringObject*) o1;
1252     PyStringObject *b = (PyStringObject*) o2;
1253     return Py_SIZE(a) == Py_SIZE(b)
1254       && *a->ob_sval == *b->ob_sval
1255       && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1256 }
1257 
1258 static long
string_hash(PyStringObject * a)1259 string_hash(PyStringObject *a)
1260 {
1261     register Py_ssize_t len;
1262     register unsigned char *p;
1263     register long x;
1264 
1265     if (a->ob_shash != -1)
1266         return a->ob_shash;
1267     len = Py_SIZE(a);
1268     p = (unsigned char *) a->ob_sval;
1269     x = *p << 7;
1270     while (--len >= 0)
1271         x = (1000003*x) ^ *p++;
1272     x ^= Py_SIZE(a);
1273     if (x == -1)
1274         x = -2;
1275     a->ob_shash = x;
1276     return x;
1277 }
1278 
1279 static PyObject*
string_subscript(PyStringObject * self,PyObject * item)1280 string_subscript(PyStringObject* self, PyObject* item)
1281 {
1282     if (PyIndex_Check(item)) {
1283         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1284         if (i == -1 && PyErr_Occurred())
1285             return NULL;
1286         if (i < 0)
1287             i += PyString_GET_SIZE(self);
1288         return string_item(self, i);
1289     }
1290     else if (PySlice_Check(item)) {
1291         Py_ssize_t start, stop, step, slicelength, cur, i;
1292         char* source_buf;
1293         char* result_buf;
1294         PyObject* result;
1295 
1296         if (PySlice_GetIndicesEx((PySliceObject*)item,
1297                          PyString_GET_SIZE(self),
1298                          &start, &stop, &step, &slicelength) < 0) {
1299             return NULL;
1300         }
1301 
1302         if (slicelength <= 0) {
1303             return PyString_FromStringAndSize("", 0);
1304         }
1305         else if (start == 0 && step == 1 &&
1306                  slicelength == PyString_GET_SIZE(self) &&
1307                  PyString_CheckExact(self)) {
1308             Py_INCREF(self);
1309             return (PyObject *)self;
1310         }
1311         else if (step == 1) {
1312             return PyString_FromStringAndSize(
1313                 PyString_AS_STRING(self) + start,
1314                 slicelength);
1315         }
1316         else {
1317             source_buf = PyString_AsString((PyObject*)self);
1318             result_buf = (char *)PyMem_Malloc(slicelength);
1319             if (result_buf == NULL)
1320                 return PyErr_NoMemory();
1321 
1322             for (cur = start, i = 0; i < slicelength;
1323                  cur += step, i++) {
1324                 result_buf[i] = source_buf[cur];
1325             }
1326 
1327             result = PyString_FromStringAndSize(result_buf,
1328                                                 slicelength);
1329             PyMem_Free(result_buf);
1330             return result;
1331         }
1332     }
1333     else {
1334         PyErr_Format(PyExc_TypeError,
1335                      "string indices must be integers, not %.200s",
1336                      Py_TYPE(item)->tp_name);
1337         return NULL;
1338     }
1339 }
1340 
1341 static Py_ssize_t
string_buffer_getreadbuf(PyStringObject * self,Py_ssize_t index,const void ** ptr)1342 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1343 {
1344     if ( index != 0 ) {
1345         PyErr_SetString(PyExc_SystemError,
1346                         "accessing non-existent string segment");
1347         return -1;
1348     }
1349     *ptr = (void *)self->ob_sval;
1350     return Py_SIZE(self);
1351 }
1352 
1353 static Py_ssize_t
string_buffer_getwritebuf(PyStringObject * self,Py_ssize_t index,const void ** ptr)1354 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1355 {
1356     PyErr_SetString(PyExc_TypeError,
1357                     "Cannot use string as modifiable buffer");
1358     return -1;
1359 }
1360 
1361 static Py_ssize_t
string_buffer_getsegcount(PyStringObject * self,Py_ssize_t * lenp)1362 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1363 {
1364     if ( lenp )
1365         *lenp = Py_SIZE(self);
1366     return 1;
1367 }
1368 
1369 static Py_ssize_t
string_buffer_getcharbuf(PyStringObject * self,Py_ssize_t index,const char ** ptr)1370 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1371 {
1372     if ( index != 0 ) {
1373         PyErr_SetString(PyExc_SystemError,
1374                         "accessing non-existent string segment");
1375         return -1;
1376     }
1377     *ptr = self->ob_sval;
1378     return Py_SIZE(self);
1379 }
1380 
1381 static int
string_buffer_getbuffer(PyStringObject * self,Py_buffer * view,int flags)1382 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1383 {
1384     return PyBuffer_FillInfo(view, (PyObject*)self,
1385                              (void *)self->ob_sval, Py_SIZE(self),
1386                              1, flags);
1387 }
1388 
1389 static PySequenceMethods string_as_sequence = {
1390     (lenfunc)string_length, /*sq_length*/
1391     (binaryfunc)string_concat, /*sq_concat*/
1392     (ssizeargfunc)string_repeat, /*sq_repeat*/
1393     (ssizeargfunc)string_item, /*sq_item*/
1394     (ssizessizeargfunc)string_slice, /*sq_slice*/
1395     0,                  /*sq_ass_item*/
1396     0,                  /*sq_ass_slice*/
1397     (objobjproc)string_contains /*sq_contains*/
1398 };
1399 
1400 static PyMappingMethods string_as_mapping = {
1401     (lenfunc)string_length,
1402     (binaryfunc)string_subscript,
1403     0,
1404 };
1405 
1406 static PyBufferProcs string_as_buffer = {
1407     (readbufferproc)string_buffer_getreadbuf,
1408     (writebufferproc)string_buffer_getwritebuf,
1409     (segcountproc)string_buffer_getsegcount,
1410     (charbufferproc)string_buffer_getcharbuf,
1411     (getbufferproc)string_buffer_getbuffer,
1412     0, /* XXX */
1413 };
1414 
1415 
1416 
/* Strip modes.  The values double as indexes into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument format strings, one per strip mode (indexed by the
   constants above) */
static const char *stripformat[] = {
    "|O:lstrip",        /* LEFTSTRIP */
    "|O:rstrip",        /* RIGHTSTRIP */
    "|O:strip"          /* BOTHSTRIP */
};

/* Method name for strip mode i: the format string with its three
   leading characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1425 
1426 PyDoc_STRVAR(split__doc__,
1427 "S.split([sep [,maxsplit]]) -> list of strings\n\
1428 \n\
1429 Return a list of the words in the string S, using sep as the\n\
1430 delimiter string.  If maxsplit is given, at most maxsplit\n\
1431 splits are done. If sep is not specified or is None, any\n\
1432 whitespace string is a separator and empty strings are removed\n\
1433 from the result.");
1434 
1435 static PyObject *
string_split(PyStringObject * self,PyObject * args)1436 string_split(PyStringObject *self, PyObject *args)
1437 {
1438     Py_ssize_t len = PyString_GET_SIZE(self), n;
1439     Py_ssize_t maxsplit = -1;
1440     const char *s = PyString_AS_STRING(self), *sub;
1441     PyObject *subobj = Py_None;
1442 
1443     if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1444         return NULL;
1445     if (maxsplit < 0)
1446         maxsplit = PY_SSIZE_T_MAX;
1447     if (subobj == Py_None)
1448         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1449     if (PyString_Check(subobj)) {
1450         sub = PyString_AS_STRING(subobj);
1451         n = PyString_GET_SIZE(subobj);
1452     }
1453 #ifdef Py_USING_UNICODE
1454     else if (PyUnicode_Check(subobj))
1455         return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1456 #endif
1457     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1458         return NULL;
1459 
1460     return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1461 }
1462 
1463 PyDoc_STRVAR(partition__doc__,
1464 "S.partition(sep) -> (head, sep, tail)\n\
1465 \n\
1466 Search for the separator sep in S, and return the part before it,\n\
1467 the separator itself, and the part after it.  If the separator is not\n\
1468 found, return S and two empty strings.");
1469 
1470 static PyObject *
string_partition(PyStringObject * self,PyObject * sep_obj)1471 string_partition(PyStringObject *self, PyObject *sep_obj)
1472 {
1473     const char *sep;
1474     Py_ssize_t sep_len;
1475 
1476     if (PyString_Check(sep_obj)) {
1477         sep = PyString_AS_STRING(sep_obj);
1478         sep_len = PyString_GET_SIZE(sep_obj);
1479     }
1480 #ifdef Py_USING_UNICODE
1481     else if (PyUnicode_Check(sep_obj))
1482         return PyUnicode_Partition((PyObject *) self, sep_obj);
1483 #endif
1484     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1485         return NULL;
1486 
1487     return stringlib_partition(
1488         (PyObject*) self,
1489         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1490         sep_obj, sep, sep_len
1491         );
1492 }
1493 
1494 PyDoc_STRVAR(rpartition__doc__,
1495 "S.rpartition(sep) -> (head, sep, tail)\n\
1496 \n\
1497 Search for the separator sep in S, starting at the end of S, and return\n\
1498 the part before it, the separator itself, and the part after it.  If the\n\
1499 separator is not found, return two empty strings and S.");
1500 
1501 static PyObject *
string_rpartition(PyStringObject * self,PyObject * sep_obj)1502 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1503 {
1504     const char *sep;
1505     Py_ssize_t sep_len;
1506 
1507     if (PyString_Check(sep_obj)) {
1508         sep = PyString_AS_STRING(sep_obj);
1509         sep_len = PyString_GET_SIZE(sep_obj);
1510     }
1511 #ifdef Py_USING_UNICODE
1512     else if (PyUnicode_Check(sep_obj))
1513         return PyUnicode_RPartition((PyObject *) self, sep_obj);
1514 #endif
1515     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1516         return NULL;
1517 
1518     return stringlib_rpartition(
1519         (PyObject*) self,
1520         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1521         sep_obj, sep, sep_len
1522         );
1523 }
1524 
1525 PyDoc_STRVAR(rsplit__doc__,
1526 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1527 \n\
1528 Return a list of the words in the string S, using sep as the\n\
1529 delimiter string, starting at the end of the string and working\n\
1530 to the front.  If maxsplit is given, at most maxsplit splits are\n\
1531 done. If sep is not specified or is None, any whitespace string\n\
1532 is a separator.");
1533 
1534 static PyObject *
string_rsplit(PyStringObject * self,PyObject * args)1535 string_rsplit(PyStringObject *self, PyObject *args)
1536 {
1537     Py_ssize_t len = PyString_GET_SIZE(self), n;
1538     Py_ssize_t maxsplit = -1;
1539     const char *s = PyString_AS_STRING(self), *sub;
1540     PyObject *subobj = Py_None;
1541 
1542     if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1543         return NULL;
1544     if (maxsplit < 0)
1545         maxsplit = PY_SSIZE_T_MAX;
1546     if (subobj == Py_None)
1547         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1548     if (PyString_Check(subobj)) {
1549         sub = PyString_AS_STRING(subobj);
1550         n = PyString_GET_SIZE(subobj);
1551     }
1552 #ifdef Py_USING_UNICODE
1553     else if (PyUnicode_Check(subobj))
1554         return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1555 #endif
1556     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1557         return NULL;
1558 
1559     return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1560 }
1561 
1562 
1563 PyDoc_STRVAR(join__doc__,
1564 "S.join(iterable) -> string\n\
1565 \n\
1566 Return a string which is the concatenation of the strings in the\n\
1567 iterable.  The separator between elements is S.");
1568 
1569 static PyObject *
string_join(PyStringObject * self,PyObject * orig)1570 string_join(PyStringObject *self, PyObject *orig)
1571 {
1572     char *sep = PyString_AS_STRING(self);
1573     const Py_ssize_t seplen = PyString_GET_SIZE(self);
1574     PyObject *res = NULL;
1575     char *p;
1576     Py_ssize_t seqlen = 0;
1577     size_t sz = 0;
1578     Py_ssize_t i;
1579     PyObject *seq, *item;
1580 
1581     seq = PySequence_Fast(orig, "");
1582     if (seq == NULL) {
1583         return NULL;
1584     }
1585 
1586     seqlen = PySequence_Size(seq);
1587     if (seqlen == 0) {
1588         Py_DECREF(seq);
1589         return PyString_FromString("");
1590     }
1591     if (seqlen == 1) {
1592         item = PySequence_Fast_GET_ITEM(seq, 0);
1593         if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1594             Py_INCREF(item);
1595             Py_DECREF(seq);
1596             return item;
1597         }
1598     }
1599 
1600     /* There are at least two things to join, or else we have a subclass
1601      * of the builtin types in the sequence.
1602      * Do a pre-pass to figure out the total amount of space we'll
1603      * need (sz), see whether any argument is absurd, and defer to
1604      * the Unicode join if appropriate.
1605      */
1606     for (i = 0; i < seqlen; i++) {
1607         const size_t old_sz = sz;
1608         item = PySequence_Fast_GET_ITEM(seq, i);
1609         if (!PyString_Check(item)){
1610 #ifdef Py_USING_UNICODE
1611             if (PyUnicode_Check(item)) {
1612                 /* Defer to Unicode join.
1613                  * CAUTION:  There's no gurantee that the
1614                  * original sequence can be iterated over
1615                  * again, so we must pass seq here.
1616                  */
1617                 PyObject *result;
1618                 result = PyUnicode_Join((PyObject *)self, seq);
1619                 Py_DECREF(seq);
1620                 return result;
1621             }
1622 #endif
1623             PyErr_Format(PyExc_TypeError,
1624                          "sequence item %zd: expected string,"
1625                          " %.80s found",
1626                          i, Py_TYPE(item)->tp_name);
1627             Py_DECREF(seq);
1628             return NULL;
1629         }
1630         sz += PyString_GET_SIZE(item);
1631         if (i != 0)
1632             sz += seplen;
1633         if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1634             PyErr_SetString(PyExc_OverflowError,
1635                 "join() result is too long for a Python string");
1636             Py_DECREF(seq);
1637             return NULL;
1638         }
1639     }
1640 
1641     /* Allocate result space. */
1642     res = PyString_FromStringAndSize((char*)NULL, sz);
1643     if (res == NULL) {
1644         Py_DECREF(seq);
1645         return NULL;
1646     }
1647 
1648     /* Catenate everything. */
1649     p = PyString_AS_STRING(res);
1650     for (i = 0; i < seqlen; ++i) {
1651         size_t n;
1652         item = PySequence_Fast_GET_ITEM(seq, i);
1653         n = PyString_GET_SIZE(item);
1654         Py_MEMCPY(p, PyString_AS_STRING(item), n);
1655         p += n;
1656         if (i < seqlen - 1) {
1657             Py_MEMCPY(p, sep, seplen);
1658             p += seplen;
1659         }
1660     }
1661 
1662     Py_DECREF(seq);
1663     return res;
1664 }
1665 
1666 PyObject *
_PyString_Join(PyObject * sep,PyObject * x)1667 _PyString_Join(PyObject *sep, PyObject *x)
1668 {
1669     assert(sep != NULL && PyString_Check(sep));
1670     assert(x != NULL);
1671     return string_join((PyStringObject *)sep, x);
1672 }
1673 
/* helper macro to fixup start/end slice values: `end' is capped at
   `len'; a negative `start' or `end' is offset by `len' and, if still
   negative, set to 0.  `start' and `end' must be modifiable lvalues.
   Every argument is parenthesized so that expression arguments (for
   example a conditional expression passed as `len') are evaluated as a
   unit.  Arguments are evaluated more than once. */
#define ADJUST_INDICES(start, end, len)         \
    if ((end) > (len))                          \
        (end) = (len);                          \
    else if ((end) < 0) {                       \
        (end) += (len);                         \
        if ((end) < 0)                          \
            (end) = 0;                          \
    }                                           \
    if ((start) < 0) {                          \
        (start) += (len);                       \
        if ((start) < 0)                        \
            (start) = 0;                        \
    }
1688 
1689 Py_LOCAL_INLINE(Py_ssize_t)
string_find_internal(PyStringObject * self,PyObject * args,int dir)1690 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1691 {
1692     PyObject *subobj;
1693     const char *sub;
1694     Py_ssize_t sub_len;
1695     Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1696 
1697     if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1698                                     args, &subobj, &start, &end))
1699         return -2;
1700 
1701     if (PyString_Check(subobj)) {
1702         sub = PyString_AS_STRING(subobj);
1703         sub_len = PyString_GET_SIZE(subobj);
1704     }
1705 #ifdef Py_USING_UNICODE
1706     else if (PyUnicode_Check(subobj))
1707         return PyUnicode_Find(
1708             (PyObject *)self, subobj, start, end, dir);
1709 #endif
1710     else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1711         /* XXX - the "expected a character buffer object" is pretty
1712            confusing for a non-expert.  remap to something else ? */
1713         return -2;
1714 
1715     if (dir > 0)
1716         return stringlib_find_slice(
1717             PyString_AS_STRING(self), PyString_GET_SIZE(self),
1718             sub, sub_len, start, end);
1719     else
1720         return stringlib_rfind_slice(
1721             PyString_AS_STRING(self), PyString_GET_SIZE(self),
1722             sub, sub_len, start, end);
1723 }
1724 
1725 
1726 PyDoc_STRVAR(find__doc__,
1727 "S.find(sub [,start [,end]]) -> int\n\
1728 \n\
1729 Return the lowest index in S where substring sub is found,\n\
1730 such that sub is contained within s[start:end].  Optional\n\
1731 arguments start and end are interpreted as in slice notation.\n\
1732 \n\
1733 Return -1 on failure.");
1734 
1735 static PyObject *
string_find(PyStringObject * self,PyObject * args)1736 string_find(PyStringObject *self, PyObject *args)
1737 {
1738     Py_ssize_t result = string_find_internal(self, args, +1);
1739     if (result == -2)
1740         return NULL;
1741     return PyInt_FromSsize_t(result);
1742 }
1743 
1744 
1745 PyDoc_STRVAR(index__doc__,
1746 "S.index(sub [,start [,end]]) -> int\n\
1747 \n\
1748 Like S.find() but raise ValueError when the substring is not found.");
1749 
1750 static PyObject *
string_index(PyStringObject * self,PyObject * args)1751 string_index(PyStringObject *self, PyObject *args)
1752 {
1753     Py_ssize_t result = string_find_internal(self, args, +1);
1754     if (result == -2)
1755         return NULL;
1756     if (result == -1) {
1757         PyErr_SetString(PyExc_ValueError,
1758                         "substring not found");
1759         return NULL;
1760     }
1761     return PyInt_FromSsize_t(result);
1762 }
1763 
1764 
1765 PyDoc_STRVAR(rfind__doc__,
1766 "S.rfind(sub [,start [,end]]) -> int\n\
1767 \n\
1768 Return the highest index in S where substring sub is found,\n\
1769 such that sub is contained within s[start:end].  Optional\n\
1770 arguments start and end are interpreted as in slice notation.\n\
1771 \n\
1772 Return -1 on failure.");
1773 
1774 static PyObject *
string_rfind(PyStringObject * self,PyObject * args)1775 string_rfind(PyStringObject *self, PyObject *args)
1776 {
1777     Py_ssize_t result = string_find_internal(self, args, -1);
1778     if (result == -2)
1779         return NULL;
1780     return PyInt_FromSsize_t(result);
1781 }
1782 
1783 
1784 PyDoc_STRVAR(rindex__doc__,
1785 "S.rindex(sub [,start [,end]]) -> int\n\
1786 \n\
1787 Like S.rfind() but raise ValueError when the substring is not found.");
1788 
1789 static PyObject *
string_rindex(PyStringObject * self,PyObject * args)1790 string_rindex(PyStringObject *self, PyObject *args)
1791 {
1792     Py_ssize_t result = string_find_internal(self, args, -1);
1793     if (result == -2)
1794         return NULL;
1795     if (result == -1) {
1796         PyErr_SetString(PyExc_ValueError,
1797                         "substring not found");
1798         return NULL;
1799     }
1800     return PyInt_FromSsize_t(result);
1801 }
1802 
1803 
1804 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyStringObject * self,int striptype,PyObject * sepobj)1805 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1806 {
1807     char *s = PyString_AS_STRING(self);
1808     Py_ssize_t len = PyString_GET_SIZE(self);
1809     char *sep = PyString_AS_STRING(sepobj);
1810     Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1811     Py_ssize_t i, j;
1812 
1813     i = 0;
1814     if (striptype != RIGHTSTRIP) {
1815         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1816             i++;
1817         }
1818     }
1819 
1820     j = len;
1821     if (striptype != LEFTSTRIP) {
1822         do {
1823             j--;
1824         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1825         j++;
1826     }
1827 
1828     if (i == 0 && j == len && PyString_CheckExact(self)) {
1829         Py_INCREF(self);
1830         return (PyObject*)self;
1831     }
1832     else
1833         return PyString_FromStringAndSize(s+i, j-i);
1834 }
1835 
1836 
1837 Py_LOCAL_INLINE(PyObject *)
do_strip(PyStringObject * self,int striptype)1838 do_strip(PyStringObject *self, int striptype)
1839 {
1840     char *s = PyString_AS_STRING(self);
1841     Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1842 
1843     i = 0;
1844     if (striptype != RIGHTSTRIP) {
1845         while (i < len && isspace(Py_CHARMASK(s[i]))) {
1846             i++;
1847         }
1848     }
1849 
1850     j = len;
1851     if (striptype != LEFTSTRIP) {
1852         do {
1853             j--;
1854         } while (j >= i && isspace(Py_CHARMASK(s[j])));
1855         j++;
1856     }
1857 
1858     if (i == 0 && j == len && PyString_CheckExact(self)) {
1859         Py_INCREF(self);
1860         return (PyObject*)self;
1861     }
1862     else
1863         return PyString_FromStringAndSize(s+i, j-i);
1864 }
1865 
1866 
1867 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyStringObject * self,int striptype,PyObject * args)1868 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1869 {
1870     PyObject *sep = NULL;
1871 
1872     if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1873         return NULL;
1874 
1875     if (sep != NULL && sep != Py_None) {
1876         if (PyString_Check(sep))
1877             return do_xstrip(self, striptype, sep);
1878 #ifdef Py_USING_UNICODE
1879         else if (PyUnicode_Check(sep)) {
1880             PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1881             PyObject *res;
1882             if (uniself==NULL)
1883                 return NULL;
1884             res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1885                 striptype, sep);
1886             Py_DECREF(uniself);
1887             return res;
1888         }
1889 #endif
1890         PyErr_Format(PyExc_TypeError,
1891 #ifdef Py_USING_UNICODE
1892                      "%s arg must be None, str or unicode",
1893 #else
1894                      "%s arg must be None or str",
1895 #endif
1896                      STRIPNAME(striptype));
1897         return NULL;
1898     }
1899 
1900     return do_strip(self, striptype);
1901 }
1902 
1903 
1904 PyDoc_STRVAR(strip__doc__,
1905 "S.strip([chars]) -> string or unicode\n\
1906 \n\
1907 Return a copy of the string S with leading and trailing\n\
1908 whitespace removed.\n\
1909 If chars is given and not None, remove characters in chars instead.\n\
1910 If chars is unicode, S will be converted to unicode before stripping");
1911 
1912 static PyObject *
string_strip(PyStringObject * self,PyObject * args)1913 string_strip(PyStringObject *self, PyObject *args)
1914 {
1915     if (PyTuple_GET_SIZE(args) == 0)
1916         return do_strip(self, BOTHSTRIP); /* Common case */
1917     else
1918         return do_argstrip(self, BOTHSTRIP, args);
1919 }
1920 
1921 
1922 PyDoc_STRVAR(lstrip__doc__,
1923 "S.lstrip([chars]) -> string or unicode\n\
1924 \n\
1925 Return a copy of the string S with leading whitespace removed.\n\
1926 If chars is given and not None, remove characters in chars instead.\n\
1927 If chars is unicode, S will be converted to unicode before stripping");
1928 
1929 static PyObject *
string_lstrip(PyStringObject * self,PyObject * args)1930 string_lstrip(PyStringObject *self, PyObject *args)
1931 {
1932     if (PyTuple_GET_SIZE(args) == 0)
1933         return do_strip(self, LEFTSTRIP); /* Common case */
1934     else
1935         return do_argstrip(self, LEFTSTRIP, args);
1936 }
1937 
1938 
1939 PyDoc_STRVAR(rstrip__doc__,
1940 "S.rstrip([chars]) -> string or unicode\n\
1941 \n\
1942 Return a copy of the string S with trailing whitespace removed.\n\
1943 If chars is given and not None, remove characters in chars instead.\n\
1944 If chars is unicode, S will be converted to unicode before stripping");
1945 
1946 static PyObject *
string_rstrip(PyStringObject * self,PyObject * args)1947 string_rstrip(PyStringObject *self, PyObject *args)
1948 {
1949     if (PyTuple_GET_SIZE(args) == 0)
1950         return do_strip(self, RIGHTSTRIP); /* Common case */
1951     else
1952         return do_argstrip(self, RIGHTSTRIP, args);
1953 }
1954 
1955 
1956 PyDoc_STRVAR(lower__doc__,
1957 "S.lower() -> string\n\
1958 \n\
1959 Return a copy of the string S converted to lowercase.");
1960 
1961 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1962 #ifndef _tolower
1963 #define _tolower tolower
1964 #endif
1965 
1966 static PyObject *
string_lower(PyStringObject * self)1967 string_lower(PyStringObject *self)
1968 {
1969     char *s;
1970     Py_ssize_t i, n = PyString_GET_SIZE(self);
1971     PyObject *newobj;
1972 
1973     newobj = PyString_FromStringAndSize(NULL, n);
1974     if (!newobj)
1975         return NULL;
1976 
1977     s = PyString_AS_STRING(newobj);
1978 
1979     Py_MEMCPY(s, PyString_AS_STRING(self), n);
1980 
1981     for (i = 0; i < n; i++) {
1982         int c = Py_CHARMASK(s[i]);
1983         if (isupper(c))
1984             s[i] = _tolower(c);
1985     }
1986 
1987     return newobj;
1988 }
1989 
1990 PyDoc_STRVAR(upper__doc__,
1991 "S.upper() -> string\n\
1992 \n\
1993 Return a copy of the string S converted to uppercase.");
1994 
1995 #ifndef _toupper
1996 #define _toupper toupper
1997 #endif
1998 
1999 static PyObject *
string_upper(PyStringObject * self)2000 string_upper(PyStringObject *self)
2001 {
2002     char *s;
2003     Py_ssize_t i, n = PyString_GET_SIZE(self);
2004     PyObject *newobj;
2005 
2006     newobj = PyString_FromStringAndSize(NULL, n);
2007     if (!newobj)
2008         return NULL;
2009 
2010     s = PyString_AS_STRING(newobj);
2011 
2012     Py_MEMCPY(s, PyString_AS_STRING(self), n);
2013 
2014     for (i = 0; i < n; i++) {
2015         int c = Py_CHARMASK(s[i]);
2016         if (islower(c))
2017             s[i] = _toupper(c);
2018     }
2019 
2020     return newobj;
2021 }
2022 
2023 PyDoc_STRVAR(title__doc__,
2024 "S.title() -> string\n\
2025 \n\
2026 Return a titlecased version of S, i.e. words start with uppercase\n\
2027 characters, all remaining cased characters have lowercase.");
2028 
2029 static PyObject*
string_title(PyStringObject * self)2030 string_title(PyStringObject *self)
2031 {
2032     char *s = PyString_AS_STRING(self), *s_new;
2033     Py_ssize_t i, n = PyString_GET_SIZE(self);
2034     int previous_is_cased = 0;
2035     PyObject *newobj;
2036 
2037     newobj = PyString_FromStringAndSize(NULL, n);
2038     if (newobj == NULL)
2039         return NULL;
2040     s_new = PyString_AsString(newobj);
2041     for (i = 0; i < n; i++) {
2042         int c = Py_CHARMASK(*s++);
2043         if (islower(c)) {
2044             if (!previous_is_cased)
2045                 c = toupper(c);
2046             previous_is_cased = 1;
2047         } else if (isupper(c)) {
2048             if (previous_is_cased)
2049                 c = tolower(c);
2050             previous_is_cased = 1;
2051         } else
2052             previous_is_cased = 0;
2053         *s_new++ = c;
2054     }
2055     return newobj;
2056 }
2057 
2058 PyDoc_STRVAR(capitalize__doc__,
2059 "S.capitalize() -> string\n\
2060 \n\
2061 Return a copy of the string S with only its first character\n\
2062 capitalized.");
2063 
2064 static PyObject *
string_capitalize(PyStringObject * self)2065 string_capitalize(PyStringObject *self)
2066 {
2067     char *s = PyString_AS_STRING(self), *s_new;
2068     Py_ssize_t i, n = PyString_GET_SIZE(self);
2069     PyObject *newobj;
2070 
2071     newobj = PyString_FromStringAndSize(NULL, n);
2072     if (newobj == NULL)
2073         return NULL;
2074     s_new = PyString_AsString(newobj);
2075     if (0 < n) {
2076         int c = Py_CHARMASK(*s++);
2077         if (islower(c))
2078             *s_new = toupper(c);
2079         else
2080             *s_new = c;
2081         s_new++;
2082     }
2083     for (i = 1; i < n; i++) {
2084         int c = Py_CHARMASK(*s++);
2085         if (isupper(c))
2086             *s_new = tolower(c);
2087         else
2088             *s_new = c;
2089         s_new++;
2090     }
2091     return newobj;
2092 }
2093 
2094 
2095 PyDoc_STRVAR(count__doc__,
2096 "S.count(sub[, start[, end]]) -> int\n\
2097 \n\
2098 Return the number of non-overlapping occurrences of substring sub in\n\
2099 string S[start:end].  Optional arguments start and end are interpreted\n\
2100 as in slice notation.");
2101 
2102 static PyObject *
string_count(PyStringObject * self,PyObject * args)2103 string_count(PyStringObject *self, PyObject *args)
2104 {
2105     PyObject *sub_obj;
2106     const char *str = PyString_AS_STRING(self), *sub;
2107     Py_ssize_t sub_len;
2108     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2109 
2110     if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
2111         return NULL;
2112 
2113     if (PyString_Check(sub_obj)) {
2114         sub = PyString_AS_STRING(sub_obj);
2115         sub_len = PyString_GET_SIZE(sub_obj);
2116     }
2117 #ifdef Py_USING_UNICODE
2118     else if (PyUnicode_Check(sub_obj)) {
2119         Py_ssize_t count;
2120         count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2121         if (count == -1)
2122             return NULL;
2123         else
2124             return PyInt_FromSsize_t(count);
2125     }
2126 #endif
2127     else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2128         return NULL;
2129 
2130     ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2131 
2132     return PyInt_FromSsize_t(
2133         stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2134         );
2135 }
2136 
2137 PyDoc_STRVAR(swapcase__doc__,
2138 "S.swapcase() -> string\n\
2139 \n\
2140 Return a copy of the string S with uppercase characters\n\
2141 converted to lowercase and vice versa.");
2142 
2143 static PyObject *
string_swapcase(PyStringObject * self)2144 string_swapcase(PyStringObject *self)
2145 {
2146     char *s = PyString_AS_STRING(self), *s_new;
2147     Py_ssize_t i, n = PyString_GET_SIZE(self);
2148     PyObject *newobj;
2149 
2150     newobj = PyString_FromStringAndSize(NULL, n);
2151     if (newobj == NULL)
2152         return NULL;
2153     s_new = PyString_AsString(newobj);
2154     for (i = 0; i < n; i++) {
2155         int c = Py_CHARMASK(*s++);
2156         if (islower(c)) {
2157             *s_new = toupper(c);
2158         }
2159         else if (isupper(c)) {
2160             *s_new = tolower(c);
2161         }
2162         else
2163             *s_new = c;
2164         s_new++;
2165     }
2166     return newobj;
2167 }
2168 
2169 
2170 PyDoc_STRVAR(translate__doc__,
2171 "S.translate(table [,deletechars]) -> string\n\
2172 \n\
2173 Return a copy of the string S, where all characters occurring\n\
2174 in the optional argument deletechars are removed, and the\n\
2175 remaining characters have been mapped through the given\n\
2176 translation table, which must be a string of length 256.");
2177 
2178 static PyObject *
string_translate(PyStringObject * self,PyObject * args)2179 string_translate(PyStringObject *self, PyObject *args)
2180 {
2181     register char *input, *output;
2182     const char *table;
2183     register Py_ssize_t i, c, changed = 0;
2184     PyObject *input_obj = (PyObject*)self;
2185     const char *output_start, *del_table=NULL;
2186     Py_ssize_t inlen, tablen, dellen = 0;
2187     PyObject *result;
2188     int trans_table[256];
2189     PyObject *tableobj, *delobj = NULL;
2190 
2191     if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2192                           &tableobj, &delobj))
2193         return NULL;
2194 
2195     if (PyString_Check(tableobj)) {
2196         table = PyString_AS_STRING(tableobj);
2197         tablen = PyString_GET_SIZE(tableobj);
2198     }
2199     else if (tableobj == Py_None) {
2200         table = NULL;
2201         tablen = 256;
2202     }
2203 #ifdef Py_USING_UNICODE
2204     else if (PyUnicode_Check(tableobj)) {
2205         /* Unicode .translate() does not support the deletechars
2206            parameter; instead a mapping to None will cause characters
2207            to be deleted. */
2208         if (delobj != NULL) {
2209             PyErr_SetString(PyExc_TypeError,
2210             "deletions are implemented differently for unicode");
2211             return NULL;
2212         }
2213         return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2214     }
2215 #endif
2216     else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2217         return NULL;
2218 
2219     if (tablen != 256) {
2220         PyErr_SetString(PyExc_ValueError,
2221           "translation table must be 256 characters long");
2222         return NULL;
2223     }
2224 
2225     if (delobj != NULL) {
2226         if (PyString_Check(delobj)) {
2227             del_table = PyString_AS_STRING(delobj);
2228             dellen = PyString_GET_SIZE(delobj);
2229         }
2230 #ifdef Py_USING_UNICODE
2231         else if (PyUnicode_Check(delobj)) {
2232             PyErr_SetString(PyExc_TypeError,
2233             "deletions are implemented differently for unicode");
2234             return NULL;
2235         }
2236 #endif
2237         else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2238             return NULL;
2239     }
2240     else {
2241         del_table = NULL;
2242         dellen = 0;
2243     }
2244 
2245     inlen = PyString_GET_SIZE(input_obj);
2246     result = PyString_FromStringAndSize((char *)NULL, inlen);
2247     if (result == NULL)
2248         return NULL;
2249     output_start = output = PyString_AsString(result);
2250     input = PyString_AS_STRING(input_obj);
2251 
2252     if (dellen == 0 && table != NULL) {
2253         /* If no deletions are required, use faster code */
2254         for (i = inlen; --i >= 0; ) {
2255             c = Py_CHARMASK(*input++);
2256             if (Py_CHARMASK((*output++ = table[c])) != c)
2257                 changed = 1;
2258         }
2259         if (changed || !PyString_CheckExact(input_obj))
2260             return result;
2261         Py_DECREF(result);
2262         Py_INCREF(input_obj);
2263         return input_obj;
2264     }
2265 
2266     if (table == NULL) {
2267         for (i = 0; i < 256; i++)
2268             trans_table[i] = Py_CHARMASK(i);
2269     } else {
2270         for (i = 0; i < 256; i++)
2271             trans_table[i] = Py_CHARMASK(table[i]);
2272     }
2273 
2274     for (i = 0; i < dellen; i++)
2275         trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2276 
2277     for (i = inlen; --i >= 0; ) {
2278         c = Py_CHARMASK(*input++);
2279         if (trans_table[c] != -1)
2280             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2281                 continue;
2282         changed = 1;
2283     }
2284     if (!changed && PyString_CheckExact(input_obj)) {
2285         Py_DECREF(result);
2286         Py_INCREF(input_obj);
2287         return input_obj;
2288     }
2289     /* Fix the size of the resulting string */
2290     if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2291         return NULL;
2292     return result;
2293 }
2294 
2295 
2296 /* find and count characters and substrings */
2297 
/* Locate the first occurrence of byte c in the first target_len bytes of
   target.  Evaluates to a char * pointing at the match, or NULL when the
   byte is absent.  Every parameter is parenthesized in the expansion so
   that arbitrary expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2300 
2301 /* String ops must return a string.  */
2302 /* If the object is subclass of string, create a copy */
2303 Py_LOCAL(PyStringObject *)
return_self(PyStringObject * self)2304 return_self(PyStringObject *self)
2305 {
2306     if (PyString_CheckExact(self)) {
2307         Py_INCREF(self);
2308         return self;
2309     }
2310     return (PyStringObject *)PyString_FromStringAndSize(
2311         PyString_AS_STRING(self),
2312         PyString_GET_SIZE(self));
2313 }
2314 
2315 Py_LOCAL_INLINE(Py_ssize_t)
countchar(const char * target,int target_len,char c,Py_ssize_t maxcount)2316 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2317 {
2318     Py_ssize_t count=0;
2319     const char *start=target;
2320     const char *end=target+target_len;
2321 
2322     while ( (start=findchar(start, end-start, c)) != NULL ) {
2323         count++;
2324         if (count >= maxcount)
2325             break;
2326         start += 1;
2327     }
2328     return count;
2329 }
2330 
2331 
2332 /* Algorithms for different cases of string replacement */
2333 
2334 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2335 Py_LOCAL(PyStringObject *)
replace_interleave(PyStringObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2336 replace_interleave(PyStringObject *self,
2337                    const char *to_s, Py_ssize_t to_len,
2338                    Py_ssize_t maxcount)
2339 {
2340     char *self_s, *result_s;
2341     Py_ssize_t self_len, result_len;
2342     Py_ssize_t count, i, product;
2343     PyStringObject *result;
2344 
2345     self_len = PyString_GET_SIZE(self);
2346 
2347     /* 1 at the end plus 1 after every character */
2348     count = self_len+1;
2349     if (maxcount < count)
2350         count = maxcount;
2351 
2352     /* Check for overflow */
2353     /*   result_len = count * to_len + self_len; */
2354     product = count * to_len;
2355     if (product / to_len != count) {
2356         PyErr_SetString(PyExc_OverflowError,
2357                         "replace string is too long");
2358         return NULL;
2359     }
2360     result_len = product + self_len;
2361     if (result_len < 0) {
2362         PyErr_SetString(PyExc_OverflowError,
2363                         "replace string is too long");
2364         return NULL;
2365     }
2366 
2367     if (! (result = (PyStringObject *)
2368                      PyString_FromStringAndSize(NULL, result_len)) )
2369         return NULL;
2370 
2371     self_s = PyString_AS_STRING(self);
2372     result_s = PyString_AS_STRING(result);
2373 
2374     /* TODO: special case single character, which doesn't need memcpy */
2375 
2376     /* Lay the first one down (guaranteed this will occur) */
2377     Py_MEMCPY(result_s, to_s, to_len);
2378     result_s += to_len;
2379     count -= 1;
2380 
2381     for (i=0; i<count; i++) {
2382         *result_s++ = *self_s++;
2383         Py_MEMCPY(result_s, to_s, to_len);
2384         result_s += to_len;
2385     }
2386 
2387     /* Copy the rest of the original string */
2388     Py_MEMCPY(result_s, self_s, self_len-i);
2389 
2390     return result;
2391 }
2392 
2393 /* Special case for deleting a single character */
2394 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2395 Py_LOCAL(PyStringObject *)
replace_delete_single_character(PyStringObject * self,char from_c,Py_ssize_t maxcount)2396 replace_delete_single_character(PyStringObject *self,
2397                                 char from_c, Py_ssize_t maxcount)
2398 {
2399     char *self_s, *result_s;
2400     char *start, *next, *end;
2401     Py_ssize_t self_len, result_len;
2402     Py_ssize_t count;
2403     PyStringObject *result;
2404 
2405     self_len = PyString_GET_SIZE(self);
2406     self_s = PyString_AS_STRING(self);
2407 
2408     count = countchar(self_s, self_len, from_c, maxcount);
2409     if (count == 0) {
2410         return return_self(self);
2411     }
2412 
2413     result_len = self_len - count;  /* from_len == 1 */
2414     assert(result_len>=0);
2415 
2416     if ( (result = (PyStringObject *)
2417                     PyString_FromStringAndSize(NULL, result_len)) == NULL)
2418         return NULL;
2419     result_s = PyString_AS_STRING(result);
2420 
2421     start = self_s;
2422     end = self_s + self_len;
2423     while (count-- > 0) {
2424         next = findchar(start, end-start, from_c);
2425         if (next == NULL)
2426             break;
2427         Py_MEMCPY(result_s, start, next-start);
2428         result_s += (next-start);
2429         start = next+1;
2430     }
2431     Py_MEMCPY(result_s, start, end-start);
2432 
2433     return result;
2434 }
2435 
2436 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2437 
2438 Py_LOCAL(PyStringObject *)
replace_delete_substring(PyStringObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)2439 replace_delete_substring(PyStringObject *self,
2440                          const char *from_s, Py_ssize_t from_len,
2441                          Py_ssize_t maxcount) {
2442     char *self_s, *result_s;
2443     char *start, *next, *end;
2444     Py_ssize_t self_len, result_len;
2445     Py_ssize_t count, offset;
2446     PyStringObject *result;
2447 
2448     self_len = PyString_GET_SIZE(self);
2449     self_s = PyString_AS_STRING(self);
2450 
2451     count = stringlib_count(self_s, self_len,
2452                             from_s, from_len,
2453                             maxcount);
2454 
2455     if (count == 0) {
2456         /* no matches */
2457         return return_self(self);
2458     }
2459 
2460     result_len = self_len - (count * from_len);
2461     assert (result_len>=0);
2462 
2463     if ( (result = (PyStringObject *)
2464           PyString_FromStringAndSize(NULL, result_len)) == NULL )
2465         return NULL;
2466 
2467     result_s = PyString_AS_STRING(result);
2468 
2469     start = self_s;
2470     end = self_s + self_len;
2471     while (count-- > 0) {
2472         offset = stringlib_find(start, end-start,
2473                                 from_s, from_len,
2474                                 0);
2475         if (offset == -1)
2476             break;
2477         next = start + offset;
2478 
2479         Py_MEMCPY(result_s, start, next-start);
2480 
2481         result_s += (next-start);
2482         start = next+from_len;
2483     }
2484     Py_MEMCPY(result_s, start, end-start);
2485     return result;
2486 }
2487 
2488 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2489 Py_LOCAL(PyStringObject *)
replace_single_character_in_place(PyStringObject * self,char from_c,char to_c,Py_ssize_t maxcount)2490 replace_single_character_in_place(PyStringObject *self,
2491                                   char from_c, char to_c,
2492                                   Py_ssize_t maxcount)
2493 {
2494     char *self_s, *result_s, *start, *end, *next;
2495     Py_ssize_t self_len;
2496     PyStringObject *result;
2497 
2498     /* The result string will be the same size */
2499     self_s = PyString_AS_STRING(self);
2500     self_len = PyString_GET_SIZE(self);
2501 
2502     next = findchar(self_s, self_len, from_c);
2503 
2504     if (next == NULL) {
2505         /* No matches; return the original string */
2506         return return_self(self);
2507     }
2508 
2509     /* Need to make a new string */
2510     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2511     if (result == NULL)
2512         return NULL;
2513     result_s = PyString_AS_STRING(result);
2514     Py_MEMCPY(result_s, self_s, self_len);
2515 
2516     /* change everything in-place, starting with this one */
2517     start =  result_s + (next-self_s);
2518     *start = to_c;
2519     start++;
2520     end = result_s + self_len;
2521 
2522     while (--maxcount > 0) {
2523         next = findchar(start, end-start, from_c);
2524         if (next == NULL)
2525             break;
2526         *next = to_c;
2527         start = next+1;
2528     }
2529 
2530     return result;
2531 }
2532 
2533 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2534 Py_LOCAL(PyStringObject *)
replace_substring_in_place(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2535 replace_substring_in_place(PyStringObject *self,
2536                            const char *from_s, Py_ssize_t from_len,
2537                            const char *to_s, Py_ssize_t to_len,
2538                            Py_ssize_t maxcount)
2539 {
2540     char *result_s, *start, *end;
2541     char *self_s;
2542     Py_ssize_t self_len, offset;
2543     PyStringObject *result;
2544 
2545     /* The result string will be the same size */
2546 
2547     self_s = PyString_AS_STRING(self);
2548     self_len = PyString_GET_SIZE(self);
2549 
2550     offset = stringlib_find(self_s, self_len,
2551                             from_s, from_len,
2552                             0);
2553     if (offset == -1) {
2554         /* No matches; return the original string */
2555         return return_self(self);
2556     }
2557 
2558     /* Need to make a new string */
2559     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2560     if (result == NULL)
2561         return NULL;
2562     result_s = PyString_AS_STRING(result);
2563     Py_MEMCPY(result_s, self_s, self_len);
2564 
2565     /* change everything in-place, starting with this one */
2566     start =  result_s + offset;
2567     Py_MEMCPY(start, to_s, from_len);
2568     start += from_len;
2569     end = result_s + self_len;
2570 
2571     while ( --maxcount > 0) {
2572         offset = stringlib_find(start, end-start,
2573                                 from_s, from_len,
2574                                 0);
2575         if (offset==-1)
2576             break;
2577         Py_MEMCPY(start+offset, to_s, from_len);
2578         start += offset+from_len;
2579     }
2580 
2581     return result;
2582 }
2583 
2584 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2585 Py_LOCAL(PyStringObject *)
replace_single_character(PyStringObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2586 replace_single_character(PyStringObject *self,
2587                          char from_c,
2588                          const char *to_s, Py_ssize_t to_len,
2589                          Py_ssize_t maxcount)
2590 {
2591     char *self_s, *result_s;
2592     char *start, *next, *end;
2593     Py_ssize_t self_len, result_len;
2594     Py_ssize_t count, product;
2595     PyStringObject *result;
2596 
2597     self_s = PyString_AS_STRING(self);
2598     self_len = PyString_GET_SIZE(self);
2599 
2600     count = countchar(self_s, self_len, from_c, maxcount);
2601     if (count == 0) {
2602         /* no matches, return unchanged */
2603         return return_self(self);
2604     }
2605 
2606     /* use the difference between current and new, hence the "-1" */
2607     /*   result_len = self_len + count * (to_len-1)  */
2608     product = count * (to_len-1);
2609     if (product / (to_len-1) != count) {
2610         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2611         return NULL;
2612     }
2613     result_len = self_len + product;
2614     if (result_len < 0) {
2615         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2616         return NULL;
2617     }
2618 
2619     if ( (result = (PyStringObject *)
2620           PyString_FromStringAndSize(NULL, result_len)) == NULL)
2621         return NULL;
2622     result_s = PyString_AS_STRING(result);
2623 
2624     start = self_s;
2625     end = self_s + self_len;
2626     while (count-- > 0) {
2627         next = findchar(start, end-start, from_c);
2628         if (next == NULL)
2629             break;
2630 
2631         if (next == start) {
2632             /* replace with the 'to' */
2633             Py_MEMCPY(result_s, to_s, to_len);
2634             result_s += to_len;
2635             start += 1;
2636         } else {
2637             /* copy the unchanged old then the 'to' */
2638             Py_MEMCPY(result_s, start, next-start);
2639             result_s += (next-start);
2640             Py_MEMCPY(result_s, to_s, to_len);
2641             result_s += to_len;
2642             start = next+1;
2643         }
2644     }
2645     /* Copy the remainder of the remaining string */
2646     Py_MEMCPY(result_s, start, end-start);
2647 
2648     return result;
2649 }
2650 
2651 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2652 Py_LOCAL(PyStringObject *)
replace_substring(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2653 replace_substring(PyStringObject *self,
2654                   const char *from_s, Py_ssize_t from_len,
2655                   const char *to_s, Py_ssize_t to_len,
2656                   Py_ssize_t maxcount) {
2657     char *self_s, *result_s;
2658     char *start, *next, *end;
2659     Py_ssize_t self_len, result_len;
2660     Py_ssize_t count, offset, product;
2661     PyStringObject *result;
2662 
2663     self_s = PyString_AS_STRING(self);
2664     self_len = PyString_GET_SIZE(self);
2665 
2666     count = stringlib_count(self_s, self_len,
2667                             from_s, from_len,
2668                             maxcount);
2669 
2670     if (count == 0) {
2671         /* no matches, return unchanged */
2672         return return_self(self);
2673     }
2674 
2675     /* Check for overflow */
2676     /*    result_len = self_len + count * (to_len-from_len) */
2677     product = count * (to_len-from_len);
2678     if (product / (to_len-from_len) != count) {
2679         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2680         return NULL;
2681     }
2682     result_len = self_len + product;
2683     if (result_len < 0) {
2684         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2685         return NULL;
2686     }
2687 
2688     if ( (result = (PyStringObject *)
2689           PyString_FromStringAndSize(NULL, result_len)) == NULL)
2690         return NULL;
2691     result_s = PyString_AS_STRING(result);
2692 
2693     start = self_s;
2694     end = self_s + self_len;
2695     while (count-- > 0) {
2696         offset = stringlib_find(start, end-start,
2697                                 from_s, from_len,
2698                                 0);
2699         if (offset == -1)
2700             break;
2701         next = start+offset;
2702         if (next == start) {
2703             /* replace with the 'to' */
2704             Py_MEMCPY(result_s, to_s, to_len);
2705             result_s += to_len;
2706             start += from_len;
2707         } else {
2708             /* copy the unchanged old then the 'to' */
2709             Py_MEMCPY(result_s, start, next-start);
2710             result_s += (next-start);
2711             Py_MEMCPY(result_s, to_s, to_len);
2712             result_s += to_len;
2713             start = next+from_len;
2714         }
2715     }
2716     /* Copy the remainder of the remaining string */
2717     Py_MEMCPY(result_s, start, end-start);
2718 
2719     return result;
2720 }
2721 
2722 
2723 Py_LOCAL(PyStringObject *)
replace(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2724 replace(PyStringObject *self,
2725     const char *from_s, Py_ssize_t from_len,
2726     const char *to_s, Py_ssize_t to_len,
2727     Py_ssize_t maxcount)
2728 {
2729     if (maxcount < 0) {
2730         maxcount = PY_SSIZE_T_MAX;
2731     } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2732         /* nothing to do; return the original string */
2733         return return_self(self);
2734     }
2735 
2736     if (maxcount == 0 ||
2737         (from_len == 0 && to_len == 0)) {
2738         /* nothing to do; return the original string */
2739         return return_self(self);
2740     }
2741 
2742     /* Handle zero-length special cases */
2743 
2744     if (from_len == 0) {
2745         /* insert the 'to' string everywhere.   */
2746         /*    >>> "Python".replace("", ".")     */
2747         /*    '.P.y.t.h.o.n.'                   */
2748         return replace_interleave(self, to_s, to_len, maxcount);
2749     }
2750 
2751     /* Except for "".replace("", "A") == "A" there is no way beyond this */
2752     /* point for an empty self string to generate a non-empty string */
2753     /* Special case so the remaining code always gets a non-empty string */
2754     if (PyString_GET_SIZE(self) == 0) {
2755         return return_self(self);
2756     }
2757 
2758     if (to_len == 0) {
2759         /* delete all occurances of 'from' string */
2760         if (from_len == 1) {
2761             return replace_delete_single_character(
2762                 self, from_s[0], maxcount);
2763         } else {
2764             return replace_delete_substring(self, from_s, from_len, maxcount);
2765         }
2766     }
2767 
2768     /* Handle special case where both strings have the same length */
2769 
2770     if (from_len == to_len) {
2771         if (from_len == 1) {
2772             return replace_single_character_in_place(
2773                 self,
2774                 from_s[0],
2775                 to_s[0],
2776                 maxcount);
2777         } else {
2778             return replace_substring_in_place(
2779                 self, from_s, from_len, to_s, to_len, maxcount);
2780         }
2781     }
2782 
2783     /* Otherwise use the more generic algorithms */
2784     if (from_len == 1) {
2785         return replace_single_character(self, from_s[0],
2786                                         to_s, to_len, maxcount);
2787     } else {
2788         /* len('from')>=2, len('to')>=1 */
2789         return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2790     }
2791 }
2792 
2793 PyDoc_STRVAR(replace__doc__,
2794 "S.replace(old, new[, count]) -> string\n\
2795 \n\
2796 Return a copy of string S with all occurrences of substring\n\
2797 old replaced by new.  If the optional argument count is\n\
2798 given, only the first count occurrences are replaced.");
2799 
2800 static PyObject *
string_replace(PyStringObject * self,PyObject * args)2801 string_replace(PyStringObject *self, PyObject *args)
2802 {
2803     Py_ssize_t count = -1;
2804     PyObject *from, *to;
2805     const char *from_s, *to_s;
2806     Py_ssize_t from_len, to_len;
2807 
2808     if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2809         return NULL;
2810 
2811     if (PyString_Check(from)) {
2812         from_s = PyString_AS_STRING(from);
2813         from_len = PyString_GET_SIZE(from);
2814     }
2815 #ifdef Py_USING_UNICODE
2816     if (PyUnicode_Check(from))
2817         return PyUnicode_Replace((PyObject *)self,
2818                                  from, to, count);
2819 #endif
2820     else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2821         return NULL;
2822 
2823     if (PyString_Check(to)) {
2824         to_s = PyString_AS_STRING(to);
2825         to_len = PyString_GET_SIZE(to);
2826     }
2827 #ifdef Py_USING_UNICODE
2828     else if (PyUnicode_Check(to))
2829         return PyUnicode_Replace((PyObject *)self,
2830                                  from, to, count);
2831 #endif
2832     else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2833         return NULL;
2834 
2835     return (PyObject *)replace((PyStringObject *) self,
2836                                from_s, from_len,
2837                                to_s, to_len, count);
2838 }
2839 
2840 /** End DALKE **/
2841 
2842 /* Matches the end (direction >= 0) or start (direction < 0) of self
2843  * against substr, using the start and end arguments. Returns
2844  * -1 on error, 0 if not found and 1 if found.
2845  */
2846 Py_LOCAL(int)
_string_tailmatch(PyStringObject * self,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)2847 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2848                   Py_ssize_t end, int direction)
2849 {
2850     Py_ssize_t len = PyString_GET_SIZE(self);
2851     Py_ssize_t slen;
2852     const char* sub;
2853     const char* str;
2854 
2855     if (PyString_Check(substr)) {
2856         sub = PyString_AS_STRING(substr);
2857         slen = PyString_GET_SIZE(substr);
2858     }
2859 #ifdef Py_USING_UNICODE
2860     else if (PyUnicode_Check(substr))
2861         return PyUnicode_Tailmatch((PyObject *)self,
2862                                    substr, start, end, direction);
2863 #endif
2864     else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2865         return -1;
2866     str = PyString_AS_STRING(self);
2867 
2868     ADJUST_INDICES(start, end, len);
2869 
2870     if (direction < 0) {
2871         /* startswith */
2872         if (start+slen > len)
2873             return 0;
2874     } else {
2875         /* endswith */
2876         if (end-start < slen || start > len)
2877             return 0;
2878 
2879         if (end-slen > start)
2880             start = end - slen;
2881     }
2882     if (end-start >= slen)
2883         return ! memcmp(str+start, sub, slen);
2884     return 0;
2885 }
2886 
2887 
2888 PyDoc_STRVAR(startswith__doc__,
2889 "S.startswith(prefix[, start[, end]]) -> bool\n\
2890 \n\
2891 Return True if S starts with the specified prefix, False otherwise.\n\
2892 With optional start, test S beginning at that position.\n\
2893 With optional end, stop comparing S at that position.\n\
2894 prefix can also be a tuple of strings to try.");
2895 
2896 static PyObject *
string_startswith(PyStringObject * self,PyObject * args)2897 string_startswith(PyStringObject *self, PyObject *args)
2898 {
2899     Py_ssize_t start = 0;
2900     Py_ssize_t end = PY_SSIZE_T_MAX;
2901     PyObject *subobj;
2902     int result;
2903 
2904     if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
2905         return NULL;
2906     if (PyTuple_Check(subobj)) {
2907         Py_ssize_t i;
2908         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2909             result = _string_tailmatch(self,
2910                             PyTuple_GET_ITEM(subobj, i),
2911                             start, end, -1);
2912             if (result == -1)
2913                 return NULL;
2914             else if (result) {
2915                 Py_RETURN_TRUE;
2916             }
2917         }
2918         Py_RETURN_FALSE;
2919     }
2920     result = _string_tailmatch(self, subobj, start, end, -1);
2921     if (result == -1) {
2922         if (PyErr_ExceptionMatches(PyExc_TypeError))
2923             PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2924                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2925         return NULL;
2926     }
2927     else
2928         return PyBool_FromLong(result);
2929 }
2930 
2931 
2932 PyDoc_STRVAR(endswith__doc__,
2933 "S.endswith(suffix[, start[, end]]) -> bool\n\
2934 \n\
2935 Return True if S ends with the specified suffix, False otherwise.\n\
2936 With optional start, test S beginning at that position.\n\
2937 With optional end, stop comparing S at that position.\n\
2938 suffix can also be a tuple of strings to try.");
2939 
2940 static PyObject *
string_endswith(PyStringObject * self,PyObject * args)2941 string_endswith(PyStringObject *self, PyObject *args)
2942 {
2943     Py_ssize_t start = 0;
2944     Py_ssize_t end = PY_SSIZE_T_MAX;
2945     PyObject *subobj;
2946     int result;
2947 
2948     if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
2949         return NULL;
2950     if (PyTuple_Check(subobj)) {
2951         Py_ssize_t i;
2952         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2953             result = _string_tailmatch(self,
2954                             PyTuple_GET_ITEM(subobj, i),
2955                             start, end, +1);
2956             if (result == -1)
2957                 return NULL;
2958             else if (result) {
2959                 Py_RETURN_TRUE;
2960             }
2961         }
2962         Py_RETURN_FALSE;
2963     }
2964     result = _string_tailmatch(self, subobj, start, end, +1);
2965     if (result == -1) {
2966         if (PyErr_ExceptionMatches(PyExc_TypeError))
2967             PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2968                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2969         return NULL;
2970     }
2971     else
2972         return PyBool_FromLong(result);
2973 }
2974 
2975 
2976 PyDoc_STRVAR(encode__doc__,
2977 "S.encode([encoding[,errors]]) -> object\n\
2978 \n\
2979 Encodes S using the codec registered for encoding. encoding defaults\n\
2980 to the default encoding. errors may be given to set a different error\n\
2981 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2982 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2983 'xmlcharrefreplace' as well as any other name registered with\n\
2984 codecs.register_error that is able to handle UnicodeEncodeErrors.");
2985 
2986 static PyObject *
string_encode(PyStringObject * self,PyObject * args,PyObject * kwargs)2987 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
2988 {
2989     static char *kwlist[] = {"encoding", "errors", 0};
2990     char *encoding = NULL;
2991     char *errors = NULL;
2992     PyObject *v;
2993 
2994     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
2995                                      kwlist, &encoding, &errors))
2996         return NULL;
2997     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2998     if (v == NULL)
2999         goto onError;
3000     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3001         PyErr_Format(PyExc_TypeError,
3002                      "encoder did not return a string/unicode object "
3003                      "(type=%.400s)",
3004                      Py_TYPE(v)->tp_name);
3005         Py_DECREF(v);
3006         return NULL;
3007     }
3008     return v;
3009 
3010  onError:
3011     return NULL;
3012 }
3013 
3014 
3015 PyDoc_STRVAR(decode__doc__,
3016 "S.decode([encoding[,errors]]) -> object\n\
3017 \n\
3018 Decodes S using the codec registered for encoding. encoding defaults\n\
3019 to the default encoding. errors may be given to set a different error\n\
3020 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3021 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3022 as well as any other name registered with codecs.register_error that is\n\
3023 able to handle UnicodeDecodeErrors.");
3024 
3025 static PyObject *
string_decode(PyStringObject * self,PyObject * args,PyObject * kwargs)3026 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3027 {
3028     static char *kwlist[] = {"encoding", "errors", 0};
3029     char *encoding = NULL;
3030     char *errors = NULL;
3031     PyObject *v;
3032 
3033     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3034                                      kwlist, &encoding, &errors))
3035         return NULL;
3036     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3037     if (v == NULL)
3038         goto onError;
3039     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3040         PyErr_Format(PyExc_TypeError,
3041                      "decoder did not return a string/unicode object "
3042                      "(type=%.400s)",
3043                      Py_TYPE(v)->tp_name);
3044         Py_DECREF(v);
3045         return NULL;
3046     }
3047     return v;
3048 
3049  onError:
3050     return NULL;
3051 }
3052 
3053 
3054 PyDoc_STRVAR(expandtabs__doc__,
3055 "S.expandtabs([tabsize]) -> string\n\
3056 \n\
3057 Return a copy of S where all tab characters are expanded using spaces.\n\
3058 If tabsize is not given, a tab size of 8 characters is assumed.");
3059 
3060 static PyObject*
string_expandtabs(PyStringObject * self,PyObject * args)3061 string_expandtabs(PyStringObject *self, PyObject *args)
3062 {
3063     const char *e, *p, *qe;
3064     char *q;
3065     Py_ssize_t i, j, incr;
3066     PyObject *u;
3067     int tabsize = 8;
3068 
3069     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3070         return NULL;
3071 
3072     /* First pass: determine size of output string */
3073     i = 0; /* chars up to and including most recent \n or \r */
3074     j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3075     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3076     for (p = PyString_AS_STRING(self); p < e; p++)
3077     if (*p == '\t') {
3078         if (tabsize > 0) {
3079             incr = tabsize - (j % tabsize);
3080             if (j > PY_SSIZE_T_MAX - incr)
3081                 goto overflow1;
3082             j += incr;
3083         }
3084     }
3085     else {
3086         if (j > PY_SSIZE_T_MAX - 1)
3087             goto overflow1;
3088         j++;
3089         if (*p == '\n' || *p == '\r') {
3090             if (i > PY_SSIZE_T_MAX - j)
3091                 goto overflow1;
3092             i += j;
3093             j = 0;
3094         }
3095     }
3096 
3097     if (i > PY_SSIZE_T_MAX - j)
3098         goto overflow1;
3099 
3100     /* Second pass: create output string and fill it */
3101     u = PyString_FromStringAndSize(NULL, i + j);
3102     if (!u)
3103         return NULL;
3104 
3105     j = 0; /* same as in first pass */
3106     q = PyString_AS_STRING(u); /* next output char */
3107     qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3108 
3109     for (p = PyString_AS_STRING(self); p < e; p++)
3110     if (*p == '\t') {
3111         if (tabsize > 0) {
3112             i = tabsize - (j % tabsize);
3113             j += i;
3114             while (i--) {
3115                 if (q >= qe)
3116                     goto overflow2;
3117                 *q++ = ' ';
3118             }
3119         }
3120     }
3121     else {
3122         if (q >= qe)
3123             goto overflow2;
3124         *q++ = *p;
3125         j++;
3126         if (*p == '\n' || *p == '\r')
3127             j = 0;
3128     }
3129 
3130     return u;
3131 
3132   overflow2:
3133     Py_DECREF(u);
3134   overflow1:
3135     PyErr_SetString(PyExc_OverflowError, "new string is too long");
3136     return NULL;
3137 }
3138 
3139 Py_LOCAL_INLINE(PyObject *)
pad(PyStringObject * self,Py_ssize_t left,Py_ssize_t right,char fill)3140 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3141 {
3142     PyObject *u;
3143 
3144     if (left < 0)
3145         left = 0;
3146     if (right < 0)
3147         right = 0;
3148 
3149     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3150         Py_INCREF(self);
3151         return (PyObject *)self;
3152     }
3153 
3154     u = PyString_FromStringAndSize(NULL,
3155                                    left + PyString_GET_SIZE(self) + right);
3156     if (u) {
3157         if (left)
3158             memset(PyString_AS_STRING(u), fill, left);
3159         Py_MEMCPY(PyString_AS_STRING(u) + left,
3160                PyString_AS_STRING(self),
3161                PyString_GET_SIZE(self));
3162         if (right)
3163             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3164                fill, right);
3165     }
3166 
3167     return u;
3168 }
3169 
3170 PyDoc_STRVAR(ljust__doc__,
3171 "S.ljust(width[, fillchar]) -> string\n"
3172 "\n"
3173 "Return S left-justified in a string of length width. Padding is\n"
3174 "done using the specified fill character (default is a space).");
3175 
3176 static PyObject *
string_ljust(PyStringObject * self,PyObject * args)3177 string_ljust(PyStringObject *self, PyObject *args)
3178 {
3179     Py_ssize_t width;
3180     char fillchar = ' ';
3181 
3182     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3183         return NULL;
3184 
3185     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3186         Py_INCREF(self);
3187         return (PyObject*) self;
3188     }
3189 
3190     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3191 }
3192 
3193 
3194 PyDoc_STRVAR(rjust__doc__,
3195 "S.rjust(width[, fillchar]) -> string\n"
3196 "\n"
3197 "Return S right-justified in a string of length width. Padding is\n"
3198 "done using the specified fill character (default is a space)");
3199 
3200 static PyObject *
string_rjust(PyStringObject * self,PyObject * args)3201 string_rjust(PyStringObject *self, PyObject *args)
3202 {
3203     Py_ssize_t width;
3204     char fillchar = ' ';
3205 
3206     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3207         return NULL;
3208 
3209     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3210         Py_INCREF(self);
3211         return (PyObject*) self;
3212     }
3213 
3214     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3215 }
3216 
3217 
3218 PyDoc_STRVAR(center__doc__,
3219 "S.center(width[, fillchar]) -> string\n"
3220 "\n"
3221 "Return S centered in a string of length width. Padding is\n"
3222 "done using the specified fill character (default is a space)");
3223 
3224 static PyObject *
string_center(PyStringObject * self,PyObject * args)3225 string_center(PyStringObject *self, PyObject *args)
3226 {
3227     Py_ssize_t marg, left;
3228     Py_ssize_t width;
3229     char fillchar = ' ';
3230 
3231     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3232         return NULL;
3233 
3234     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3235         Py_INCREF(self);
3236         return (PyObject*) self;
3237     }
3238 
3239     marg = width - PyString_GET_SIZE(self);
3240     left = marg / 2 + (marg & width & 1);
3241 
3242     return pad(self, left, marg - left, fillchar);
3243 }
3244 
3245 PyDoc_STRVAR(zfill__doc__,
3246 "S.zfill(width) -> string\n"
3247 "\n"
3248 "Pad a numeric string S with zeros on the left, to fill a field\n"
3249 "of the specified width.  The string S is never truncated.");
3250 
3251 static PyObject *
string_zfill(PyStringObject * self,PyObject * args)3252 string_zfill(PyStringObject *self, PyObject *args)
3253 {
3254     Py_ssize_t fill;
3255     PyObject *s;
3256     char *p;
3257     Py_ssize_t width;
3258 
3259     if (!PyArg_ParseTuple(args, "n:zfill", &width))
3260         return NULL;
3261 
3262     if (PyString_GET_SIZE(self) >= width) {
3263         if (PyString_CheckExact(self)) {
3264             Py_INCREF(self);
3265             return (PyObject*) self;
3266         }
3267         else
3268             return PyString_FromStringAndSize(
3269             PyString_AS_STRING(self),
3270             PyString_GET_SIZE(self)
3271             );
3272     }
3273 
3274     fill = width - PyString_GET_SIZE(self);
3275 
3276     s = pad(self, fill, 0, '0');
3277 
3278     if (s == NULL)
3279         return NULL;
3280 
3281     p = PyString_AS_STRING(s);
3282     if (p[fill] == '+' || p[fill] == '-') {
3283         /* move sign to beginning of string */
3284         p[0] = p[fill];
3285         p[fill] = '0';
3286     }
3287 
3288     return (PyObject*) s;
3289 }
3290 
3291 PyDoc_STRVAR(isspace__doc__,
3292 "S.isspace() -> bool\n\
3293 \n\
3294 Return True if all characters in S are whitespace\n\
3295 and there is at least one character in S, False otherwise.");
3296 
3297 static PyObject*
string_isspace(PyStringObject * self)3298 string_isspace(PyStringObject *self)
3299 {
3300     register const unsigned char *p
3301         = (unsigned char *) PyString_AS_STRING(self);
3302     register const unsigned char *e;
3303 
3304     /* Shortcut for single character strings */
3305     if (PyString_GET_SIZE(self) == 1 &&
3306         isspace(*p))
3307         return PyBool_FromLong(1);
3308 
3309     /* Special case for empty strings */
3310     if (PyString_GET_SIZE(self) == 0)
3311         return PyBool_FromLong(0);
3312 
3313     e = p + PyString_GET_SIZE(self);
3314     for (; p < e; p++) {
3315         if (!isspace(*p))
3316             return PyBool_FromLong(0);
3317     }
3318     return PyBool_FromLong(1);
3319 }
3320 
3321 
3322 PyDoc_STRVAR(isalpha__doc__,
3323 "S.isalpha() -> bool\n\
3324 \n\
3325 Return True if all characters in S are alphabetic\n\
3326 and there is at least one character in S, False otherwise.");
3327 
3328 static PyObject*
string_isalpha(PyStringObject * self)3329 string_isalpha(PyStringObject *self)
3330 {
3331     register const unsigned char *p
3332         = (unsigned char *) PyString_AS_STRING(self);
3333     register const unsigned char *e;
3334 
3335     /* Shortcut for single character strings */
3336     if (PyString_GET_SIZE(self) == 1 &&
3337         isalpha(*p))
3338         return PyBool_FromLong(1);
3339 
3340     /* Special case for empty strings */
3341     if (PyString_GET_SIZE(self) == 0)
3342         return PyBool_FromLong(0);
3343 
3344     e = p + PyString_GET_SIZE(self);
3345     for (; p < e; p++) {
3346         if (!isalpha(*p))
3347             return PyBool_FromLong(0);
3348     }
3349     return PyBool_FromLong(1);
3350 }
3351 
3352 
3353 PyDoc_STRVAR(isalnum__doc__,
3354 "S.isalnum() -> bool\n\
3355 \n\
3356 Return True if all characters in S are alphanumeric\n\
3357 and there is at least one character in S, False otherwise.");
3358 
3359 static PyObject*
string_isalnum(PyStringObject * self)3360 string_isalnum(PyStringObject *self)
3361 {
3362     register const unsigned char *p
3363         = (unsigned char *) PyString_AS_STRING(self);
3364     register const unsigned char *e;
3365 
3366     /* Shortcut for single character strings */
3367     if (PyString_GET_SIZE(self) == 1 &&
3368         isalnum(*p))
3369         return PyBool_FromLong(1);
3370 
3371     /* Special case for empty strings */
3372     if (PyString_GET_SIZE(self) == 0)
3373         return PyBool_FromLong(0);
3374 
3375     e = p + PyString_GET_SIZE(self);
3376     for (; p < e; p++) {
3377         if (!isalnum(*p))
3378             return PyBool_FromLong(0);
3379     }
3380     return PyBool_FromLong(1);
3381 }
3382 
3383 
3384 PyDoc_STRVAR(isdigit__doc__,
3385 "S.isdigit() -> bool\n\
3386 \n\
3387 Return True if all characters in S are digits\n\
3388 and there is at least one character in S, False otherwise.");
3389 
3390 static PyObject*
string_isdigit(PyStringObject * self)3391 string_isdigit(PyStringObject *self)
3392 {
3393     register const unsigned char *p
3394         = (unsigned char *) PyString_AS_STRING(self);
3395     register const unsigned char *e;
3396 
3397     /* Shortcut for single character strings */
3398     if (PyString_GET_SIZE(self) == 1 &&
3399         isdigit(*p))
3400         return PyBool_FromLong(1);
3401 
3402     /* Special case for empty strings */
3403     if (PyString_GET_SIZE(self) == 0)
3404         return PyBool_FromLong(0);
3405 
3406     e = p + PyString_GET_SIZE(self);
3407     for (; p < e; p++) {
3408         if (!isdigit(*p))
3409             return PyBool_FromLong(0);
3410     }
3411     return PyBool_FromLong(1);
3412 }
3413 
3414 
3415 PyDoc_STRVAR(islower__doc__,
3416 "S.islower() -> bool\n\
3417 \n\
3418 Return True if all cased characters in S are lowercase and there is\n\
3419 at least one cased character in S, False otherwise.");
3420 
3421 static PyObject*
string_islower(PyStringObject * self)3422 string_islower(PyStringObject *self)
3423 {
3424     register const unsigned char *p
3425         = (unsigned char *) PyString_AS_STRING(self);
3426     register const unsigned char *e;
3427     int cased;
3428 
3429     /* Shortcut for single character strings */
3430     if (PyString_GET_SIZE(self) == 1)
3431         return PyBool_FromLong(islower(*p) != 0);
3432 
3433     /* Special case for empty strings */
3434     if (PyString_GET_SIZE(self) == 0)
3435         return PyBool_FromLong(0);
3436 
3437     e = p + PyString_GET_SIZE(self);
3438     cased = 0;
3439     for (; p < e; p++) {
3440         if (isupper(*p))
3441             return PyBool_FromLong(0);
3442         else if (!cased && islower(*p))
3443             cased = 1;
3444     }
3445     return PyBool_FromLong(cased);
3446 }
3447 
3448 
3449 PyDoc_STRVAR(isupper__doc__,
3450 "S.isupper() -> bool\n\
3451 \n\
3452 Return True if all cased characters in S are uppercase and there is\n\
3453 at least one cased character in S, False otherwise.");
3454 
3455 static PyObject*
string_isupper(PyStringObject * self)3456 string_isupper(PyStringObject *self)
3457 {
3458     register const unsigned char *p
3459         = (unsigned char *) PyString_AS_STRING(self);
3460     register const unsigned char *e;
3461     int cased;
3462 
3463     /* Shortcut for single character strings */
3464     if (PyString_GET_SIZE(self) == 1)
3465         return PyBool_FromLong(isupper(*p) != 0);
3466 
3467     /* Special case for empty strings */
3468     if (PyString_GET_SIZE(self) == 0)
3469         return PyBool_FromLong(0);
3470 
3471     e = p + PyString_GET_SIZE(self);
3472     cased = 0;
3473     for (; p < e; p++) {
3474         if (islower(*p))
3475             return PyBool_FromLong(0);
3476         else if (!cased && isupper(*p))
3477             cased = 1;
3478     }
3479     return PyBool_FromLong(cased);
3480 }
3481 
3482 
3483 PyDoc_STRVAR(istitle__doc__,
3484 "S.istitle() -> bool\n\
3485 \n\
3486 Return True if S is a titlecased string and there is at least one\n\
3487 character in S, i.e. uppercase characters may only follow uncased\n\
3488 characters and lowercase characters only cased ones. Return False\n\
3489 otherwise.");
3490 
3491 static PyObject*
string_istitle(PyStringObject * self,PyObject * uncased)3492 string_istitle(PyStringObject *self, PyObject *uncased)
3493 {
3494     register const unsigned char *p
3495         = (unsigned char *) PyString_AS_STRING(self);
3496     register const unsigned char *e;
3497     int cased, previous_is_cased;
3498 
3499     /* Shortcut for single character strings */
3500     if (PyString_GET_SIZE(self) == 1)
3501         return PyBool_FromLong(isupper(*p) != 0);
3502 
3503     /* Special case for empty strings */
3504     if (PyString_GET_SIZE(self) == 0)
3505         return PyBool_FromLong(0);
3506 
3507     e = p + PyString_GET_SIZE(self);
3508     cased = 0;
3509     previous_is_cased = 0;
3510     for (; p < e; p++) {
3511         register const unsigned char ch = *p;
3512 
3513         if (isupper(ch)) {
3514             if (previous_is_cased)
3515                 return PyBool_FromLong(0);
3516             previous_is_cased = 1;
3517             cased = 1;
3518         }
3519         else if (islower(ch)) {
3520             if (!previous_is_cased)
3521                 return PyBool_FromLong(0);
3522             previous_is_cased = 1;
3523             cased = 1;
3524         }
3525         else
3526             previous_is_cased = 0;
3527     }
3528     return PyBool_FromLong(cased);
3529 }
3530 
3531 
3532 PyDoc_STRVAR(splitlines__doc__,
3533 "S.splitlines([keepends]) -> list of strings\n\
3534 \n\
3535 Return a list of the lines in S, breaking at line boundaries.\n\
3536 Line breaks are not included in the resulting list unless keepends\n\
3537 is given and true.");
3538 
3539 static PyObject*
string_splitlines(PyStringObject * self,PyObject * args)3540 string_splitlines(PyStringObject *self, PyObject *args)
3541 {
3542     int keepends = 0;
3543 
3544     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3545         return NULL;
3546 
3547     return stringlib_splitlines(
3548         (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3549         keepends
3550     );
3551 }
3552 
3553 PyDoc_STRVAR(sizeof__doc__,
3554 "S.__sizeof__() -> size of S in memory, in bytes");
3555 
3556 static PyObject *
string_sizeof(PyStringObject * v)3557 string_sizeof(PyStringObject *v)
3558 {
3559     Py_ssize_t res;
3560     res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3561     return PyInt_FromSsize_t(res);
3562 }
3563 
3564 static PyObject *
string_getnewargs(PyStringObject * v)3565 string_getnewargs(PyStringObject *v)
3566 {
3567     return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3568 }
3569 
3570 
3571 #include "stringlib/string_format.h"
3572 
3573 PyDoc_STRVAR(format__doc__,
3574 "S.format(*args, **kwargs) -> string\n\
3575 \n\
3576 Return a formatted version of S, using substitutions from args and kwargs.\n\
3577 The substitutions are identified by braces ('{' and '}').");
3578 
3579 static PyObject *
string__format__(PyObject * self,PyObject * args)3580 string__format__(PyObject* self, PyObject* args)
3581 {
3582     PyObject *format_spec;
3583     PyObject *result = NULL;
3584     PyObject *tmp = NULL;
3585 
3586     /* If 2.x, convert format_spec to the same type as value */
3587     /* This is to allow things like u''.format('') */
3588     if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3589         goto done;
3590     if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3591         PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3592                      "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3593         goto done;
3594     }
3595     tmp = PyObject_Str(format_spec);
3596     if (tmp == NULL)
3597         goto done;
3598     format_spec = tmp;
3599 
3600     result = _PyBytes_FormatAdvanced(self,
3601                                      PyString_AS_STRING(format_spec),
3602                                      PyString_GET_SIZE(format_spec));
3603 done:
3604     Py_XDECREF(tmp);
3605     return result;
3606 }
3607 
3608 PyDoc_STRVAR(p_format__doc__,
3609 "S.__format__(format_spec) -> string\n\
3610 \n\
3611 Return a formatted version of S as described by format_spec.");
3612 
3613 
3614 static PyMethodDef
3615 string_methods[] = {
3616     /* Counterparts of the obsolete stropmodule functions; except
3617        string.maketrans(). */
3618     {"join", (PyCFunction)string_join, METH_O, join__doc__},
3619     {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3620     {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3621     {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3622     {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3623     {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3624     {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3625     {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3626     {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3627     {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3628     {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3629     {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3630     {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3631      capitalize__doc__},
3632     {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3633     {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3634      endswith__doc__},
3635     {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3636     {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3637     {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3638     {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3639     {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3640     {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3641     {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3642     {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3643     {"rpartition", (PyCFunction)string_rpartition, METH_O,
3644      rpartition__doc__},
3645     {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3646      startswith__doc__},
3647     {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3648     {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3649      swapcase__doc__},
3650     {"translate", (PyCFunction)string_translate, METH_VARARGS,
3651      translate__doc__},
3652     {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3653     {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3654     {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3655     {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3656     {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3657     {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3658     {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3659     {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3660     {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3661     {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3662     {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3663     {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3664      expandtabs__doc__},
3665     {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3666      splitlines__doc__},
3667     {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3668      sizeof__doc__},
3669     {"__getnewargs__",          (PyCFunction)string_getnewargs, METH_NOARGS},
3670     {NULL,     NULL}                         /* sentinel */
3671 };
3672 
/* Forward declaration: string_new() hands construction of subtype
   instances to str_subtype_new(), which is defined after it and in turn
   calls string_new() for the exact str type. */
static PyObject *
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3675 
3676 static PyObject *
string_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3677 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3678 {
3679     PyObject *x = NULL;
3680     static char *kwlist[] = {"object", 0};
3681 
3682     if (type != &PyString_Type)
3683         return str_subtype_new(type, args, kwds);
3684     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3685         return NULL;
3686     if (x == NULL)
3687         return PyString_FromString("");
3688     return PyObject_Str(x);
3689 }
3690 
3691 static PyObject *
str_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3692 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3693 {
3694     PyObject *tmp, *pnew;
3695     Py_ssize_t n;
3696 
3697     assert(PyType_IsSubtype(type, &PyString_Type));
3698     tmp = string_new(&PyString_Type, args, kwds);
3699     if (tmp == NULL)
3700         return NULL;
3701     assert(PyString_CheckExact(tmp));
3702     n = PyString_GET_SIZE(tmp);
3703     pnew = type->tp_alloc(type, n);
3704     if (pnew != NULL) {
3705         Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3706         ((PyStringObject *)pnew)->ob_shash =
3707             ((PyStringObject *)tmp)->ob_shash;
3708         ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3709     }
3710     Py_DECREF(tmp);
3711     return pnew;
3712 }
3713 
3714 static PyObject *
basestring_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3715 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3716 {
3717     PyErr_SetString(PyExc_TypeError,
3718                     "The basestring type cannot be instantiated");
3719     return NULL;
3720 }
3721 
3722 static PyObject *
string_mod(PyObject * v,PyObject * w)3723 string_mod(PyObject *v, PyObject *w)
3724 {
3725     if (!PyString_Check(v)) {
3726         Py_INCREF(Py_NotImplemented);
3727         return Py_NotImplemented;
3728     }
3729     return PyString_Format(v, w);
3730 }
3731 
/* Docstring for the basestring type; used as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
3734 
/* Number protocol table for str.  Only the remainder slot is filled in,
   so that `format % args' reaches string_mod(); the slots listed before it
   are explicitly 0 and the ones after it are left to default
   initialization. */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
3742 
3743 
/* Type object for basestring.  It derives from PyBaseObject_Type, may be
   subclassed (Py_TPFLAGS_BASETYPE), and its tp_new is basestring_new(),
   which always fails with TypeError.  PyString_Type below names it as its
   tp_base. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
3785 
/* Docstring for the str type; used as tp_doc of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");
3791 
/* Type object for str.  It is a variable-size type: the basic size is
   PyStringObject_SIZE and each item is one char.  Its base is
   PyBaseString_Type, construction goes through string_new(), and the `%'
   operator is provided by the nb_remainder entry of string_as_number. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    PyStringObject_SIZE,                        /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3835 
3836 void
PyString_Concat(register PyObject ** pv,register PyObject * w)3837 PyString_Concat(register PyObject **pv, register PyObject *w)
3838 {
3839     register PyObject *v;
3840     if (*pv == NULL)
3841         return;
3842     if (w == NULL || !PyString_Check(*pv)) {
3843         Py_DECREF(*pv);
3844         *pv = NULL;
3845         return;
3846     }
3847     v = string_concat((PyStringObject *) *pv, w);
3848     Py_DECREF(*pv);
3849     *pv = v;
3850 }
3851 
3852 void
PyString_ConcatAndDel(register PyObject ** pv,register PyObject * w)3853 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3854 {
3855     PyString_Concat(pv, w);
3856     Py_XDECREF(w);
3857 }
3858 
3859 
3860 /* The following function breaks the notion that strings are immutable:
3861    it changes the size of a string.  We get away with this only if there
3862    is only one module referencing the object.  You can also think of it
3863    as creating a new string object and destroying the old one, only
3864    more efficiently.  In any case, don't use this if the string may
3865    already be known to some other part of the code...
3866    Note that if there's not enough memory to resize the string, the original
3867    string object at *pv is deallocated, *pv is set to NULL, an "out of
3868    memory" exception is set, and -1 is returned.  Else (on success) 0 is
3869    returned, and the value in *pv may or may not be the same as on input.
3870    As always, an extra byte is allocated for a trailing \0 byte (newsize
3871    does *not* include that), and a trailing \0 byte is stored.
3872 */
3873 
int
_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
{
    register PyObject *v;
    register PyStringObject *sv;
    v = *pv;
    /* Only a string with exactly one reference that is not interned may
       be resized, and only to a non-negative size.  On violation the
       caller's reference is still consumed: *pv is cleared, v is
       decref'ed and a bad-internal-call error is raised. */
    if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
        PyString_CHECK_INTERNED(v)) {
        *pv = 0;
        Py_DECREF(v);
        PyErr_BadInternalCall();
        return -1;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* The reference bookkeeping for v is undone here, before the block is
       handed to realloc; _Py_NewReference() below sets it up again for
       the resulting (possibly relocated) object. */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    /* Request the fixed part plus newsize data bytes; the byte for the
       trailing NUL is already part of PyStringObject_SIZE. */
    *pv = (PyObject *)
        PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
    if (*pv == NULL) {
        /* Reallocation failed, so the old block is still ours to free.
           *pv is already NULL at this point. */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyStringObject *) *pv;
    Py_SIZE(sv) = newsize;
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;          /* invalidate cached hash value */
    return 0;
}
3904 
3905 /* Helpers for formatstring */
3906 
3907 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)3908 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3909 {
3910     Py_ssize_t argidx = *p_argidx;
3911     if (argidx < arglen) {
3912         (*p_argidx)++;
3913         if (arglen < 0)
3914             return args;
3915         else
3916             return PyTuple_GetItem(args, argidx);
3917     }
3918     PyErr_SetString(PyExc_TypeError,
3919                     "not enough arguments for format string");
3920     return NULL;
3921 }
3922 
/* Flag bits collected in `flags' while a conversion specifier is parsed.
 * Each bit stands for one flag character of the format string:
 *
 *   F_LJUST    '-'
 *   F_SIGN     '+'
 *   F_BLANK    ' '
 *   F_ALT      '#'
 *   F_ZERO     '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
3935 
3936 /* Returns a new reference to a PyString object, or NULL on failure. */
3937 
3938 static PyObject *
formatfloat(PyObject * v,int flags,int prec,int type)3939 formatfloat(PyObject *v, int flags, int prec, int type)
3940 {
3941     char *p;
3942     PyObject *result;
3943     double x;
3944 
3945     x = PyFloat_AsDouble(v);
3946     if (x == -1.0 && PyErr_Occurred()) {
3947         PyErr_Format(PyExc_TypeError, "float argument required, "
3948                      "not %.200s", Py_TYPE(v)->tp_name);
3949         return NULL;
3950     }
3951 
3952     if (prec < 0)
3953         prec = 6;
3954 
3955     p = PyOS_double_to_string(x, type, prec,
3956                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3957 
3958     if (p == NULL)
3959         return NULL;
3960     result = PyString_FromStringAndSize(p, strlen(p));
3961     PyMem_Free(p);
3962     return result;
3963 }
3964 
3965 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3966  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
3967  * Python's regular ints.
3968  * Return value:  a new PyString*, or NULL if error.
3969  *  .  *pbuf is set to point into it,
3970  *     *plen set to the # of chars following that.
3971  *     Caller must decref it when done using pbuf.
3972  *     The string starting at *pbuf is of the form
3973  *         "-"? ("0x" | "0X")? digit+
3974  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
3975  *         set in flags.  The case of hex digits will be correct,
3976  *     There will be at least prec digits, zero-filled on the left if
3977  *         necessary to get that many.
3978  * val          object to be converted
3979  * flags        bitmask of format flags; only F_ALT is looked at
3980  * prec         minimum number of digits; 0-fill on left if needed
3981  * type         a character in [duoxX]; u acts the same as d
3982  *
3983  * CAUTION:  o, x and X conversions on regular ints can never
3984  * produce a '-' sign, but can for Python's unbounded ints.
3985  */
3986 PyObject*
_PyString_FormatLong(PyObject * val,int flags,int prec,int type,char ** pbuf,int * plen)3987 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3988                      char **pbuf, int *plen)
3989 {
3990     PyObject *result = NULL;
3991     char *buf;
3992     Py_ssize_t i;
3993     int sign;           /* 1 if '-', else 0 */
3994     int len;            /* number of characters */
3995     Py_ssize_t llen;
3996     int numdigits;      /* len == numnondigits + numdigits */
3997     int numnondigits = 0;
3998 
3999     switch (type) {
4000     case 'd':
4001     case 'u':
4002         result = Py_TYPE(val)->tp_str(val);
4003         break;
4004     case 'o':
4005         result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4006         break;
4007     case 'x':
4008     case 'X':
4009         numnondigits = 2;
4010         result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4011         break;
4012     default:
4013         assert(!"'type' not in [duoxX]");
4014     }
4015     if (!result)
4016         return NULL;
4017 
4018     buf = PyString_AsString(result);
4019     if (!buf) {
4020         Py_DECREF(result);
4021         return NULL;
4022     }
4023 
4024     /* To modify the string in-place, there can only be one reference. */
4025     if (Py_REFCNT(result) != 1) {
4026         PyErr_BadInternalCall();
4027         return NULL;
4028     }
4029     llen = PyString_Size(result);
4030     if (llen > INT_MAX) {
4031         PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4032         return NULL;
4033     }
4034     len = (int)llen;
4035     if (buf[len-1] == 'L') {
4036         --len;
4037         buf[len] = '\0';
4038     }
4039     sign = buf[0] == '-';
4040     numnondigits += sign;
4041     numdigits = len - numnondigits;
4042     assert(numdigits > 0);
4043 
4044     /* Get rid of base marker unless F_ALT */
4045     if ((flags & F_ALT) == 0) {
4046         /* Need to skip 0x, 0X or 0. */
4047         int skipped = 0;
4048         switch (type) {
4049         case 'o':
4050             assert(buf[sign] == '0');
4051             /* If 0 is only digit, leave it alone. */
4052             if (numdigits > 1) {
4053                 skipped = 1;
4054                 --numdigits;
4055             }
4056             break;
4057         case 'x':
4058         case 'X':
4059             assert(buf[sign] == '0');
4060             assert(buf[sign + 1] == 'x');
4061             skipped = 2;
4062             numnondigits -= 2;
4063             break;
4064         }
4065         if (skipped) {
4066             buf += skipped;
4067             len -= skipped;
4068             if (sign)
4069                 buf[0] = '-';
4070         }
4071         assert(len == numnondigits + numdigits);
4072         assert(numdigits > 0);
4073     }
4074 
4075     /* Fill with leading zeroes to meet minimum width. */
4076     if (prec > numdigits) {
4077         PyObject *r1 = PyString_FromStringAndSize(NULL,
4078                                 numnondigits + prec);
4079         char *b1;
4080         if (!r1) {
4081             Py_DECREF(result);
4082             return NULL;
4083         }
4084         b1 = PyString_AS_STRING(r1);
4085         for (i = 0; i < numnondigits; ++i)
4086             *b1++ = *buf++;
4087         for (i = 0; i < prec - numdigits; i++)
4088             *b1++ = '0';
4089         for (i = 0; i < numdigits; i++)
4090             *b1++ = *buf++;
4091         *b1 = '\0';
4092         Py_DECREF(result);
4093         result = r1;
4094         buf = PyString_AS_STRING(result);
4095         len = numnondigits + prec;
4096     }
4097 
4098     /* Fix up case for hex conversions. */
4099     if (type == 'X') {
4100         /* Need to convert all lower case letters to upper case.
4101            and need to convert 0x to 0X (and -0x to -0X). */
4102         for (i = 0; i < len; i++)
4103             if (buf[i] >= 'a' && buf[i] <= 'x')
4104                 buf[i] -= 'a'-'A';
4105     }
4106     *pbuf = buf;
4107     *plen = len;
4108     return result;
4109 }
4110 
4111 Py_LOCAL_INLINE(int)
formatint(char * buf,size_t buflen,int flags,int prec,int type,PyObject * v)4112 formatint(char *buf, size_t buflen, int flags,
4113           int prec, int type, PyObject *v)
4114 {
4115     /* fmt = '%#.' + `prec` + 'l' + `type`
4116        worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4117        + 1 + 1 = 24 */
4118     char fmt[64];       /* plenty big enough! */
4119     char *sign;
4120     long x;
4121 
4122     x = PyInt_AsLong(v);
4123     if (x == -1 && PyErr_Occurred()) {
4124         PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4125                      Py_TYPE(v)->tp_name);
4126         return -1;
4127     }
4128     if (x < 0 && type == 'u') {
4129         type = 'd';
4130     }
4131     if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4132         sign = "-";
4133     else
4134         sign = "";
4135     if (prec < 0)
4136         prec = 1;
4137 
4138     if ((flags & F_ALT) &&
4139         (type == 'x' || type == 'X')) {
4140         /* When converting under %#x or %#X, there are a number
4141          * of issues that cause pain:
4142          * - when 0 is being converted, the C standard leaves off
4143          *   the '0x' or '0X', which is inconsistent with other
4144          *   %#x/%#X conversions and inconsistent with Python's
4145          *   hex() function
4146          * - there are platforms that violate the standard and
4147          *   convert 0 with the '0x' or '0X'
4148          *   (Metrowerks, Compaq Tru64)
4149          * - there are platforms that give '0x' when converting
4150          *   under %#X, but convert 0 in accordance with the
4151          *   standard (OS/2 EMX)
4152          *
4153          * We can achieve the desired consistency by inserting our
4154          * own '0x' or '0X' prefix, and substituting %x/%X in place
4155          * of %#x/%#X.
4156          *
4157          * Note that this is the same approach as used in
4158          * formatint() in unicodeobject.c
4159          */
4160         PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4161                       sign, type, prec, type);
4162     }
4163     else {
4164         PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4165                       sign, (flags&F_ALT) ? "#" : "",
4166                       prec, type);
4167     }
4168 
4169     /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4170      * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4171      */
4172     if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4173         PyErr_SetString(PyExc_OverflowError,
4174             "formatted integer is too long (precision too large?)");
4175         return -1;
4176     }
4177     if (sign[0])
4178         PyOS_snprintf(buf, buflen, fmt, -x);
4179     else
4180         PyOS_snprintf(buf, buflen, fmt, x);
4181     return (int)strlen(buf);
4182 }
4183 
4184 Py_LOCAL_INLINE(int)
formatchar(char * buf,size_t buflen,PyObject * v)4185 formatchar(char *buf, size_t buflen, PyObject *v)
4186 {
4187     /* presume that the buffer is at least 2 characters long */
4188     if (PyString_Check(v)) {
4189         if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4190             return -1;
4191     }
4192     else {
4193         if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4194             return -1;
4195     }
4196     buf[1] = '\0';
4197     return 1;
4198 }
4199 
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand in whatever expression it is used.
*/
#define FORMATBUFLEN ((size_t)120)
4209 
4210 PyObject *
PyString_Format(PyObject * format,PyObject * args)4211 PyString_Format(PyObject *format, PyObject *args)
4212 {
4213     char *fmt, *res;
4214     Py_ssize_t arglen, argidx;
4215     Py_ssize_t reslen, rescnt, fmtcnt;
4216     int args_owned = 0;
4217     PyObject *result, *orig_args;
4218 #ifdef Py_USING_UNICODE
4219     PyObject *v, *w;
4220 #endif
4221     PyObject *dict = NULL;
4222     if (format == NULL || !PyString_Check(format) || args == NULL) {
4223         PyErr_BadInternalCall();
4224         return NULL;
4225     }
4226     orig_args = args;
4227     fmt = PyString_AS_STRING(format);
4228     fmtcnt = PyString_GET_SIZE(format);
4229     reslen = rescnt = fmtcnt + 100;
4230     result = PyString_FromStringAndSize((char *)NULL, reslen);
4231     if (result == NULL)
4232         return NULL;
4233     res = PyString_AsString(result);
4234     if (PyTuple_Check(args)) {
4235         arglen = PyTuple_GET_SIZE(args);
4236         argidx = 0;
4237     }
4238     else {
4239         arglen = -1;
4240         argidx = -2;
4241     }
4242     if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4243         !PyObject_TypeCheck(args, &PyBaseString_Type))
4244         dict = args;
4245     while (--fmtcnt >= 0) {
4246         if (*fmt != '%') {
4247             if (--rescnt < 0) {
4248                 rescnt = fmtcnt + 100;
4249                 reslen += rescnt;
4250                 if (_PyString_Resize(&result, reslen))
4251                     return NULL;
4252                 res = PyString_AS_STRING(result)
4253                     + reslen - rescnt;
4254                 --rescnt;
4255             }
4256             *res++ = *fmt++;
4257         }
4258         else {
4259             /* Got a format specifier */
4260             int flags = 0;
4261             Py_ssize_t width = -1;
4262             int prec = -1;
4263             int c = '\0';
4264             int fill;
4265             int isnumok;
4266             PyObject *v     = NULL;
4267             PyObject *temp  = NULL;
4268             char *pbuf      = NULL;
4269             int sign;
4270             Py_ssize_t len;
4271             char formatbuf[FORMATBUFLEN];
4272                  /* For format{int,char}() */
4273 #ifdef Py_USING_UNICODE
4274             char *fmt_start = fmt;
4275             Py_ssize_t argidx_start = argidx;
4276 #endif
4277 
4278             fmt++;
4279             if (*fmt == '(') {
4280                 char *keystart;
4281                 Py_ssize_t keylen;
4282                 PyObject *key;
4283                 int pcount = 1;
4284 
4285                 if (dict == NULL) {
4286                     PyErr_SetString(PyExc_TypeError,
4287                              "format requires a mapping");
4288                     goto error;
4289                 }
4290                 ++fmt;
4291                 --fmtcnt;
4292                 keystart = fmt;
4293                 /* Skip over balanced parentheses */
4294                 while (pcount > 0 && --fmtcnt >= 0) {
4295                     if (*fmt == ')')
4296                         --pcount;
4297                     else if (*fmt == '(')
4298                         ++pcount;
4299                     fmt++;
4300                 }
4301                 keylen = fmt - keystart - 1;
4302                 if (fmtcnt < 0 || pcount > 0) {
4303                     PyErr_SetString(PyExc_ValueError,
4304                                "incomplete format key");
4305                     goto error;
4306                 }
4307                 key = PyString_FromStringAndSize(keystart,
4308                                                  keylen);
4309                 if (key == NULL)
4310                     goto error;
4311                 if (args_owned) {
4312                     Py_DECREF(args);
4313                     args_owned = 0;
4314                 }
4315                 args = PyObject_GetItem(dict, key);
4316                 Py_DECREF(key);
4317                 if (args == NULL) {
4318                     goto error;
4319                 }
4320                 args_owned = 1;
4321                 arglen = -1;
4322                 argidx = -2;
4323             }
4324             while (--fmtcnt >= 0) {
4325                 switch (c = *fmt++) {
4326                 case '-': flags |= F_LJUST; continue;
4327                 case '+': flags |= F_SIGN; continue;
4328                 case ' ': flags |= F_BLANK; continue;
4329                 case '#': flags |= F_ALT; continue;
4330                 case '0': flags |= F_ZERO; continue;
4331                 }
4332                 break;
4333             }
4334             if (c == '*') {
4335                 v = getnextarg(args, arglen, &argidx);
4336                 if (v == NULL)
4337                     goto error;
4338                 if (!PyInt_Check(v)) {
4339                     PyErr_SetString(PyExc_TypeError,
4340                                     "* wants int");
4341                     goto error;
4342                 }
4343                 width = PyInt_AsLong(v);
4344                 if (width < 0) {
4345                     flags |= F_LJUST;
4346                     width = -width;
4347                 }
4348                 if (--fmtcnt >= 0)
4349                     c = *fmt++;
4350             }
4351             else if (c >= 0 && isdigit(c)) {
4352                 width = c - '0';
4353                 while (--fmtcnt >= 0) {
4354                     c = Py_CHARMASK(*fmt++);
4355                     if (!isdigit(c))
4356                         break;
4357                     if ((width*10) / 10 != width) {
4358                         PyErr_SetString(
4359                             PyExc_ValueError,
4360                             "width too big");
4361                         goto error;
4362                     }
4363                     width = width*10 + (c - '0');
4364                 }
4365             }
4366             if (c == '.') {
4367                 prec = 0;
4368                 if (--fmtcnt >= 0)
4369                     c = *fmt++;
4370                 if (c == '*') {
4371                     v = getnextarg(args, arglen, &argidx);
4372                     if (v == NULL)
4373                         goto error;
4374                     if (!PyInt_Check(v)) {
4375                         PyErr_SetString(
4376                             PyExc_TypeError,
4377                             "* wants int");
4378                         goto error;
4379                     }
4380                     prec = PyInt_AsLong(v);
4381                     if (prec < 0)
4382                         prec = 0;
4383                     if (--fmtcnt >= 0)
4384                         c = *fmt++;
4385                 }
4386                 else if (c >= 0 && isdigit(c)) {
4387                     prec = c - '0';
4388                     while (--fmtcnt >= 0) {
4389                         c = Py_CHARMASK(*fmt++);
4390                         if (!isdigit(c))
4391                             break;
4392                         if ((prec*10) / 10 != prec) {
4393                             PyErr_SetString(
4394                                 PyExc_ValueError,
4395                                 "prec too big");
4396                             goto error;
4397                         }
4398                         prec = prec*10 + (c - '0');
4399                     }
4400                 }
4401             } /* prec */
4402             if (fmtcnt >= 0) {
4403                 if (c == 'h' || c == 'l' || c == 'L') {
4404                     if (--fmtcnt >= 0)
4405                         c = *fmt++;
4406                 }
4407             }
4408             if (fmtcnt < 0) {
4409                 PyErr_SetString(PyExc_ValueError,
4410                                 "incomplete format");
4411                 goto error;
4412             }
4413             if (c != '%') {
4414                 v = getnextarg(args, arglen, &argidx);
4415                 if (v == NULL)
4416                     goto error;
4417             }
4418             sign = 0;
4419             fill = ' ';
4420             switch (c) {
4421             case '%':
4422                 pbuf = "%";
4423                 len = 1;
4424                 break;
4425             case 's':
4426 #ifdef Py_USING_UNICODE
4427                 if (PyUnicode_Check(v)) {
4428                     fmt = fmt_start;
4429                     argidx = argidx_start;
4430                     goto unicode;
4431                 }
4432 #endif
4433                 temp = _PyObject_Str(v);
4434 #ifdef Py_USING_UNICODE
4435                 if (temp != NULL && PyUnicode_Check(temp)) {
4436                     Py_DECREF(temp);
4437                     fmt = fmt_start;
4438                     argidx = argidx_start;
4439                     goto unicode;
4440                 }
4441 #endif
4442                 /* Fall through */
4443             case 'r':
4444                 if (c == 'r')
4445                     temp = PyObject_Repr(v);
4446                 if (temp == NULL)
4447                     goto error;
4448                 if (!PyString_Check(temp)) {
4449                     PyErr_SetString(PyExc_TypeError,
4450                       "%s argument has non-string str()");
4451                     Py_DECREF(temp);
4452                     goto error;
4453                 }
4454                 pbuf = PyString_AS_STRING(temp);
4455                 len = PyString_GET_SIZE(temp);
4456                 if (prec >= 0 && len > prec)
4457                     len = prec;
4458                 break;
4459             case 'i':
4460             case 'd':
4461             case 'u':
4462             case 'o':
4463             case 'x':
4464             case 'X':
4465                 if (c == 'i')
4466                     c = 'd';
4467                 isnumok = 0;
4468                 if (PyNumber_Check(v)) {
4469                     PyObject *iobj=NULL;
4470 
4471                     if (PyInt_Check(v) || (PyLong_Check(v))) {
4472                         iobj = v;
4473                         Py_INCREF(iobj);
4474                     }
4475                     else {
4476                         iobj = PyNumber_Int(v);
4477                         if (iobj==NULL) iobj = PyNumber_Long(v);
4478                     }
4479                     if (iobj!=NULL) {
4480                         if (PyInt_Check(iobj)) {
4481                             isnumok = 1;
4482                             pbuf = formatbuf;
4483                             len = formatint(pbuf,
4484                                             sizeof(formatbuf),
4485                                             flags, prec, c, iobj);
4486                             Py_DECREF(iobj);
4487                             if (len < 0)
4488                                 goto error;
4489                             sign = 1;
4490                         }
4491                         else if (PyLong_Check(iobj)) {
4492                             int ilen;
4493 
4494                             isnumok = 1;
4495                             temp = _PyString_FormatLong(iobj, flags,
4496                                 prec, c, &pbuf, &ilen);
4497                             Py_DECREF(iobj);
4498                             len = ilen;
4499                             if (!temp)
4500                                 goto error;
4501                             sign = 1;
4502                         }
4503                         else {
4504                             Py_DECREF(iobj);
4505                         }
4506                     }
4507                 }
4508                 if (!isnumok) {
4509                     PyErr_Format(PyExc_TypeError,
4510                         "%%%c format: a number is required, "
4511                         "not %.200s", c, Py_TYPE(v)->tp_name);
4512                     goto error;
4513                 }
4514                 if (flags & F_ZERO)
4515                     fill = '0';
4516                 break;
4517             case 'e':
4518             case 'E':
4519             case 'f':
4520             case 'F':
4521             case 'g':
4522             case 'G':
4523                 temp = formatfloat(v, flags, prec, c);
4524                 if (temp == NULL)
4525                     goto error;
4526                 pbuf = PyString_AS_STRING(temp);
4527                 len = PyString_GET_SIZE(temp);
4528                 sign = 1;
4529                 if (flags & F_ZERO)
4530                     fill = '0';
4531                 break;
4532             case 'c':
4533 #ifdef Py_USING_UNICODE
4534                 if (PyUnicode_Check(v)) {
4535                     fmt = fmt_start;
4536                     argidx = argidx_start;
4537                     goto unicode;
4538                 }
4539 #endif
4540                 pbuf = formatbuf;
4541                 len = formatchar(pbuf, sizeof(formatbuf), v);
4542                 if (len < 0)
4543                     goto error;
4544                 break;
4545             default:
4546                 PyErr_Format(PyExc_ValueError,
4547                   "unsupported format character '%c' (0x%x) "
4548                   "at index %zd",
4549                   c, c,
4550                   (Py_ssize_t)(fmt - 1 -
4551                                PyString_AsString(format)));
4552                 goto error;
4553             }
4554             if (sign) {
4555                 if (*pbuf == '-' || *pbuf == '+') {
4556                     sign = *pbuf++;
4557                     len--;
4558                 }
4559                 else if (flags & F_SIGN)
4560                     sign = '+';
4561                 else if (flags & F_BLANK)
4562                     sign = ' ';
4563                 else
4564                     sign = 0;
4565             }
4566             if (width < len)
4567                 width = len;
4568             if (rescnt - (sign != 0) < width) {
4569                 reslen -= rescnt;
4570                 rescnt = width + fmtcnt + 100;
4571                 reslen += rescnt;
4572                 if (reslen < 0) {
4573                     Py_DECREF(result);
4574                     Py_XDECREF(temp);
4575                     return PyErr_NoMemory();
4576                 }
4577                 if (_PyString_Resize(&result, reslen)) {
4578                     Py_XDECREF(temp);
4579                     return NULL;
4580                 }
4581                 res = PyString_AS_STRING(result)
4582                     + reslen - rescnt;
4583             }
4584             if (sign) {
4585                 if (fill != ' ')
4586                     *res++ = sign;
4587                 rescnt--;
4588                 if (width > len)
4589                     width--;
4590             }
4591             if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4592                 assert(pbuf[0] == '0');
4593                 assert(pbuf[1] == c);
4594                 if (fill != ' ') {
4595                     *res++ = *pbuf++;
4596                     *res++ = *pbuf++;
4597                 }
4598                 rescnt -= 2;
4599                 width -= 2;
4600                 if (width < 0)
4601                     width = 0;
4602                 len -= 2;
4603             }
4604             if (width > len && !(flags & F_LJUST)) {
4605                 do {
4606                     --rescnt;
4607                     *res++ = fill;
4608                 } while (--width > len);
4609             }
4610             if (fill == ' ') {
4611                 if (sign)
4612                     *res++ = sign;
4613                 if ((flags & F_ALT) &&
4614                     (c == 'x' || c == 'X')) {
4615                     assert(pbuf[0] == '0');
4616                     assert(pbuf[1] == c);
4617                     *res++ = *pbuf++;
4618                     *res++ = *pbuf++;
4619                 }
4620             }
4621             Py_MEMCPY(res, pbuf, len);
4622             res += len;
4623             rescnt -= len;
4624             while (--width >= len) {
4625                 --rescnt;
4626                 *res++ = ' ';
4627             }
4628             if (dict && (argidx < arglen) && c != '%') {
4629                 PyErr_SetString(PyExc_TypeError,
4630                            "not all arguments converted during string formatting");
4631                 Py_XDECREF(temp);
4632                 goto error;
4633             }
4634             Py_XDECREF(temp);
4635         } /* '%' */
4636     } /* until end */
4637     if (argidx < arglen && !dict) {
4638         PyErr_SetString(PyExc_TypeError,
4639                         "not all arguments converted during string formatting");
4640         goto error;
4641     }
4642     if (args_owned) {
4643         Py_DECREF(args);
4644     }
4645     if (_PyString_Resize(&result, reslen - rescnt))
4646         return NULL;
4647     return result;
4648 
4649 #ifdef Py_USING_UNICODE
4650  unicode:
4651     if (args_owned) {
4652         Py_DECREF(args);
4653         args_owned = 0;
4654     }
4655     /* Fiddle args right (remove the first argidx arguments) */
4656     if (PyTuple_Check(orig_args) && argidx > 0) {
4657         PyObject *v;
4658         Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4659         v = PyTuple_New(n);
4660         if (v == NULL)
4661             goto error;
4662         while (--n >= 0) {
4663             PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4664             Py_INCREF(w);
4665             PyTuple_SET_ITEM(v, n, w);
4666         }
4667         args = v;
4668     } else {
4669         Py_INCREF(orig_args);
4670         args = orig_args;
4671     }
4672     args_owned = 1;
4673     /* Take what we have of the result and let the Unicode formatting
4674        function format the rest of the input. */
4675     rescnt = res - PyString_AS_STRING(result);
4676     if (_PyString_Resize(&result, rescnt))
4677         goto error;
4678     fmtcnt = PyString_GET_SIZE(format) - \
4679              (fmt - PyString_AS_STRING(format));
4680     format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4681     if (format == NULL)
4682         goto error;
4683     v = PyUnicode_Format(format, args);
4684     Py_DECREF(format);
4685     if (v == NULL)
4686         goto error;
4687     /* Paste what we have (result) to what the Unicode formatting
4688        function returned (v) and return the result (or error) */
4689     w = PyUnicode_Concat(result, v);
4690     Py_DECREF(result);
4691     Py_DECREF(v);
4692     Py_DECREF(args);
4693     return w;
4694 #endif /* Py_USING_UNICODE */
4695 
4696  error:
4697     Py_DECREF(result);
4698     if (args_owned) {
4699         Py_DECREF(args);
4700     }
4701     return NULL;
4702 }
4703 
4704 void
PyString_InternInPlace(PyObject ** p)4705 PyString_InternInPlace(PyObject **p)
4706 {
4707     register PyStringObject *s = (PyStringObject *)(*p);
4708     PyObject *t;
4709     if (s == NULL || !PyString_Check(s))
4710         Py_FatalError("PyString_InternInPlace: strings only please!");
4711     /* If it's a string subclass, we don't really know what putting
4712        it in the interned dict might do. */
4713     if (!PyString_CheckExact(s))
4714         return;
4715     if (PyString_CHECK_INTERNED(s))
4716         return;
4717     if (interned == NULL) {
4718         interned = PyDict_New();
4719         if (interned == NULL) {
4720             PyErr_Clear(); /* Don't leave an exception */
4721             return;
4722         }
4723     }
4724     t = PyDict_GetItem(interned, (PyObject *)s);
4725     if (t) {
4726         Py_INCREF(t);
4727         Py_DECREF(*p);
4728         *p = t;
4729         return;
4730     }
4731 
4732     if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4733         PyErr_Clear();
4734         return;
4735     }
4736     /* The two references in interned are not counted by refcnt.
4737        The string deallocator will take care of this */
4738     Py_REFCNT(s) -= 2;
4739     PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4740 }
4741 
4742 void
PyString_InternImmortal(PyObject ** p)4743 PyString_InternImmortal(PyObject **p)
4744 {
4745     PyString_InternInPlace(p);
4746     if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4747         PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4748         Py_INCREF(*p);
4749     }
4750 }
4751 
4752 
4753 PyObject *
PyString_InternFromString(const char * cp)4754 PyString_InternFromString(const char *cp)
4755 {
4756     PyObject *s = PyString_FromString(cp);
4757     if (s == NULL)
4758         return NULL;
4759     PyString_InternInPlace(&s);
4760     return s;
4761 }
4762 
4763 void
PyString_Fini(void)4764 PyString_Fini(void)
4765 {
4766     int i;
4767     for (i = 0; i < UCHAR_MAX + 1; i++) {
4768         Py_XDECREF(characters[i]);
4769         characters[i] = NULL;
4770     }
4771     Py_XDECREF(nullstring);
4772     nullstring = NULL;
4773 }
4774 
_Py_ReleaseInternedStrings(void)4775 void _Py_ReleaseInternedStrings(void)
4776 {
4777     PyObject *keys;
4778     PyStringObject *s;
4779     Py_ssize_t i, n;
4780     Py_ssize_t immortal_size = 0, mortal_size = 0;
4781 
4782     if (interned == NULL || !PyDict_Check(interned))
4783         return;
4784     keys = PyDict_Keys(interned);
4785     if (keys == NULL || !PyList_Check(keys)) {
4786         PyErr_Clear();
4787         return;
4788     }
4789 
4790     /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4791        detector, interned strings are not forcibly deallocated; rather, we
4792        give them their stolen references back, and then clear and DECREF
4793        the interned dict. */
4794 
4795     n = PyList_GET_SIZE(keys);
4796     fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4797         n);
4798     for (i = 0; i < n; i++) {
4799         s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4800         switch (s->ob_sstate) {
4801         case SSTATE_NOT_INTERNED:
4802             /* XXX Shouldn't happen */
4803             break;
4804         case SSTATE_INTERNED_IMMORTAL:
4805             Py_REFCNT(s) += 1;
4806             immortal_size += Py_SIZE(s);
4807             break;
4808         case SSTATE_INTERNED_MORTAL:
4809             Py_REFCNT(s) += 2;
4810             mortal_size += Py_SIZE(s);
4811             break;
4812         default:
4813             Py_FatalError("Inconsistent interned string state.");
4814         }
4815         s->ob_sstate = SSTATE_NOT_INTERNED;
4816     }
4817     fprintf(stderr, "total size of all interned strings: "
4818                     "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4819                     "mortal/immortal\n", mortal_size, immortal_size);
4820     Py_DECREF(keys);
4821     PyDict_Clear(interned);
4822     Py_DECREF(interned);
4823     interned = NULL;
4824 }
4825