1 /* String (str/bytes) object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 
5 #include "Python.h"
6 #include <ctype.h>
7 #include <stddef.h>
8 
9 #ifdef COUNT_ALLOCS
10 Py_ssize_t null_strings, one_strings;
11 #endif
12 
13 static PyStringObject *characters[UCHAR_MAX + 1];
14 static PyStringObject *nullstring;
15 
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
24 static PyObject *interned;
25 
26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27    for a string of length n should request PyStringObject_SIZE + n bytes.
28 
29    Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30    3 bytes per string allocation on a typical system.
31 */
32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33 
34 /*
35    For PyString_FromString(), the parameter `str' points to a null-terminated
36    string containing exactly `size' bytes.
37 
38    For PyString_FromStringAndSize(), the parameter `str' is
39    either NULL or else points to a string containing at least `size' bytes.
40    For PyString_FromStringAndSize(), the string in the `str' parameter does
41    not have to be null-terminated.  (Therefore it is safe to construct a
42    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
44    bytes (setting the last byte to the null terminating character) and you can
45    fill in the data yourself.  If `str' is non-NULL then the resulting
46    PyString object must be treated as immutable and you must not fill in nor
47    alter the data yourself, since the strings may be shared.
48 
49    The PyObject member `op->ob_size', which denotes the number of "extra
50    items" in a variable-size object, will contain the number of bytes
51    allocated for string data, not counting the null terminating character.
52    It is therefore equal to the `size' parameter (for
53    PyString_FromStringAndSize()) or the length of the string in the `str'
54    parameter (for PyString_FromString()).
55 */
56 PyObject *
PyString_FromStringAndSize(const char * str,Py_ssize_t size)57 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
58 {
59     register PyStringObject *op;
60     if (size < 0) {
61         PyErr_SetString(PyExc_SystemError,
62             "Negative size passed to PyString_FromStringAndSize");
63         return NULL;
64     }
65     if (size == 0 && (op = nullstring) != NULL) {
66 #ifdef COUNT_ALLOCS
67         null_strings++;
68 #endif
69         Py_INCREF(op);
70         return (PyObject *)op;
71     }
72     if (size == 1 && str != NULL &&
73         (op = characters[*str & UCHAR_MAX]) != NULL)
74     {
75 #ifdef COUNT_ALLOCS
76         one_strings++;
77 #endif
78         Py_INCREF(op);
79         return (PyObject *)op;
80     }
81 
82     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83         PyErr_SetString(PyExc_OverflowError, "string is too large");
84         return NULL;
85     }
86 
87     /* Inline PyObject_NewVar */
88     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89     if (op == NULL)
90         return PyErr_NoMemory();
91     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
92     op->ob_shash = -1;
93     op->ob_sstate = SSTATE_NOT_INTERNED;
94     if (str != NULL)
95         Py_MEMCPY(op->ob_sval, str, size);
96     op->ob_sval[size] = '\0';
97     /* share short strings */
98     if (size == 0) {
99         PyObject *t = (PyObject *)op;
100         PyString_InternInPlace(&t);
101         op = (PyStringObject *)t;
102         nullstring = op;
103         Py_INCREF(op);
104     } else if (size == 1 && str != NULL) {
105         PyObject *t = (PyObject *)op;
106         PyString_InternInPlace(&t);
107         op = (PyStringObject *)t;
108         characters[*str & UCHAR_MAX] = op;
109         Py_INCREF(op);
110     }
111     return (PyObject *) op;
112 }
113 
114 PyObject *
PyString_FromString(const char * str)115 PyString_FromString(const char *str)
116 {
117     register size_t size;
118     register PyStringObject *op;
119 
120     assert(str != NULL);
121     size = strlen(str);
122     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123         PyErr_SetString(PyExc_OverflowError,
124             "string is too long for a Python string");
125         return NULL;
126     }
127     if (size == 0 && (op = nullstring) != NULL) {
128 #ifdef COUNT_ALLOCS
129         null_strings++;
130 #endif
131         Py_INCREF(op);
132         return (PyObject *)op;
133     }
134     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
135 #ifdef COUNT_ALLOCS
136         one_strings++;
137 #endif
138         Py_INCREF(op);
139         return (PyObject *)op;
140     }
141 
142     /* Inline PyObject_NewVar */
143     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144     if (op == NULL)
145         return PyErr_NoMemory();
146     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
147     op->ob_shash = -1;
148     op->ob_sstate = SSTATE_NOT_INTERNED;
149     Py_MEMCPY(op->ob_sval, str, size+1);
150     /* share short strings */
151     if (size == 0) {
152         PyObject *t = (PyObject *)op;
153         PyString_InternInPlace(&t);
154         op = (PyStringObject *)t;
155         nullstring = op;
156         Py_INCREF(op);
157     } else if (size == 1) {
158         PyObject *t = (PyObject *)op;
159         PyString_InternInPlace(&t);
160         op = (PyStringObject *)t;
161         characters[*str & UCHAR_MAX] = op;
162         Py_INCREF(op);
163     }
164     return (PyObject *) op;
165 }
166 
167 PyObject *
PyString_FromFormatV(const char * format,va_list vargs)168 PyString_FromFormatV(const char *format, va_list vargs)
169 {
170     va_list count;
171     Py_ssize_t n = 0;
172     const char* f;
173     char *s;
174     PyObject* string;
175 
176 #ifdef VA_LIST_IS_ARRAY
177     Py_MEMCPY(count, vargs, sizeof(va_list));
178 #else
179 #ifdef  __va_copy
180     __va_copy(count, vargs);
181 #else
182     count = vargs;
183 #endif
184 #endif
185     /* step 1: figure out how large a buffer we need */
186     for (f = format; *f; f++) {
187         if (*f == '%') {
188 #ifdef HAVE_LONG_LONG
189             int longlongflag = 0;
190 #endif
191             const char* p = f;
192             while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193                 ;
194 
195             /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196              * they don't affect the amount of space we reserve.
197              */
198             if (*f == 'l') {
199                 if (f[1] == 'd' || f[1] == 'u') {
200                     ++f;
201                 }
202 #ifdef HAVE_LONG_LONG
203                 else if (f[1] == 'l' &&
204                          (f[2] == 'd' || f[2] == 'u')) {
205                     longlongflag = 1;
206                     f += 2;
207                 }
208 #endif
209             }
210             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211                 ++f;
212             }
213 
214             switch (*f) {
215             case 'c':
216                 (void)va_arg(count, int);
217                 /* fall through... */
218             case '%':
219                 n++;
220                 break;
221             case 'd': case 'u': case 'i': case 'x':
222                 (void) va_arg(count, int);
223 #ifdef HAVE_LONG_LONG
224                 /* Need at most
225                    ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226                    plus 1 for the sign.  53/22 is an upper
227                    bound for log10(256). */
228                 if (longlongflag)
229                     n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230                 else
231 #endif
232                     /* 20 bytes is enough to hold a 64-bit
233                        integer.  Decimal takes the most
234                        space.  This isn't enough for
235                        octal. */
236                     n += 20;
237 
238                 break;
239             case 's':
240                 s = va_arg(count, char*);
241                 n += strlen(s);
242                 break;
243             case 'p':
244                 (void) va_arg(count, int);
245                 /* maximum 64-bit pointer representation:
246                  * 0xffffffffffffffff
247                  * so 19 characters is enough.
248                  * XXX I count 18 -- what's the extra for?
249                  */
250                 n += 19;
251                 break;
252             default:
253                 /* if we stumble upon an unknown
254                    formatting code, copy the rest of
255                    the format string to the output
256                    string. (we cannot just skip the
257                    code, since there's no way to know
258                    what's in the argument list) */
259                 n += strlen(p);
260                 goto expand;
261             }
262         } else
263             n++;
264     }
265  expand:
266     /* step 2: fill the buffer */
267     /* Since we've analyzed how much space we need for the worst case,
268        use sprintf directly instead of the slower PyOS_snprintf. */
269     string = PyString_FromStringAndSize(NULL, n);
270     if (!string)
271         return NULL;
272 
273     s = PyString_AsString(string);
274 
275     for (f = format; *f; f++) {
276         if (*f == '%') {
277             const char* p = f++;
278             Py_ssize_t i;
279             int longflag = 0;
280 #ifdef HAVE_LONG_LONG
281             int longlongflag = 0;
282 #endif
283             int size_tflag = 0;
284             /* parse the width.precision part (we're only
285                interested in the precision value, if any) */
286             n = 0;
287             while (isdigit(Py_CHARMASK(*f)))
288                 n = (n*10) + *f++ - '0';
289             if (*f == '.') {
290                 f++;
291                 n = 0;
292                 while (isdigit(Py_CHARMASK(*f)))
293                     n = (n*10) + *f++ - '0';
294             }
295             while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296                 f++;
297             /* Handle %ld, %lu, %lld and %llu. */
298             if (*f == 'l') {
299                 if (f[1] == 'd' || f[1] == 'u') {
300                     longflag = 1;
301                     ++f;
302                 }
303 #ifdef HAVE_LONG_LONG
304                 else if (f[1] == 'l' &&
305                          (f[2] == 'd' || f[2] == 'u')) {
306                     longlongflag = 1;
307                     f += 2;
308                 }
309 #endif
310             }
311             /* handle the size_t flag. */
312             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313                 size_tflag = 1;
314                 ++f;
315             }
316 
317             switch (*f) {
318             case 'c':
319                 *s++ = va_arg(vargs, int);
320                 break;
321             case 'd':
322                 if (longflag)
323                     sprintf(s, "%ld", va_arg(vargs, long));
324 #ifdef HAVE_LONG_LONG
325                 else if (longlongflag)
326                     sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327                         va_arg(vargs, PY_LONG_LONG));
328 #endif
329                 else if (size_tflag)
330                     sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331                         va_arg(vargs, Py_ssize_t));
332                 else
333                     sprintf(s, "%d", va_arg(vargs, int));
334                 s += strlen(s);
335                 break;
336             case 'u':
337                 if (longflag)
338                     sprintf(s, "%lu",
339                         va_arg(vargs, unsigned long));
340 #ifdef HAVE_LONG_LONG
341                 else if (longlongflag)
342                     sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343                         va_arg(vargs, PY_LONG_LONG));
344 #endif
345                 else if (size_tflag)
346                     sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347                         va_arg(vargs, size_t));
348                 else
349                     sprintf(s, "%u",
350                         va_arg(vargs, unsigned int));
351                 s += strlen(s);
352                 break;
353             case 'i':
354                 sprintf(s, "%i", va_arg(vargs, int));
355                 s += strlen(s);
356                 break;
357             case 'x':
358                 sprintf(s, "%x", va_arg(vargs, int));
359                 s += strlen(s);
360                 break;
361             case 's':
362                 p = va_arg(vargs, char*);
363                 i = strlen(p);
364                 if (n > 0 && i > n)
365                     i = n;
366                 Py_MEMCPY(s, p, i);
367                 s += i;
368                 break;
369             case 'p':
370                 sprintf(s, "%p", va_arg(vargs, void*));
371                 /* %p is ill-defined:  ensure leading 0x. */
372                 if (s[1] == 'X')
373                     s[1] = 'x';
374                 else if (s[1] != 'x') {
375                     memmove(s+2, s, strlen(s)+1);
376                     s[0] = '0';
377                     s[1] = 'x';
378                 }
379                 s += strlen(s);
380                 break;
381             case '%':
382                 *s++ = '%';
383                 break;
384             default:
385                 strcpy(s, p);
386                 s += strlen(s);
387                 goto end;
388             }
389         } else
390             *s++ = *f;
391     }
392 
393  end:
394     if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395         return NULL;
396     return string;
397 }
398 
399 PyObject *
PyString_FromFormat(const char * format,...)400 PyString_FromFormat(const char *format, ...)
401 {
402     PyObject* ret;
403     va_list vargs;
404 
405 #ifdef HAVE_STDARG_PROTOTYPES
406     va_start(vargs, format);
407 #else
408     va_start(vargs);
409 #endif
410     ret = PyString_FromFormatV(format, vargs);
411     va_end(vargs);
412     return ret;
413 }
414 
415 
PyString_Decode(const char * s,Py_ssize_t size,const char * encoding,const char * errors)416 PyObject *PyString_Decode(const char *s,
417                           Py_ssize_t size,
418                           const char *encoding,
419                           const char *errors)
420 {
421     PyObject *v, *str;
422 
423     str = PyString_FromStringAndSize(s, size);
424     if (str == NULL)
425         return NULL;
426     v = PyString_AsDecodedString(str, encoding, errors);
427     Py_DECREF(str);
428     return v;
429 }
430 
/* Decode the string object `str' through the codec registry and return
   whatever object the codec produced.  A NULL `encoding' selects the
   default encoding when Unicode support is compiled in and is an error
   otherwise.  Returns NULL with an exception set on failure. */
PyObject *
PyString_AsDecodedObject(PyObject *str,
                         const char *encoding,
                         const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Decode via the codec registry; a NULL result is passed straight
       back to the caller. */
    return _PyCodec_DecodeText(str, encoding, errors);
}
461 
/* Like PyString_AsDecodedObject(), but the result is required to be a
   string object: a Unicode result is first converted using the default
   encoding, and any other non-string result raises TypeError. */
PyObject *
PyString_AsDecodedString(PyObject *str,
                         const char *encoding,
                         const char *errors)
{
    PyObject *result = PyString_AsDecodedObject(str, encoding, errors);

    if (result == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(result)) {
        PyObject *unicode_result = result;
        result = PyUnicode_AsEncodedString(unicode_result, NULL, NULL);
        Py_DECREF(unicode_result);
        if (result == NULL)
            return NULL;
    }
#endif
    if (PyString_Check(result))
        return result;

    PyErr_Format(PyExc_TypeError,
                 "decoder did not return a string object (type=%.400s)",
                 Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}
495 
PyString_Encode(const char * s,Py_ssize_t size,const char * encoding,const char * errors)496 PyObject *PyString_Encode(const char *s,
497                           Py_ssize_t size,
498                           const char *encoding,
499                           const char *errors)
500 {
501     PyObject *v, *str;
502 
503     str = PyString_FromStringAndSize(s, size);
504     if (str == NULL)
505         return NULL;
506     v = PyString_AsEncodedString(str, encoding, errors);
507     Py_DECREF(str);
508     return v;
509 }
510 
/* Encode the string object `str' through the codec registry and return
   whatever object the codec produced.  A NULL `encoding' selects the
   default encoding when Unicode support is compiled in and is an error
   otherwise.  Returns NULL with an exception set on failure. */
PyObject *
PyString_AsEncodedObject(PyObject *str,
                         const char *encoding,
                         const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Encode via the codec registry; a NULL result is passed straight
       back to the caller. */
    return _PyCodec_EncodeText(str, encoding, errors);
}
541 
/* Like PyString_AsEncodedObject(), but the result is required to be a
   string object: a Unicode result is first converted using the default
   encoding, and any other non-string result raises TypeError. */
PyObject *
PyString_AsEncodedString(PyObject *str,
                         const char *encoding,
                         const char *errors)
{
    PyObject *result = PyString_AsEncodedObject(str, encoding, errors);

    if (result == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(result)) {
        PyObject *unicode_result = result;
        result = PyUnicode_AsEncodedString(unicode_result, NULL, NULL);
        Py_DECREF(unicode_result);
        if (result == NULL)
            return NULL;
    }
#endif
    if (PyString_Check(result))
        return result;

    PyErr_Format(PyExc_TypeError,
                 "encoder did not return a string object (type=%.400s)",
                 Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}
575 
576 static void
string_dealloc(PyObject * op)577 string_dealloc(PyObject *op)
578 {
579     switch (PyString_CHECK_INTERNED(op)) {
580         case SSTATE_NOT_INTERNED:
581             break;
582 
583         case SSTATE_INTERNED_MORTAL:
584             /* revive dead object temporarily for DelItem */
585             Py_REFCNT(op) = 3;
586             if (PyDict_DelItem(interned, op) != 0)
587                 Py_FatalError(
588                     "deletion of interned string failed");
589             break;
590 
591         case SSTATE_INTERNED_IMMORTAL:
592             Py_FatalError("Immortal interned string died.");
593 
594         default:
595             Py_FatalError("Inconsistent interned string state.");
596     }
597     Py_TYPE(op)->tp_free(op);
598 }
599 
600 /* Unescape a backslash-escaped string. If unicode is non-zero,
601    the string is a u-literal. If recode_encoding is non-zero,
602    the string is UTF-8 encoded and should be re-encoded in the
603    specified encoding.  */
604 
PyString_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding)605 PyObject *PyString_DecodeEscape(const char *s,
606                                 Py_ssize_t len,
607                                 const char *errors,
608                                 Py_ssize_t unicode,
609                                 const char *recode_encoding)
610 {
611     int c;
612     char *p, *buf;
613     const char *end;
614     PyObject *v;
615     Py_ssize_t newlen;
616     /* Check for integer overflow */
617     if (recode_encoding && (len > PY_SSIZE_T_MAX / 4)) {
618         PyErr_SetString(PyExc_OverflowError, "string is too large");
619         return NULL;
620     }
621     newlen = recode_encoding ? 4*len:len;
622     v = PyString_FromStringAndSize((char *)NULL, newlen);
623     if (v == NULL)
624         return NULL;
625     p = buf = PyString_AsString(v);
626     end = s + len;
627     while (s < end) {
628         if (*s != '\\') {
629           non_esc:
630 #ifdef Py_USING_UNICODE
631             if (recode_encoding && (*s & 0x80)) {
632                 PyObject *u, *w;
633                 char *r;
634                 const char* t;
635                 Py_ssize_t rn;
636                 t = s;
637                 /* Decode non-ASCII bytes as UTF-8. */
638                 while (t < end && (*t & 0x80)) t++;
639                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
640                 if(!u) goto failed;
641 
642                 /* Recode them in target encoding. */
643                 w = PyUnicode_AsEncodedString(
644                     u, recode_encoding, errors);
645                 Py_DECREF(u);
646                 if (!w)                 goto failed;
647 
648                 /* Append bytes to output buffer. */
649                 assert(PyString_Check(w));
650                 r = PyString_AS_STRING(w);
651                 rn = PyString_GET_SIZE(w);
652                 Py_MEMCPY(p, r, rn);
653                 p += rn;
654                 Py_DECREF(w);
655                 s = t;
656             } else {
657                 *p++ = *s++;
658             }
659 #else
660             *p++ = *s++;
661 #endif
662             continue;
663         }
664         s++;
665         if (s==end) {
666             PyErr_SetString(PyExc_ValueError,
667                             "Trailing \\ in string");
668             goto failed;
669         }
670         switch (*s++) {
671         /* XXX This assumes ASCII! */
672         case '\n': break;
673         case '\\': *p++ = '\\'; break;
674         case '\'': *p++ = '\''; break;
675         case '\"': *p++ = '\"'; break;
676         case 'b': *p++ = '\b'; break;
677         case 'f': *p++ = '\014'; break; /* FF */
678         case 't': *p++ = '\t'; break;
679         case 'n': *p++ = '\n'; break;
680         case 'r': *p++ = '\r'; break;
681         case 'v': *p++ = '\013'; break; /* VT */
682         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
683         case '0': case '1': case '2': case '3':
684         case '4': case '5': case '6': case '7':
685             c = s[-1] - '0';
686             if (s < end && '0' <= *s && *s <= '7') {
687                 c = (c<<3) + *s++ - '0';
688                 if (s < end && '0' <= *s && *s <= '7')
689                     c = (c<<3) + *s++ - '0';
690             }
691             *p++ = c;
692             break;
693         case 'x':
694             if (s+1 < end &&
695                 isxdigit(Py_CHARMASK(s[0])) &&
696                 isxdigit(Py_CHARMASK(s[1])))
697             {
698                 unsigned int x = 0;
699                 c = Py_CHARMASK(*s);
700                 s++;
701                 if (isdigit(c))
702                     x = c - '0';
703                 else if (islower(c))
704                     x = 10 + c - 'a';
705                 else
706                     x = 10 + c - 'A';
707                 x = x << 4;
708                 c = Py_CHARMASK(*s);
709                 s++;
710                 if (isdigit(c))
711                     x += c - '0';
712                 else if (islower(c))
713                     x += 10 + c - 'a';
714                 else
715                     x += 10 + c - 'A';
716                 *p++ = x;
717                 break;
718             }
719             if (!errors || strcmp(errors, "strict") == 0) {
720                 PyErr_SetString(PyExc_ValueError,
721                                 "invalid \\x escape");
722                 goto failed;
723             }
724             if (strcmp(errors, "replace") == 0) {
725                 *p++ = '?';
726             } else if (strcmp(errors, "ignore") == 0)
727                 /* do nothing */;
728             else {
729                 PyErr_Format(PyExc_ValueError,
730                              "decoding error; "
731                              "unknown error handling code: %.400s",
732                              errors);
733                 goto failed;
734             }
735             /* skip \x */
736             if (s < end && isxdigit(Py_CHARMASK(s[0])))
737                 s++; /* and a hexdigit */
738             break;
739 #ifndef Py_USING_UNICODE
740         case 'u':
741         case 'U':
742         case 'N':
743             if (unicode) {
744                 PyErr_SetString(PyExc_ValueError,
745                           "Unicode escapes not legal "
746                           "when Unicode disabled");
747                 goto failed;
748             }
749 #endif
750         default:
751             *p++ = '\\';
752             s--;
753             goto non_esc; /* an arbitrary number of unescaped
754                              UTF-8 bytes may follow. */
755         }
756     }
757     if (p-buf < newlen)
758         _PyString_Resize(&v, p - buf); /* v is cleared on error */
759     return v;
760   failed:
761     Py_DECREF(v);
762     return NULL;
763 }
764 
765 /* -------------------------------------------------------------------- */
766 /* object api */
767 
768 static Py_ssize_t
string_getsize(register PyObject * op)769 string_getsize(register PyObject *op)
770 {
771     char *s;
772     Py_ssize_t len;
773     if (PyString_AsStringAndSize(op, &s, &len))
774         return -1;
775     return len;
776 }
777 
778 static /*const*/ char *
string_getbuffer(register PyObject * op)779 string_getbuffer(register PyObject *op)
780 {
781     char *s;
782     Py_ssize_t len;
783     if (PyString_AsStringAndSize(op, &s, &len))
784         return NULL;
785     return s;
786 }
787 
788 Py_ssize_t
PyString_Size(register PyObject * op)789 PyString_Size(register PyObject *op)
790 {
791     if (!PyString_Check(op))
792         return string_getsize(op);
793     return Py_SIZE(op);
794 }
795 
796 /*const*/ char *
PyString_AsString(register PyObject * op)797 PyString_AsString(register PyObject *op)
798 {
799     if (!PyString_Check(op))
800         return string_getbuffer(op);
801     return ((PyStringObject *)op) -> ob_sval;
802 }
803 
804 int
PyString_AsStringAndSize(register PyObject * obj,register char ** s,register Py_ssize_t * len)805 PyString_AsStringAndSize(register PyObject *obj,
806                          register char **s,
807                          register Py_ssize_t *len)
808 {
809     if (s == NULL) {
810         PyErr_BadInternalCall();
811         return -1;
812     }
813 
814     if (!PyString_Check(obj)) {
815 #ifdef Py_USING_UNICODE
816         if (PyUnicode_Check(obj)) {
817             obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
818             if (obj == NULL)
819                 return -1;
820         }
821         else
822 #endif
823         {
824             PyErr_Format(PyExc_TypeError,
825                          "expected string or Unicode object, "
826                          "%.200s found", Py_TYPE(obj)->tp_name);
827             return -1;
828         }
829     }
830 
831     *s = PyString_AS_STRING(obj);
832     if (len != NULL)
833         *len = PyString_GET_SIZE(obj);
834     else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
835         PyErr_SetString(PyExc_TypeError,
836                         "expected string without null bytes");
837         return -1;
838     }
839     return 0;
840 }
841 
842 /* -------------------------------------------------------------------- */
843 /* Methods */
844 
845 #include "stringlib/stringdefs.h"
846 #include "stringlib/fastsearch.h"
847 
848 #include "stringlib/count.h"
849 #include "stringlib/find.h"
850 #include "stringlib/partition.h"
851 #include "stringlib/split.h"
852 
853 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
854 #include "stringlib/localeutil.h"
855 
856 
857 
858 static int
string_print(PyStringObject * op,FILE * fp,int flags)859 string_print(PyStringObject *op, FILE *fp, int flags)
860 {
861     Py_ssize_t i, str_len;
862     char c;
863     int quote;
864 
865     /* XXX Ought to check for interrupts when writing long strings */
866     if (! PyString_CheckExact(op)) {
867         int ret;
868         /* A str subclass may have its own __str__ method. */
869         op = (PyStringObject *) PyObject_Str((PyObject *)op);
870         if (op == NULL)
871             return -1;
872         ret = string_print(op, fp, flags);
873         Py_DECREF(op);
874         return ret;
875     }
876     if (flags & Py_PRINT_RAW) {
877         char *data = op->ob_sval;
878         Py_ssize_t size = Py_SIZE(op);
879         Py_BEGIN_ALLOW_THREADS
880         while (size > INT_MAX) {
881             /* Very long strings cannot be written atomically.
882              * But don't write exactly INT_MAX bytes at a time
883              * to avoid memory aligment issues.
884              */
885             const int chunk_size = INT_MAX & ~0x3FFF;
886             fwrite(data, 1, chunk_size, fp);
887             data += chunk_size;
888             size -= chunk_size;
889         }
890 #ifdef __VMS
891         if (size) fwrite(data, (size_t)size, 1, fp);
892 #else
893         fwrite(data, 1, (size_t)size, fp);
894 #endif
895         Py_END_ALLOW_THREADS
896         return 0;
897     }
898 
899     /* figure out which quote to use; single is preferred */
900     quote = '\'';
901     if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
902         !memchr(op->ob_sval, '"', Py_SIZE(op)))
903         quote = '"';
904 
905     str_len = Py_SIZE(op);
906     Py_BEGIN_ALLOW_THREADS
907     fputc(quote, fp);
908     for (i = 0; i < str_len; i++) {
909         /* Since strings are immutable and the caller should have a
910         reference, accessing the internal buffer should not be an issue
911         with the GIL released. */
912         c = op->ob_sval[i];
913         if (c == quote || c == '\\')
914             fprintf(fp, "\\%c", c);
915         else if (c == '\t')
916             fprintf(fp, "\\t");
917         else if (c == '\n')
918             fprintf(fp, "\\n");
919         else if (c == '\r')
920             fprintf(fp, "\\r");
921         else if (c < ' ' || c >= 0x7f)
922             fprintf(fp, "\\x%02x", c & 0xff);
923         else
924             fputc(c, fp);
925     }
926     fputc(quote, fp);
927     Py_END_ALLOW_THREADS
928     return 0;
929 }
930 
931 PyObject *
PyString_Repr(PyObject * obj,int smartquotes)932 PyString_Repr(PyObject *obj, int smartquotes)
933 {
934     register PyStringObject* op = (PyStringObject*) obj;
935     size_t newsize;
936     PyObject *v;
937     if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2)/4) {
938         PyErr_SetString(PyExc_OverflowError,
939             "string is too large to make repr");
940         return NULL;
941     }
942     newsize = 2 + 4*Py_SIZE(op);
943     v = PyString_FromStringAndSize((char *)NULL, newsize);
944     if (v == NULL) {
945         return NULL;
946     }
947     else {
948         register Py_ssize_t i;
949         register char c;
950         register char *p;
951         int quote;
952 
953         /* figure out which quote to use; single is preferred */
954         quote = '\'';
955         if (smartquotes &&
956             memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
957             !memchr(op->ob_sval, '"', Py_SIZE(op)))
958             quote = '"';
959 
960         p = PyString_AS_STRING(v);
961         *p++ = quote;
962         for (i = 0; i < Py_SIZE(op); i++) {
963             /* There's at least enough room for a hex escape
964                and a closing quote. */
965             assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
966             c = op->ob_sval[i];
967             if (c == quote || c == '\\')
968                 *p++ = '\\', *p++ = c;
969             else if (c == '\t')
970                 *p++ = '\\', *p++ = 't';
971             else if (c == '\n')
972                 *p++ = '\\', *p++ = 'n';
973             else if (c == '\r')
974                 *p++ = '\\', *p++ = 'r';
975             else if (c < ' ' || c >= 0x7f) {
976                 /* For performance, we don't want to call
977                    PyOS_snprintf here (extra layers of
978                    function call). */
979                 sprintf(p, "\\x%02x", c & 0xff);
980                 p += 4;
981             }
982             else
983                 *p++ = c;
984         }
985         assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
986         *p++ = quote;
987         *p = '\0';
988         if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
989             return NULL;
990         return v;
991     }
992 }
993 
994 static PyObject *
string_repr(PyObject * op)995 string_repr(PyObject *op)
996 {
997     return PyString_Repr(op, 1);
998 }
999 
1000 static PyObject *
string_str(PyObject * s)1001 string_str(PyObject *s)
1002 {
1003     assert(PyString_Check(s));
1004     if (PyString_CheckExact(s)) {
1005         Py_INCREF(s);
1006         return s;
1007     }
1008     else {
1009         /* Subtype -- return genuine string with the same value. */
1010         PyStringObject *t = (PyStringObject *) s;
1011         return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1012     }
1013 }
1014 
1015 static Py_ssize_t
string_length(PyStringObject * a)1016 string_length(PyStringObject *a)
1017 {
1018     return Py_SIZE(a);
1019 }
1020 
1021 static PyObject *
string_concat(register PyStringObject * a,register PyObject * bb)1022 string_concat(register PyStringObject *a, register PyObject *bb)
1023 {
1024     register Py_ssize_t size;
1025     register PyStringObject *op;
1026     if (!PyString_Check(bb)) {
1027 #ifdef Py_USING_UNICODE
1028         if (PyUnicode_Check(bb))
1029             return PyUnicode_Concat((PyObject *)a, bb);
1030 #endif
1031         if (PyByteArray_Check(bb))
1032             return PyByteArray_Concat((PyObject *)a, bb);
1033         PyErr_Format(PyExc_TypeError,
1034                      "cannot concatenate 'str' and '%.200s' objects",
1035                      Py_TYPE(bb)->tp_name);
1036         return NULL;
1037     }
1038 #define b ((PyStringObject *)bb)
1039     /* Optimize cases with empty left or right operand */
1040     if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1041         PyString_CheckExact(a) && PyString_CheckExact(b)) {
1042         if (Py_SIZE(a) == 0) {
1043             Py_INCREF(bb);
1044             return bb;
1045         }
1046         Py_INCREF(a);
1047         return (PyObject *)a;
1048     }
1049     /* Check that string sizes are not negative, to prevent an
1050        overflow in cases where we are passed incorrectly-created
1051        strings with negative lengths (due to a bug in other code).
1052     */
1053     if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1054         Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1055         PyErr_SetString(PyExc_OverflowError,
1056                         "strings are too large to concat");
1057         return NULL;
1058     }
1059     size = Py_SIZE(a) + Py_SIZE(b);
1060 
1061     /* Inline PyObject_NewVar */
1062     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1063         PyErr_SetString(PyExc_OverflowError,
1064                         "strings are too large to concat");
1065         return NULL;
1066     }
1067     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1068     if (op == NULL)
1069         return PyErr_NoMemory();
1070     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
1071     op->ob_shash = -1;
1072     op->ob_sstate = SSTATE_NOT_INTERNED;
1073     Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1074     Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1075     op->ob_sval[size] = '\0';
1076     return (PyObject *) op;
1077 #undef b
1078 }
1079 
1080 static PyObject *
string_repeat(register PyStringObject * a,register Py_ssize_t n)1081 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1082 {
1083     register Py_ssize_t i;
1084     register Py_ssize_t j;
1085     register Py_ssize_t size;
1086     register PyStringObject *op;
1087     size_t nbytes;
1088     if (n < 0)
1089         n = 0;
1090     /* watch out for overflows:  the size can overflow Py_ssize_t,
1091      * and the # of bytes needed can overflow size_t
1092      */
1093     if (n && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1094         PyErr_SetString(PyExc_OverflowError,
1095             "repeated string is too long");
1096         return NULL;
1097     }
1098     size = Py_SIZE(a) * n;
1099     if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1100         Py_INCREF(a);
1101         return (PyObject *)a;
1102     }
1103     nbytes = (size_t)size;
1104     if (nbytes + PyStringObject_SIZE <= nbytes) {
1105         PyErr_SetString(PyExc_OverflowError,
1106             "repeated string is too long");
1107         return NULL;
1108     }
1109     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1110     if (op == NULL)
1111         return PyErr_NoMemory();
1112     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
1113     op->ob_shash = -1;
1114     op->ob_sstate = SSTATE_NOT_INTERNED;
1115     op->ob_sval[size] = '\0';
1116     if (Py_SIZE(a) == 1 && n > 0) {
1117         memset(op->ob_sval, a->ob_sval[0] , n);
1118         return (PyObject *) op;
1119     }
1120     i = 0;
1121     if (i < size) {
1122         Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1123         i = Py_SIZE(a);
1124     }
1125     while (i < size) {
1126         j = (i <= size-i)  ?  i  :  size-i;
1127         Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1128         i += j;
1129     }
1130     return (PyObject *) op;
1131 }
1132 
1133 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1134 
1135 static PyObject *
string_slice(register PyStringObject * a,register Py_ssize_t i,register Py_ssize_t j)1136 string_slice(register PyStringObject *a, register Py_ssize_t i,
1137              register Py_ssize_t j)
1138      /* j -- may be negative! */
1139 {
1140     if (i < 0)
1141         i = 0;
1142     if (j < 0)
1143         j = 0; /* Avoid signed/unsigned bug in next line */
1144     if (j > Py_SIZE(a))
1145         j = Py_SIZE(a);
1146     if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1147         /* It's the same as a */
1148         Py_INCREF(a);
1149         return (PyObject *)a;
1150     }
1151     if (j < i)
1152         j = i;
1153     return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1154 }
1155 
1156 static int
string_contains(PyObject * str_obj,PyObject * sub_obj)1157 string_contains(PyObject *str_obj, PyObject *sub_obj)
1158 {
1159     if (!PyString_CheckExact(sub_obj)) {
1160 #ifdef Py_USING_UNICODE
1161         if (PyUnicode_Check(sub_obj))
1162             return PyUnicode_Contains(str_obj, sub_obj);
1163 #endif
1164         if (!PyString_Check(sub_obj)) {
1165             PyErr_Format(PyExc_TypeError,
1166                 "'in <string>' requires string as left operand, "
1167                 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1168             return -1;
1169         }
1170     }
1171 
1172     return stringlib_contains_obj(str_obj, sub_obj);
1173 }
1174 
1175 static PyObject *
string_item(PyStringObject * a,register Py_ssize_t i)1176 string_item(PyStringObject *a, register Py_ssize_t i)
1177 {
1178     char pchar;
1179     PyObject *v;
1180     if (i < 0 || i >= Py_SIZE(a)) {
1181         PyErr_SetString(PyExc_IndexError, "string index out of range");
1182         return NULL;
1183     }
1184     pchar = a->ob_sval[i];
1185     v = (PyObject *)characters[pchar & UCHAR_MAX];
1186     if (v == NULL)
1187         v = PyString_FromStringAndSize(&pchar, 1);
1188     else {
1189 #ifdef COUNT_ALLOCS
1190         one_strings++;
1191 #endif
1192         Py_INCREF(v);
1193     }
1194     return v;
1195 }
1196 
1197 static PyObject*
string_richcompare(PyStringObject * a,PyStringObject * b,int op)1198 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1199 {
1200     int c;
1201     Py_ssize_t len_a, len_b;
1202     Py_ssize_t min_len;
1203     PyObject *result;
1204 
1205     /* Make sure both arguments are strings. */
1206     if (!(PyString_Check(a) && PyString_Check(b))) {
1207         result = Py_NotImplemented;
1208         goto out;
1209     }
1210     if (a == b) {
1211         switch (op) {
1212         case Py_EQ:case Py_LE:case Py_GE:
1213             result = Py_True;
1214             goto out;
1215         case Py_NE:case Py_LT:case Py_GT:
1216             result = Py_False;
1217             goto out;
1218         }
1219     }
1220     if (op == Py_EQ) {
1221         /* Supporting Py_NE here as well does not save
1222            much time, since Py_NE is rarely used.  */
1223         if (Py_SIZE(a) == Py_SIZE(b)
1224             && (a->ob_sval[0] == b->ob_sval[0]
1225             && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1226             result = Py_True;
1227         } else {
1228             result = Py_False;
1229         }
1230         goto out;
1231     }
1232     len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1233     min_len = (len_a < len_b) ? len_a : len_b;
1234     if (min_len > 0) {
1235         c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1236         if (c==0)
1237             c = memcmp(a->ob_sval, b->ob_sval, min_len);
1238     } else
1239         c = 0;
1240     if (c == 0)
1241         c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1242     switch (op) {
1243     case Py_LT: c = c <  0; break;
1244     case Py_LE: c = c <= 0; break;
1245     case Py_EQ: assert(0);  break; /* unreachable */
1246     case Py_NE: c = c != 0; break;
1247     case Py_GT: c = c >  0; break;
1248     case Py_GE: c = c >= 0; break;
1249     default:
1250         result = Py_NotImplemented;
1251         goto out;
1252     }
1253     result = c ? Py_True : Py_False;
1254   out:
1255     Py_INCREF(result);
1256     return result;
1257 }
1258 
1259 int
_PyString_Eq(PyObject * o1,PyObject * o2)1260 _PyString_Eq(PyObject *o1, PyObject *o2)
1261 {
1262     PyStringObject *a = (PyStringObject*) o1;
1263     PyStringObject *b = (PyStringObject*) o2;
1264     return Py_SIZE(a) == Py_SIZE(b)
1265       && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1266 }
1267 
1268 static long
string_hash(PyStringObject * a)1269 string_hash(PyStringObject *a)
1270 {
1271     register Py_ssize_t len;
1272     register unsigned char *p;
1273     register long x;
1274 
1275 #ifdef Py_DEBUG
1276     assert(_Py_HashSecret_Initialized);
1277 #endif
1278     if (a->ob_shash != -1)
1279         return a->ob_shash;
1280     len = Py_SIZE(a);
1281     /*
1282       We make the hash of the empty string be 0, rather than using
1283       (prefix ^ suffix), since this slightly obfuscates the hash secret
1284     */
1285     if (len == 0) {
1286         a->ob_shash = 0;
1287         return 0;
1288     }
1289     p = (unsigned char *) a->ob_sval;
1290     x = _Py_HashSecret.prefix;
1291     x ^= *p << 7;
1292     while (--len >= 0)
1293         x = (1000003*x) ^ *p++;
1294     x ^= Py_SIZE(a);
1295     x ^= _Py_HashSecret.suffix;
1296     if (x == -1)
1297         x = -2;
1298     a->ob_shash = x;
1299     return x;
1300 }
1301 
1302 static PyObject*
string_subscript(PyStringObject * self,PyObject * item)1303 string_subscript(PyStringObject* self, PyObject* item)
1304 {
1305     if (PyIndex_Check(item)) {
1306         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1307         if (i == -1 && PyErr_Occurred())
1308             return NULL;
1309         if (i < 0)
1310             i += PyString_GET_SIZE(self);
1311         return string_item(self, i);
1312     }
1313     else if (PySlice_Check(item)) {
1314         Py_ssize_t start, stop, step, slicelength, cur, i;
1315         char* source_buf;
1316         char* result_buf;
1317         PyObject* result;
1318 
1319         if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
1320             return NULL;
1321         }
1322         slicelength = _PySlice_AdjustIndices(PyString_GET_SIZE(self), &start,
1323                                             &stop, step);
1324 
1325         if (slicelength <= 0) {
1326             return PyString_FromStringAndSize("", 0);
1327         }
1328         else if (start == 0 && step == 1 &&
1329                  slicelength == PyString_GET_SIZE(self) &&
1330                  PyString_CheckExact(self)) {
1331             Py_INCREF(self);
1332             return (PyObject *)self;
1333         }
1334         else if (step == 1) {
1335             return PyString_FromStringAndSize(
1336                 PyString_AS_STRING(self) + start,
1337                 slicelength);
1338         }
1339         else {
1340             source_buf = PyString_AsString((PyObject*)self);
1341             result_buf = (char *)PyMem_Malloc(slicelength);
1342             if (result_buf == NULL)
1343                 return PyErr_NoMemory();
1344 
1345             for (cur = start, i = 0; i < slicelength;
1346                  cur += step, i++) {
1347                 result_buf[i] = source_buf[cur];
1348             }
1349 
1350             result = PyString_FromStringAndSize(result_buf,
1351                                                 slicelength);
1352             PyMem_Free(result_buf);
1353             return result;
1354         }
1355     }
1356     else {
1357         PyErr_Format(PyExc_TypeError,
1358                      "string indices must be integers, not %.200s",
1359                      Py_TYPE(item)->tp_name);
1360         return NULL;
1361     }
1362 }
1363 
1364 static Py_ssize_t
string_buffer_getreadbuf(PyStringObject * self,Py_ssize_t index,const void ** ptr)1365 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1366 {
1367     if ( index != 0 ) {
1368         PyErr_SetString(PyExc_SystemError,
1369                         "accessing non-existent string segment");
1370         return -1;
1371     }
1372     *ptr = (void *)self->ob_sval;
1373     return Py_SIZE(self);
1374 }
1375 
1376 static Py_ssize_t
string_buffer_getwritebuf(PyStringObject * self,Py_ssize_t index,const void ** ptr)1377 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1378 {
1379     PyErr_SetString(PyExc_TypeError,
1380                     "Cannot use string as modifiable buffer");
1381     return -1;
1382 }
1383 
1384 static Py_ssize_t
string_buffer_getsegcount(PyStringObject * self,Py_ssize_t * lenp)1385 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1386 {
1387     if ( lenp )
1388         *lenp = Py_SIZE(self);
1389     return 1;
1390 }
1391 
1392 static Py_ssize_t
string_buffer_getcharbuf(PyStringObject * self,Py_ssize_t index,const char ** ptr)1393 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1394 {
1395     if ( index != 0 ) {
1396         PyErr_SetString(PyExc_SystemError,
1397                         "accessing non-existent string segment");
1398         return -1;
1399     }
1400     *ptr = self->ob_sval;
1401     return Py_SIZE(self);
1402 }
1403 
1404 static int
string_buffer_getbuffer(PyStringObject * self,Py_buffer * view,int flags)1405 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1406 {
1407     return PyBuffer_FillInfo(view, (PyObject*)self,
1408                              (void *)self->ob_sval, Py_SIZE(self),
1409                              1, flags);
1410 }
1411 
/* Sequence-protocol function table for strings.  Entries are
   positional; each is tagged with the slot it fills.  The two
   assignment slots are 0, i.e. no handler is installed for item or
   slice assignment. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1422 
/* Mapping-protocol function table for strings: length and subscript
   (integer index or slice, see string_subscript).  The third entry is
   0, so no subscript-assignment handler is installed. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length, /*mp_length*/
    (binaryfunc)string_subscript, /*mp_subscript*/
    0, /*mp_ass_subscript*/
};
1428 
/* Buffer-protocol function table for strings.  Entries are positional;
   the casts name the kind of accessor each one is.  The write accessor
   is installed but always fails (see string_buffer_getwritebuf). */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf, /* old-style read buffer */
    (writebufferproc)string_buffer_getwritebuf, /* old-style write buffer */
    (segcountproc)string_buffer_getsegcount, /* segment count */
    (charbufferproc)string_buffer_getcharbuf, /* old-style char buffer */
    (getbufferproc)string_buffer_getbuffer, /* new-style getbuffer */
    0, /* XXX */ /* NOTE(review): presumably the release-buffer slot, left empty -- confirm against PyBufferProcs */
};
1437 
1438 
1439 
/* Strip modes.  The values double as indices into stripformat[] below,
   so they must stay 0, 1, 2 in this order. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* Each entry has the form "|O:<name>": a three-character prefix
   followed by the method name. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip mode i: skips the three-character "|O:" prefix
   of the corresponding stripformat[] entry. */
#define STRIPNAME(i) (stripformat[i]+3)
1448 
1449 PyDoc_STRVAR(split__doc__,
1450 "S.split([sep [,maxsplit]]) -> list of strings\n\
1451 \n\
1452 Return a list of the words in the string S, using sep as the\n\
1453 delimiter string.  If maxsplit is given, at most maxsplit\n\
1454 splits are done. If sep is not specified or is None, any\n\
1455 whitespace string is a separator and empty strings are removed\n\
1456 from the result.");
1457 
1458 static PyObject *
string_split(PyStringObject * self,PyObject * args)1459 string_split(PyStringObject *self, PyObject *args)
1460 {
1461     Py_ssize_t len = PyString_GET_SIZE(self), n;
1462     Py_ssize_t maxsplit = -1;
1463     const char *s = PyString_AS_STRING(self), *sub;
1464     PyObject *subobj = Py_None;
1465 
1466     if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1467         return NULL;
1468     if (maxsplit < 0)
1469         maxsplit = PY_SSIZE_T_MAX;
1470     if (subobj == Py_None)
1471         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1472     if (PyString_Check(subobj)) {
1473         sub = PyString_AS_STRING(subobj);
1474         n = PyString_GET_SIZE(subobj);
1475     }
1476 #ifdef Py_USING_UNICODE
1477     else if (PyUnicode_Check(subobj))
1478         return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1479 #endif
1480     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1481         return NULL;
1482 
1483     return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1484 }
1485 
1486 PyDoc_STRVAR(partition__doc__,
1487 "S.partition(sep) -> (head, sep, tail)\n\
1488 \n\
1489 Search for the separator sep in S, and return the part before it,\n\
1490 the separator itself, and the part after it.  If the separator is not\n\
1491 found, return S and two empty strings.");
1492 
1493 static PyObject *
string_partition(PyStringObject * self,PyObject * sep_obj)1494 string_partition(PyStringObject *self, PyObject *sep_obj)
1495 {
1496     const char *sep;
1497     Py_ssize_t sep_len;
1498 
1499     if (PyString_Check(sep_obj)) {
1500         sep = PyString_AS_STRING(sep_obj);
1501         sep_len = PyString_GET_SIZE(sep_obj);
1502     }
1503 #ifdef Py_USING_UNICODE
1504     else if (PyUnicode_Check(sep_obj))
1505         return PyUnicode_Partition((PyObject *) self, sep_obj);
1506 #endif
1507     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1508         return NULL;
1509 
1510     return stringlib_partition(
1511         (PyObject*) self,
1512         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1513         sep_obj, sep, sep_len
1514         );
1515 }
1516 
1517 PyDoc_STRVAR(rpartition__doc__,
1518 "S.rpartition(sep) -> (head, sep, tail)\n\
1519 \n\
1520 Search for the separator sep in S, starting at the end of S, and return\n\
1521 the part before it, the separator itself, and the part after it.  If the\n\
1522 separator is not found, return two empty strings and S.");
1523 
1524 static PyObject *
string_rpartition(PyStringObject * self,PyObject * sep_obj)1525 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1526 {
1527     const char *sep;
1528     Py_ssize_t sep_len;
1529 
1530     if (PyString_Check(sep_obj)) {
1531         sep = PyString_AS_STRING(sep_obj);
1532         sep_len = PyString_GET_SIZE(sep_obj);
1533     }
1534 #ifdef Py_USING_UNICODE
1535     else if (PyUnicode_Check(sep_obj))
1536         return PyUnicode_RPartition((PyObject *) self, sep_obj);
1537 #endif
1538     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1539         return NULL;
1540 
1541     return stringlib_rpartition(
1542         (PyObject*) self,
1543         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1544         sep_obj, sep, sep_len
1545         );
1546 }
1547 
1548 PyDoc_STRVAR(rsplit__doc__,
1549 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1550 \n\
1551 Return a list of the words in the string S, using sep as the\n\
1552 delimiter string, starting at the end of the string and working\n\
1553 to the front.  If maxsplit is given, at most maxsplit splits are\n\
1554 done. If sep is not specified or is None, any whitespace string\n\
1555 is a separator.");
1556 
1557 static PyObject *
string_rsplit(PyStringObject * self,PyObject * args)1558 string_rsplit(PyStringObject *self, PyObject *args)
1559 {
1560     Py_ssize_t len = PyString_GET_SIZE(self), n;
1561     Py_ssize_t maxsplit = -1;
1562     const char *s = PyString_AS_STRING(self), *sub;
1563     PyObject *subobj = Py_None;
1564 
1565     if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1566         return NULL;
1567     if (maxsplit < 0)
1568         maxsplit = PY_SSIZE_T_MAX;
1569     if (subobj == Py_None)
1570         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1571     if (PyString_Check(subobj)) {
1572         sub = PyString_AS_STRING(subobj);
1573         n = PyString_GET_SIZE(subobj);
1574     }
1575 #ifdef Py_USING_UNICODE
1576     else if (PyUnicode_Check(subobj))
1577         return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1578 #endif
1579     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1580         return NULL;
1581 
1582     return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1583 }
1584 
1585 
1586 PyDoc_STRVAR(join__doc__,
1587 "S.join(iterable) -> string\n\
1588 \n\
1589 Return a string which is the concatenation of the strings in the\n\
1590 iterable.  The separator between elements is S.");
1591 
1592 static PyObject *
string_join(PyStringObject * self,PyObject * orig)1593 string_join(PyStringObject *self, PyObject *orig)
1594 {
1595     char *sep = PyString_AS_STRING(self);
1596     const Py_ssize_t seplen = PyString_GET_SIZE(self);
1597     PyObject *res = NULL;
1598     char *p;
1599     Py_ssize_t seqlen = 0;
1600     size_t sz = 0;
1601     Py_ssize_t i;
1602     PyObject *seq, *item;
1603 
1604     seq = PySequence_Fast(orig, "can only join an iterable");
1605     if (seq == NULL) {
1606         return NULL;
1607     }
1608 
1609     seqlen = PySequence_Size(seq);
1610     if (seqlen == 0) {
1611         Py_DECREF(seq);
1612         return PyString_FromString("");
1613     }
1614     if (seqlen == 1) {
1615         item = PySequence_Fast_GET_ITEM(seq, 0);
1616         if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1617             Py_INCREF(item);
1618             Py_DECREF(seq);
1619             return item;
1620         }
1621     }
1622 
1623     /* There are at least two things to join, or else we have a subclass
1624      * of the builtin types in the sequence.
1625      * Do a pre-pass to figure out the total amount of space we'll
1626      * need (sz), see whether any argument is absurd, and defer to
1627      * the Unicode join if appropriate.
1628      */
1629     for (i = 0; i < seqlen; i++) {
1630         const size_t old_sz = sz;
1631         item = PySequence_Fast_GET_ITEM(seq, i);
1632         if (!PyString_Check(item)){
1633 #ifdef Py_USING_UNICODE
1634             if (PyUnicode_Check(item)) {
1635                 /* Defer to Unicode join.
1636                  * CAUTION:  There's no guarantee that the
1637                  * original sequence can be iterated over
1638                  * again, so we must pass seq here.
1639                  */
1640                 PyObject *result;
1641                 result = PyUnicode_Join((PyObject *)self, seq);
1642                 Py_DECREF(seq);
1643                 return result;
1644             }
1645 #endif
1646             PyErr_Format(PyExc_TypeError,
1647                          "sequence item %zd: expected string,"
1648                          " %.80s found",
1649                          i, Py_TYPE(item)->tp_name);
1650             Py_DECREF(seq);
1651             return NULL;
1652         }
1653         sz += PyString_GET_SIZE(item);
1654         if (i != 0)
1655             sz += seplen;
1656         if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1657             PyErr_SetString(PyExc_OverflowError,
1658                 "join() result is too long for a Python string");
1659             Py_DECREF(seq);
1660             return NULL;
1661         }
1662     }
1663 
1664     /* Allocate result space. */
1665     res = PyString_FromStringAndSize((char*)NULL, sz);
1666     if (res == NULL) {
1667         Py_DECREF(seq);
1668         return NULL;
1669     }
1670 
1671     /* Catenate everything. */
1672     p = PyString_AS_STRING(res);
1673     for (i = 0; i < seqlen; ++i) {
1674         size_t n;
1675         item = PySequence_Fast_GET_ITEM(seq, i);
1676         n = PyString_GET_SIZE(item);
1677         Py_MEMCPY(p, PyString_AS_STRING(item), n);
1678         p += n;
1679         if (i < seqlen - 1) {
1680             Py_MEMCPY(p, sep, seplen);
1681             p += seplen;
1682         }
1683     }
1684 
1685     Py_DECREF(seq);
1686     return res;
1687 }
1688 
1689 PyObject *
_PyString_Join(PyObject * sep,PyObject * x)1690 _PyString_Join(PyObject *sep, PyObject *x)
1691 {
1692     assert(sep != NULL && PyString_Check(sep));
1693     assert(x != NULL);
1694     return string_join((PyStringObject *)sep, x);
1695 }
1696 
/* helper macro to fixup start/end slice values
 *
 * Adjusts start and end in place against a length len:
 *   - end greater than len becomes len; a negative end has len added
 *     and is then raised to 0 if still negative;
 *   - a negative start has len added and is then raised to 0 if still
 *     negative.  start is NOT capped at len.
 *
 * Caveats: the expansion is a sequence of bare if statements (it is not
 * wrapped in do { } while (0)), and each argument is evaluated several
 * times.  Pass plain variables for start and end, a side-effect-free
 * expression for len, and do not use the macro as the unbraced body of
 * an if/else.
 */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len)                          \
        end = len;                          \
    else if (end < 0) {                     \
        end += len;                         \
        if (end < 0)                        \
        end = 0;                        \
    }                                       \
    if (start < 0) {                        \
        start += len;                       \
        if (start < 0)                      \
        start = 0;                      \
    }
1711 
1712 Py_LOCAL_INLINE(Py_ssize_t)
string_find_internal(PyStringObject * self,PyObject * args,int dir)1713 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1714 {
1715     PyObject *subobj;
1716     const char *sub;
1717     Py_ssize_t sub_len;
1718     Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1719 
1720     if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1721                                     args, &subobj, &start, &end))
1722         return -2;
1723 
1724     if (PyString_Check(subobj)) {
1725         sub = PyString_AS_STRING(subobj);
1726         sub_len = PyString_GET_SIZE(subobj);
1727     }
1728 #ifdef Py_USING_UNICODE
1729     else if (PyUnicode_Check(subobj))
1730         return PyUnicode_Find(
1731             (PyObject *)self, subobj, start, end, dir);
1732 #endif
1733     else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1734         /* XXX - the "expected a character buffer object" is pretty
1735            confusing for a non-expert.  remap to something else ? */
1736         return -2;
1737 
1738     if (dir > 0)
1739         return stringlib_find_slice(
1740             PyString_AS_STRING(self), PyString_GET_SIZE(self),
1741             sub, sub_len, start, end);
1742     else
1743         return stringlib_rfind_slice(
1744             PyString_AS_STRING(self), PyString_GET_SIZE(self),
1745             sub, sub_len, start, end);
1746 }
1747 
1748 
1749 PyDoc_STRVAR(find__doc__,
1750 "S.find(sub [,start [,end]]) -> int\n\
1751 \n\
1752 Return the lowest index in S where substring sub is found,\n\
1753 such that sub is contained within S[start:end].  Optional\n\
1754 arguments start and end are interpreted as in slice notation.\n\
1755 \n\
1756 Return -1 on failure.");
1757 
1758 static PyObject *
string_find(PyStringObject * self,PyObject * args)1759 string_find(PyStringObject *self, PyObject *args)
1760 {
1761     Py_ssize_t result = string_find_internal(self, args, +1);
1762     if (result == -2)
1763         return NULL;
1764     return PyInt_FromSsize_t(result);
1765 }
1766 
1767 
1768 PyDoc_STRVAR(index__doc__,
1769 "S.index(sub [,start [,end]]) -> int\n\
1770 \n\
1771 Like S.find() but raise ValueError when the substring is not found.");
1772 
1773 static PyObject *
string_index(PyStringObject * self,PyObject * args)1774 string_index(PyStringObject *self, PyObject *args)
1775 {
1776     Py_ssize_t result = string_find_internal(self, args, +1);
1777     if (result == -2)
1778         return NULL;
1779     if (result == -1) {
1780         PyErr_SetString(PyExc_ValueError,
1781                         "substring not found");
1782         return NULL;
1783     }
1784     return PyInt_FromSsize_t(result);
1785 }
1786 
1787 
1788 PyDoc_STRVAR(rfind__doc__,
1789 "S.rfind(sub [,start [,end]]) -> int\n\
1790 \n\
1791 Return the highest index in S where substring sub is found,\n\
1792 such that sub is contained within S[start:end].  Optional\n\
1793 arguments start and end are interpreted as in slice notation.\n\
1794 \n\
1795 Return -1 on failure.");
1796 
1797 static PyObject *
string_rfind(PyStringObject * self,PyObject * args)1798 string_rfind(PyStringObject *self, PyObject *args)
1799 {
1800     Py_ssize_t result = string_find_internal(self, args, -1);
1801     if (result == -2)
1802         return NULL;
1803     return PyInt_FromSsize_t(result);
1804 }
1805 
1806 
1807 PyDoc_STRVAR(rindex__doc__,
1808 "S.rindex(sub [,start [,end]]) -> int\n\
1809 \n\
1810 Like S.rfind() but raise ValueError when the substring is not found.");
1811 
1812 static PyObject *
string_rindex(PyStringObject * self,PyObject * args)1813 string_rindex(PyStringObject *self, PyObject *args)
1814 {
1815     Py_ssize_t result = string_find_internal(self, args, -1);
1816     if (result == -2)
1817         return NULL;
1818     if (result == -1) {
1819         PyErr_SetString(PyExc_ValueError,
1820                         "substring not found");
1821         return NULL;
1822     }
1823     return PyInt_FromSsize_t(result);
1824 }
1825 
1826 
1827 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyStringObject * self,int striptype,PyObject * sepobj)1828 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1829 {
1830     char *s = PyString_AS_STRING(self);
1831     Py_ssize_t len = PyString_GET_SIZE(self);
1832     char *sep = PyString_AS_STRING(sepobj);
1833     Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1834     Py_ssize_t i, j;
1835 
1836     i = 0;
1837     if (striptype != RIGHTSTRIP) {
1838         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1839             i++;
1840         }
1841     }
1842 
1843     j = len;
1844     if (striptype != LEFTSTRIP) {
1845         do {
1846             j--;
1847         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1848         j++;
1849     }
1850 
1851     if (i == 0 && j == len && PyString_CheckExact(self)) {
1852         Py_INCREF(self);
1853         return (PyObject*)self;
1854     }
1855     else
1856         return PyString_FromStringAndSize(s+i, j-i);
1857 }
1858 
1859 
1860 Py_LOCAL_INLINE(PyObject *)
do_strip(PyStringObject * self,int striptype)1861 do_strip(PyStringObject *self, int striptype)
1862 {
1863     char *s = PyString_AS_STRING(self);
1864     Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1865 
1866     i = 0;
1867     if (striptype != RIGHTSTRIP) {
1868         while (i < len && isspace(Py_CHARMASK(s[i]))) {
1869             i++;
1870         }
1871     }
1872 
1873     j = len;
1874     if (striptype != LEFTSTRIP) {
1875         do {
1876             j--;
1877         } while (j >= i && isspace(Py_CHARMASK(s[j])));
1878         j++;
1879     }
1880 
1881     if (i == 0 && j == len && PyString_CheckExact(self)) {
1882         Py_INCREF(self);
1883         return (PyObject*)self;
1884     }
1885     else
1886         return PyString_FromStringAndSize(s+i, j-i);
1887 }
1888 
1889 
1890 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyStringObject * self,int striptype,PyObject * args)1891 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1892 {
1893     PyObject *sep = NULL;
1894 
1895     if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1896         return NULL;
1897 
1898     if (sep != NULL && sep != Py_None) {
1899         if (PyString_Check(sep))
1900             return do_xstrip(self, striptype, sep);
1901 #ifdef Py_USING_UNICODE
1902         else if (PyUnicode_Check(sep)) {
1903             PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1904             PyObject *res;
1905             if (uniself==NULL)
1906                 return NULL;
1907             res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1908                 striptype, sep);
1909             Py_DECREF(uniself);
1910             return res;
1911         }
1912 #endif
1913         PyErr_Format(PyExc_TypeError,
1914 #ifdef Py_USING_UNICODE
1915                      "%s arg must be None, str or unicode",
1916 #else
1917                      "%s arg must be None or str",
1918 #endif
1919                      STRIPNAME(striptype));
1920         return NULL;
1921     }
1922 
1923     return do_strip(self, striptype);
1924 }
1925 
1926 
1927 PyDoc_STRVAR(strip__doc__,
1928 "S.strip([chars]) -> string or unicode\n\
1929 \n\
1930 Return a copy of the string S with leading and trailing\n\
1931 whitespace removed.\n\
1932 If chars is given and not None, remove characters in chars instead.\n\
1933 If chars is unicode, S will be converted to unicode before stripping");
1934 
1935 static PyObject *
string_strip(PyStringObject * self,PyObject * args)1936 string_strip(PyStringObject *self, PyObject *args)
1937 {
1938     if (PyTuple_GET_SIZE(args) == 0)
1939         return do_strip(self, BOTHSTRIP); /* Common case */
1940     else
1941         return do_argstrip(self, BOTHSTRIP, args);
1942 }
1943 
1944 
1945 PyDoc_STRVAR(lstrip__doc__,
1946 "S.lstrip([chars]) -> string or unicode\n\
1947 \n\
1948 Return a copy of the string S with leading whitespace removed.\n\
1949 If chars is given and not None, remove characters in chars instead.\n\
1950 If chars is unicode, S will be converted to unicode before stripping");
1951 
1952 static PyObject *
string_lstrip(PyStringObject * self,PyObject * args)1953 string_lstrip(PyStringObject *self, PyObject *args)
1954 {
1955     if (PyTuple_GET_SIZE(args) == 0)
1956         return do_strip(self, LEFTSTRIP); /* Common case */
1957     else
1958         return do_argstrip(self, LEFTSTRIP, args);
1959 }
1960 
1961 
1962 PyDoc_STRVAR(rstrip__doc__,
1963 "S.rstrip([chars]) -> string or unicode\n\
1964 \n\
1965 Return a copy of the string S with trailing whitespace removed.\n\
1966 If chars is given and not None, remove characters in chars instead.\n\
1967 If chars is unicode, S will be converted to unicode before stripping");
1968 
1969 static PyObject *
string_rstrip(PyStringObject * self,PyObject * args)1970 string_rstrip(PyStringObject *self, PyObject *args)
1971 {
1972     if (PyTuple_GET_SIZE(args) == 0)
1973         return do_strip(self, RIGHTSTRIP); /* Common case */
1974     else
1975         return do_argstrip(self, RIGHTSTRIP, args);
1976 }
1977 
1978 
1979 PyDoc_STRVAR(lower__doc__,
1980 "S.lower() -> string\n\
1981 \n\
1982 Return a copy of the string S converted to lowercase.");
1983 
1984 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1985 #ifndef _tolower
1986 #define _tolower tolower
1987 #endif
1988 
1989 static PyObject *
string_lower(PyStringObject * self)1990 string_lower(PyStringObject *self)
1991 {
1992     char *s;
1993     Py_ssize_t i, n = PyString_GET_SIZE(self);
1994     PyObject *newobj;
1995 
1996     newobj = PyString_FromStringAndSize(NULL, n);
1997     if (!newobj)
1998         return NULL;
1999 
2000     s = PyString_AS_STRING(newobj);
2001 
2002     Py_MEMCPY(s, PyString_AS_STRING(self), n);
2003 
2004     for (i = 0; i < n; i++) {
2005         int c = Py_CHARMASK(s[i]);
2006         if (isupper(c))
2007             s[i] = _tolower(c);
2008     }
2009 
2010     return newobj;
2011 }
2012 
2013 PyDoc_STRVAR(upper__doc__,
2014 "S.upper() -> string\n\
2015 \n\
2016 Return a copy of the string S converted to uppercase.");
2017 
2018 #ifndef _toupper
2019 #define _toupper toupper
2020 #endif
2021 
2022 static PyObject *
string_upper(PyStringObject * self)2023 string_upper(PyStringObject *self)
2024 {
2025     char *s;
2026     Py_ssize_t i, n = PyString_GET_SIZE(self);
2027     PyObject *newobj;
2028 
2029     newobj = PyString_FromStringAndSize(NULL, n);
2030     if (!newobj)
2031         return NULL;
2032 
2033     s = PyString_AS_STRING(newobj);
2034 
2035     Py_MEMCPY(s, PyString_AS_STRING(self), n);
2036 
2037     for (i = 0; i < n; i++) {
2038         int c = Py_CHARMASK(s[i]);
2039         if (islower(c))
2040             s[i] = _toupper(c);
2041     }
2042 
2043     return newobj;
2044 }
2045 
2046 PyDoc_STRVAR(title__doc__,
2047 "S.title() -> string\n\
2048 \n\
2049 Return a titlecased version of S, i.e. words start with uppercase\n\
2050 characters, all remaining cased characters have lowercase.");
2051 
2052 static PyObject*
string_title(PyStringObject * self)2053 string_title(PyStringObject *self)
2054 {
2055     char *s = PyString_AS_STRING(self), *s_new;
2056     Py_ssize_t i, n = PyString_GET_SIZE(self);
2057     int previous_is_cased = 0;
2058     PyObject *newobj;
2059 
2060     newobj = PyString_FromStringAndSize(NULL, n);
2061     if (newobj == NULL)
2062         return NULL;
2063     s_new = PyString_AsString(newobj);
2064     for (i = 0; i < n; i++) {
2065         int c = Py_CHARMASK(*s++);
2066         if (islower(c)) {
2067             if (!previous_is_cased)
2068                 c = toupper(c);
2069             previous_is_cased = 1;
2070         } else if (isupper(c)) {
2071             if (previous_is_cased)
2072                 c = tolower(c);
2073             previous_is_cased = 1;
2074         } else
2075             previous_is_cased = 0;
2076         *s_new++ = c;
2077     }
2078     return newobj;
2079 }
2080 
2081 PyDoc_STRVAR(capitalize__doc__,
2082 "S.capitalize() -> string\n\
2083 \n\
2084 Return a copy of the string S with only its first character\n\
2085 capitalized.");
2086 
2087 static PyObject *
string_capitalize(PyStringObject * self)2088 string_capitalize(PyStringObject *self)
2089 {
2090     char *s = PyString_AS_STRING(self), *s_new;
2091     Py_ssize_t i, n = PyString_GET_SIZE(self);
2092     PyObject *newobj;
2093 
2094     newobj = PyString_FromStringAndSize(NULL, n);
2095     if (newobj == NULL)
2096         return NULL;
2097     s_new = PyString_AsString(newobj);
2098     if (0 < n) {
2099         int c = Py_CHARMASK(*s++);
2100         if (islower(c))
2101             *s_new = toupper(c);
2102         else
2103             *s_new = c;
2104         s_new++;
2105     }
2106     for (i = 1; i < n; i++) {
2107         int c = Py_CHARMASK(*s++);
2108         if (isupper(c))
2109             *s_new = tolower(c);
2110         else
2111             *s_new = c;
2112         s_new++;
2113     }
2114     return newobj;
2115 }
2116 
2117 
2118 PyDoc_STRVAR(count__doc__,
2119 "S.count(sub[, start[, end]]) -> int\n\
2120 \n\
2121 Return the number of non-overlapping occurrences of substring sub in\n\
2122 string S[start:end].  Optional arguments start and end are interpreted\n\
2123 as in slice notation.");
2124 
2125 static PyObject *
string_count(PyStringObject * self,PyObject * args)2126 string_count(PyStringObject *self, PyObject *args)
2127 {
2128     PyObject *sub_obj;
2129     const char *str = PyString_AS_STRING(self), *sub;
2130     Py_ssize_t sub_len;
2131     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2132 
2133     if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
2134         return NULL;
2135 
2136     if (PyString_Check(sub_obj)) {
2137         sub = PyString_AS_STRING(sub_obj);
2138         sub_len = PyString_GET_SIZE(sub_obj);
2139     }
2140 #ifdef Py_USING_UNICODE
2141     else if (PyUnicode_Check(sub_obj)) {
2142         Py_ssize_t count;
2143         count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2144         if (count == -1)
2145             return NULL;
2146         else
2147             return PyInt_FromSsize_t(count);
2148     }
2149 #endif
2150     else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2151         return NULL;
2152 
2153     ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2154 
2155     return PyInt_FromSsize_t(
2156         stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2157         );
2158 }
2159 
2160 PyDoc_STRVAR(swapcase__doc__,
2161 "S.swapcase() -> string\n\
2162 \n\
2163 Return a copy of the string S with uppercase characters\n\
2164 converted to lowercase and vice versa.");
2165 
2166 static PyObject *
string_swapcase(PyStringObject * self)2167 string_swapcase(PyStringObject *self)
2168 {
2169     char *s = PyString_AS_STRING(self), *s_new;
2170     Py_ssize_t i, n = PyString_GET_SIZE(self);
2171     PyObject *newobj;
2172 
2173     newobj = PyString_FromStringAndSize(NULL, n);
2174     if (newobj == NULL)
2175         return NULL;
2176     s_new = PyString_AsString(newobj);
2177     for (i = 0; i < n; i++) {
2178         int c = Py_CHARMASK(*s++);
2179         if (islower(c)) {
2180             *s_new = toupper(c);
2181         }
2182         else if (isupper(c)) {
2183             *s_new = tolower(c);
2184         }
2185         else
2186             *s_new = c;
2187         s_new++;
2188     }
2189     return newobj;
2190 }
2191 
2192 
2193 PyDoc_STRVAR(translate__doc__,
2194 "S.translate(table [,deletechars]) -> string\n\
2195 \n\
2196 Return a copy of the string S, where all characters occurring\n\
2197 in the optional argument deletechars are removed, and the\n\
2198 remaining characters have been mapped through the given\n\
2199 translation table, which must be a string of length 256 or None.\n\
2200 If the table argument is None, no translation is applied and\n\
2201 the operation simply removes the characters in deletechars.");
2202 
2203 static PyObject *
string_translate(PyStringObject * self,PyObject * args)2204 string_translate(PyStringObject *self, PyObject *args)
2205 {
2206     register char *input, *output;
2207     const char *table;
2208     register Py_ssize_t i, c, changed = 0;
2209     PyObject *input_obj = (PyObject*)self;
2210     const char *output_start, *del_table=NULL;
2211     Py_ssize_t inlen, tablen, dellen = 0;
2212     PyObject *result;
2213     int trans_table[256];
2214     PyObject *tableobj, *delobj = NULL;
2215 
2216     if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2217                           &tableobj, &delobj))
2218         return NULL;
2219 
2220     if (PyString_Check(tableobj)) {
2221         table = PyString_AS_STRING(tableobj);
2222         tablen = PyString_GET_SIZE(tableobj);
2223     }
2224     else if (tableobj == Py_None) {
2225         table = NULL;
2226         tablen = 256;
2227     }
2228 #ifdef Py_USING_UNICODE
2229     else if (PyUnicode_Check(tableobj)) {
2230         /* Unicode .translate() does not support the deletechars
2231            parameter; instead a mapping to None will cause characters
2232            to be deleted. */
2233         if (delobj != NULL) {
2234             PyErr_SetString(PyExc_TypeError,
2235             "deletions are implemented differently for unicode");
2236             return NULL;
2237         }
2238         return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2239     }
2240 #endif
2241     else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2242         return NULL;
2243 
2244     if (tablen != 256) {
2245         PyErr_SetString(PyExc_ValueError,
2246           "translation table must be 256 characters long");
2247         return NULL;
2248     }
2249 
2250     if (delobj != NULL) {
2251         if (PyString_Check(delobj)) {
2252             del_table = PyString_AS_STRING(delobj);
2253             dellen = PyString_GET_SIZE(delobj);
2254         }
2255 #ifdef Py_USING_UNICODE
2256         else if (PyUnicode_Check(delobj)) {
2257             PyErr_SetString(PyExc_TypeError,
2258             "deletions are implemented differently for unicode");
2259             return NULL;
2260         }
2261 #endif
2262         else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2263             return NULL;
2264     }
2265     else {
2266         del_table = NULL;
2267         dellen = 0;
2268     }
2269 
2270     inlen = PyString_GET_SIZE(input_obj);
2271     result = PyString_FromStringAndSize((char *)NULL, inlen);
2272     if (result == NULL)
2273         return NULL;
2274     output_start = output = PyString_AsString(result);
2275     input = PyString_AS_STRING(input_obj);
2276 
2277     if (dellen == 0 && table != NULL) {
2278         /* If no deletions are required, use faster code */
2279         for (i = inlen; --i >= 0; ) {
2280             c = Py_CHARMASK(*input++);
2281             if (Py_CHARMASK((*output++ = table[c])) != c)
2282                 changed = 1;
2283         }
2284         if (changed || !PyString_CheckExact(input_obj))
2285             return result;
2286         Py_DECREF(result);
2287         Py_INCREF(input_obj);
2288         return input_obj;
2289     }
2290 
2291     if (table == NULL) {
2292         for (i = 0; i < 256; i++)
2293             trans_table[i] = Py_CHARMASK(i);
2294     } else {
2295         for (i = 0; i < 256; i++)
2296             trans_table[i] = Py_CHARMASK(table[i]);
2297     }
2298 
2299     for (i = 0; i < dellen; i++)
2300         trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2301 
2302     for (i = inlen; --i >= 0; ) {
2303         c = Py_CHARMASK(*input++);
2304         if (trans_table[c] != -1)
2305             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2306                 continue;
2307         changed = 1;
2308     }
2309     if (!changed && PyString_CheckExact(input_obj)) {
2310         Py_DECREF(result);
2311         Py_INCREF(input_obj);
2312         return input_obj;
2313     }
2314     /* Fix the size of the resulting string */
2315     if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2316         return NULL;
2317     return result;
2318 }
2319 
2320 
/* find and count characters and substrings */

/* Search the first target_len bytes of target for the byte c using
   memchr().  Evaluates to a char * pointing at the first match, or NULL
   when there is none. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2325 
2326 /* String ops must return a string.  */
2327 /* If the object is subclass of string, create a copy */
2328 Py_LOCAL(PyStringObject *)
return_self(PyStringObject * self)2329 return_self(PyStringObject *self)
2330 {
2331     if (PyString_CheckExact(self)) {
2332         Py_INCREF(self);
2333         return self;
2334     }
2335     return (PyStringObject *)PyString_FromStringAndSize(
2336         PyString_AS_STRING(self),
2337         PyString_GET_SIZE(self));
2338 }
2339 
2340 Py_LOCAL_INLINE(Py_ssize_t)
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)2341 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
2342 {
2343     Py_ssize_t count=0;
2344     const char *start=target;
2345     const char *end=target+target_len;
2346 
2347     while ( (start=findchar(start, end-start, c)) != NULL ) {
2348         count++;
2349         if (count >= maxcount)
2350             break;
2351         start += 1;
2352     }
2353     return count;
2354 }
2355 
2356 
2357 /* Algorithms for different cases of string replacement */
2358 
2359 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2360 Py_LOCAL(PyStringObject *)
replace_interleave(PyStringObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2361 replace_interleave(PyStringObject *self,
2362                    const char *to_s, Py_ssize_t to_len,
2363                    Py_ssize_t maxcount)
2364 {
2365     char *self_s, *result_s;
2366     Py_ssize_t self_len, result_len;
2367     Py_ssize_t count, i;
2368     PyStringObject *result;
2369 
2370     self_len = PyString_GET_SIZE(self);
2371 
2372     /* 1 at the end plus 1 after every character;
2373        count = min(maxcount, self_len + 1) */
2374     if (maxcount <= self_len) {
2375         count = maxcount;
2376     }
2377     else {
2378         /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2379         count = self_len + 1;
2380     }
2381 
2382     /* Check for overflow */
2383     /*   result_len = count * to_len + self_len; */
2384     assert(count > 0);
2385     if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
2386         PyErr_SetString(PyExc_OverflowError,
2387                         "replace string is too long");
2388         return NULL;
2389     }
2390     result_len = count * to_len + self_len;
2391     if (! (result = (PyStringObject *)
2392                      PyString_FromStringAndSize(NULL, result_len)) )
2393         return NULL;
2394 
2395     self_s = PyString_AS_STRING(self);
2396     result_s = PyString_AS_STRING(result);
2397 
2398     /* TODO: special case single character, which doesn't need memcpy */
2399 
2400     /* Lay the first one down (guaranteed this will occur) */
2401     Py_MEMCPY(result_s, to_s, to_len);
2402     result_s += to_len;
2403     count -= 1;
2404 
2405     for (i=0; i<count; i++) {
2406         *result_s++ = *self_s++;
2407         Py_MEMCPY(result_s, to_s, to_len);
2408         result_s += to_len;
2409     }
2410 
2411     /* Copy the rest of the original string */
2412     Py_MEMCPY(result_s, self_s, self_len-i);
2413 
2414     return result;
2415 }
2416 
2417 /* Special case for deleting a single character */
2418 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2419 Py_LOCAL(PyStringObject *)
replace_delete_single_character(PyStringObject * self,char from_c,Py_ssize_t maxcount)2420 replace_delete_single_character(PyStringObject *self,
2421                                 char from_c, Py_ssize_t maxcount)
2422 {
2423     char *self_s, *result_s;
2424     char *start, *next, *end;
2425     Py_ssize_t self_len, result_len;
2426     Py_ssize_t count;
2427     PyStringObject *result;
2428 
2429     self_len = PyString_GET_SIZE(self);
2430     self_s = PyString_AS_STRING(self);
2431 
2432     count = countchar(self_s, self_len, from_c, maxcount);
2433     if (count == 0) {
2434         return return_self(self);
2435     }
2436 
2437     result_len = self_len - count;  /* from_len == 1 */
2438     assert(result_len>=0);
2439 
2440     if ( (result = (PyStringObject *)
2441                     PyString_FromStringAndSize(NULL, result_len)) == NULL)
2442         return NULL;
2443     result_s = PyString_AS_STRING(result);
2444 
2445     start = self_s;
2446     end = self_s + self_len;
2447     while (count-- > 0) {
2448         next = findchar(start, end-start, from_c);
2449         if (next == NULL)
2450             break;
2451         Py_MEMCPY(result_s, start, next-start);
2452         result_s += (next-start);
2453         start = next+1;
2454     }
2455     Py_MEMCPY(result_s, start, end-start);
2456 
2457     return result;
2458 }
2459 
2460 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2461 
2462 Py_LOCAL(PyStringObject *)
replace_delete_substring(PyStringObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)2463 replace_delete_substring(PyStringObject *self,
2464                          const char *from_s, Py_ssize_t from_len,
2465                          Py_ssize_t maxcount) {
2466     char *self_s, *result_s;
2467     char *start, *next, *end;
2468     Py_ssize_t self_len, result_len;
2469     Py_ssize_t count, offset;
2470     PyStringObject *result;
2471 
2472     self_len = PyString_GET_SIZE(self);
2473     self_s = PyString_AS_STRING(self);
2474 
2475     count = stringlib_count(self_s, self_len,
2476                             from_s, from_len,
2477                             maxcount);
2478 
2479     if (count == 0) {
2480         /* no matches */
2481         return return_self(self);
2482     }
2483 
2484     result_len = self_len - (count * from_len);
2485     assert (result_len>=0);
2486 
2487     if ( (result = (PyStringObject *)
2488           PyString_FromStringAndSize(NULL, result_len)) == NULL )
2489         return NULL;
2490 
2491     result_s = PyString_AS_STRING(result);
2492 
2493     start = self_s;
2494     end = self_s + self_len;
2495     while (count-- > 0) {
2496         offset = stringlib_find(start, end-start,
2497                                 from_s, from_len,
2498                                 0);
2499         if (offset == -1)
2500             break;
2501         next = start + offset;
2502 
2503         Py_MEMCPY(result_s, start, next-start);
2504 
2505         result_s += (next-start);
2506         start = next+from_len;
2507     }
2508     Py_MEMCPY(result_s, start, end-start);
2509     return result;
2510 }
2511 
2512 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2513 Py_LOCAL(PyStringObject *)
replace_single_character_in_place(PyStringObject * self,char from_c,char to_c,Py_ssize_t maxcount)2514 replace_single_character_in_place(PyStringObject *self,
2515                                   char from_c, char to_c,
2516                                   Py_ssize_t maxcount)
2517 {
2518     char *self_s, *result_s, *start, *end, *next;
2519     Py_ssize_t self_len;
2520     PyStringObject *result;
2521 
2522     /* The result string will be the same size */
2523     self_s = PyString_AS_STRING(self);
2524     self_len = PyString_GET_SIZE(self);
2525 
2526     next = findchar(self_s, self_len, from_c);
2527 
2528     if (next == NULL) {
2529         /* No matches; return the original string */
2530         return return_self(self);
2531     }
2532 
2533     /* Need to make a new string */
2534     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2535     if (result == NULL)
2536         return NULL;
2537     result_s = PyString_AS_STRING(result);
2538     Py_MEMCPY(result_s, self_s, self_len);
2539 
2540     /* change everything in-place, starting with this one */
2541     start =  result_s + (next-self_s);
2542     *start = to_c;
2543     start++;
2544     end = result_s + self_len;
2545 
2546     while (--maxcount > 0) {
2547         next = findchar(start, end-start, from_c);
2548         if (next == NULL)
2549             break;
2550         *next = to_c;
2551         start = next+1;
2552     }
2553 
2554     return result;
2555 }
2556 
2557 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2558 Py_LOCAL(PyStringObject *)
replace_substring_in_place(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2559 replace_substring_in_place(PyStringObject *self,
2560                            const char *from_s, Py_ssize_t from_len,
2561                            const char *to_s, Py_ssize_t to_len,
2562                            Py_ssize_t maxcount)
2563 {
2564     char *result_s, *start, *end;
2565     char *self_s;
2566     Py_ssize_t self_len, offset;
2567     PyStringObject *result;
2568 
2569     /* The result string will be the same size */
2570 
2571     self_s = PyString_AS_STRING(self);
2572     self_len = PyString_GET_SIZE(self);
2573 
2574     offset = stringlib_find(self_s, self_len,
2575                             from_s, from_len,
2576                             0);
2577     if (offset == -1) {
2578         /* No matches; return the original string */
2579         return return_self(self);
2580     }
2581 
2582     /* Need to make a new string */
2583     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2584     if (result == NULL)
2585         return NULL;
2586     result_s = PyString_AS_STRING(result);
2587     Py_MEMCPY(result_s, self_s, self_len);
2588 
2589     /* change everything in-place, starting with this one */
2590     start =  result_s + offset;
2591     Py_MEMCPY(start, to_s, from_len);
2592     start += from_len;
2593     end = result_s + self_len;
2594 
2595     while ( --maxcount > 0) {
2596         offset = stringlib_find(start, end-start,
2597                                 from_s, from_len,
2598                                 0);
2599         if (offset==-1)
2600             break;
2601         Py_MEMCPY(start+offset, to_s, from_len);
2602         start += offset+from_len;
2603     }
2604 
2605     return result;
2606 }
2607 
2608 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2609 Py_LOCAL(PyStringObject *)
replace_single_character(PyStringObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2610 replace_single_character(PyStringObject *self,
2611                          char from_c,
2612                          const char *to_s, Py_ssize_t to_len,
2613                          Py_ssize_t maxcount)
2614 {
2615     char *self_s, *result_s;
2616     char *start, *next, *end;
2617     Py_ssize_t self_len, result_len;
2618     Py_ssize_t count;
2619     PyStringObject *result;
2620 
2621     self_s = PyString_AS_STRING(self);
2622     self_len = PyString_GET_SIZE(self);
2623 
2624     count = countchar(self_s, self_len, from_c, maxcount);
2625     if (count == 0) {
2626         /* no matches, return unchanged */
2627         return return_self(self);
2628     }
2629 
2630     /* use the difference between current and new, hence the "-1" */
2631     /*   result_len = self_len + count * (to_len-1)  */
2632     assert(count > 0);
2633     if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
2634         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2635         return NULL;
2636     }
2637     result_len = self_len + count * (to_len - 1);
2638 
2639     if ( (result = (PyStringObject *)
2640           PyString_FromStringAndSize(NULL, result_len)) == NULL)
2641         return NULL;
2642     result_s = PyString_AS_STRING(result);
2643 
2644     start = self_s;
2645     end = self_s + self_len;
2646     while (count-- > 0) {
2647         next = findchar(start, end-start, from_c);
2648         if (next == NULL)
2649             break;
2650 
2651         if (next == start) {
2652             /* replace with the 'to' */
2653             Py_MEMCPY(result_s, to_s, to_len);
2654             result_s += to_len;
2655             start += 1;
2656         } else {
2657             /* copy the unchanged old then the 'to' */
2658             Py_MEMCPY(result_s, start, next-start);
2659             result_s += (next-start);
2660             Py_MEMCPY(result_s, to_s, to_len);
2661             result_s += to_len;
2662             start = next+1;
2663         }
2664     }
2665     /* Copy the remainder of the remaining string */
2666     Py_MEMCPY(result_s, start, end-start);
2667 
2668     return result;
2669 }
2670 
2671 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2672 Py_LOCAL(PyStringObject *)
replace_substring(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2673 replace_substring(PyStringObject *self,
2674                   const char *from_s, Py_ssize_t from_len,
2675                   const char *to_s, Py_ssize_t to_len,
2676                   Py_ssize_t maxcount) {
2677     char *self_s, *result_s;
2678     char *start, *next, *end;
2679     Py_ssize_t self_len, result_len;
2680     Py_ssize_t count, offset;
2681     PyStringObject *result;
2682 
2683     self_s = PyString_AS_STRING(self);
2684     self_len = PyString_GET_SIZE(self);
2685 
2686     count = stringlib_count(self_s, self_len,
2687                             from_s, from_len,
2688                             maxcount);
2689 
2690     if (count == 0) {
2691         /* no matches, return unchanged */
2692         return return_self(self);
2693     }
2694 
2695     /* Check for overflow */
2696     /*    result_len = self_len + count * (to_len-from_len) */
2697     assert(count > 0);
2698     if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
2699         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2700         return NULL;
2701     }
2702     result_len = self_len + count * (to_len - from_len);
2703 
2704     if ( (result = (PyStringObject *)
2705           PyString_FromStringAndSize(NULL, result_len)) == NULL)
2706         return NULL;
2707     result_s = PyString_AS_STRING(result);
2708 
2709     start = self_s;
2710     end = self_s + self_len;
2711     while (count-- > 0) {
2712         offset = stringlib_find(start, end-start,
2713                                 from_s, from_len,
2714                                 0);
2715         if (offset == -1)
2716             break;
2717         next = start+offset;
2718         if (next == start) {
2719             /* replace with the 'to' */
2720             Py_MEMCPY(result_s, to_s, to_len);
2721             result_s += to_len;
2722             start += from_len;
2723         } else {
2724             /* copy the unchanged old then the 'to' */
2725             Py_MEMCPY(result_s, start, next-start);
2726             result_s += (next-start);
2727             Py_MEMCPY(result_s, to_s, to_len);
2728             result_s += to_len;
2729             start = next+from_len;
2730         }
2731     }
2732     /* Copy the remainder of the remaining string */
2733     Py_MEMCPY(result_s, start, end-start);
2734 
2735     return result;
2736 }
2737 
2738 
2739 Py_LOCAL(PyStringObject *)
replace(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2740 replace(PyStringObject *self,
2741     const char *from_s, Py_ssize_t from_len,
2742     const char *to_s, Py_ssize_t to_len,
2743     Py_ssize_t maxcount)
2744 {
2745     if (maxcount < 0) {
2746         maxcount = PY_SSIZE_T_MAX;
2747     } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2748         /* nothing to do; return the original string */
2749         return return_self(self);
2750     }
2751 
2752     if (maxcount == 0 ||
2753         (from_len == 0 && to_len == 0)) {
2754         /* nothing to do; return the original string */
2755         return return_self(self);
2756     }
2757 
2758     /* Handle zero-length special cases */
2759 
2760     if (from_len == 0) {
2761         /* insert the 'to' string everywhere.   */
2762         /*    >>> "Python".replace("", ".")     */
2763         /*    '.P.y.t.h.o.n.'                   */
2764         return replace_interleave(self, to_s, to_len, maxcount);
2765     }
2766 
2767     /* Except for "".replace("", "A") == "A" there is no way beyond this */
2768     /* point for an empty self string to generate a non-empty string */
2769     /* Special case so the remaining code always gets a non-empty string */
2770     if (PyString_GET_SIZE(self) == 0) {
2771         return return_self(self);
2772     }
2773 
2774     if (to_len == 0) {
2775         /* delete all occurrences of 'from' string */
2776         if (from_len == 1) {
2777             return replace_delete_single_character(
2778                 self, from_s[0], maxcount);
2779         } else {
2780             return replace_delete_substring(self, from_s, from_len, maxcount);
2781         }
2782     }
2783 
2784     /* Handle special case where both strings have the same length */
2785 
2786     if (from_len == to_len) {
2787         if (from_len == 1) {
2788             return replace_single_character_in_place(
2789                 self,
2790                 from_s[0],
2791                 to_s[0],
2792                 maxcount);
2793         } else {
2794             return replace_substring_in_place(
2795                 self, from_s, from_len, to_s, to_len, maxcount);
2796         }
2797     }
2798 
2799     /* Otherwise use the more generic algorithms */
2800     if (from_len == 1) {
2801         return replace_single_character(self, from_s[0],
2802                                         to_s, to_len, maxcount);
2803     } else {
2804         /* len('from')>=2, len('to')>=1 */
2805         return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2806     }
2807 }
2808 
2809 PyDoc_STRVAR(replace__doc__,
2810 "S.replace(old, new[, count]) -> string\n\
2811 \n\
2812 Return a copy of string S with all occurrences of substring\n\
2813 old replaced by new.  If the optional argument count is\n\
2814 given, only the first count occurrences are replaced.");
2815 
2816 static PyObject *
string_replace(PyStringObject * self,PyObject * args)2817 string_replace(PyStringObject *self, PyObject *args)
2818 {
2819     Py_ssize_t count = -1;
2820     PyObject *from, *to;
2821     const char *from_s, *to_s;
2822     Py_ssize_t from_len, to_len;
2823 
2824     if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2825         return NULL;
2826 
2827     if (PyString_Check(from)) {
2828         from_s = PyString_AS_STRING(from);
2829         from_len = PyString_GET_SIZE(from);
2830     }
2831 #ifdef Py_USING_UNICODE
2832     if (PyUnicode_Check(from))
2833         return PyUnicode_Replace((PyObject *)self,
2834                                  from, to, count);
2835 #endif
2836     else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2837         return NULL;
2838 
2839     if (PyString_Check(to)) {
2840         to_s = PyString_AS_STRING(to);
2841         to_len = PyString_GET_SIZE(to);
2842     }
2843 #ifdef Py_USING_UNICODE
2844     else if (PyUnicode_Check(to))
2845         return PyUnicode_Replace((PyObject *)self,
2846                                  from, to, count);
2847 #endif
2848     else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2849         return NULL;
2850 
2851     return (PyObject *)replace((PyStringObject *) self,
2852                                from_s, from_len,
2853                                to_s, to_len, count);
2854 }
2855 
2856 /** End DALKE **/
2857 
2858 /* Matches the end (direction >= 0) or start (direction < 0) of self
2859  * against substr, using the start and end arguments. Returns
2860  * -1 on error, 0 if not found and 1 if found.
2861  */
2862 Py_LOCAL(int)
_string_tailmatch(PyStringObject * self,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)2863 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2864                   Py_ssize_t end, int direction)
2865 {
2866     Py_ssize_t len = PyString_GET_SIZE(self);
2867     Py_ssize_t slen;
2868     const char* sub;
2869     const char* str;
2870 
2871     if (PyString_Check(substr)) {
2872         sub = PyString_AS_STRING(substr);
2873         slen = PyString_GET_SIZE(substr);
2874     }
2875 #ifdef Py_USING_UNICODE
2876     else if (PyUnicode_Check(substr))
2877         return PyUnicode_Tailmatch((PyObject *)self,
2878                                    substr, start, end, direction);
2879 #endif
2880     else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2881         return -1;
2882     str = PyString_AS_STRING(self);
2883 
2884     ADJUST_INDICES(start, end, len);
2885 
2886     if (direction < 0) {
2887         /* startswith */
2888         if (start+slen > len)
2889             return 0;
2890     } else {
2891         /* endswith */
2892         if (end-start < slen || start > len)
2893             return 0;
2894 
2895         if (end-slen > start)
2896             start = end - slen;
2897     }
2898     if (end-start >= slen)
2899         return ! memcmp(str+start, sub, slen);
2900     return 0;
2901 }
2902 
2903 
2904 PyDoc_STRVAR(startswith__doc__,
2905 "S.startswith(prefix[, start[, end]]) -> bool\n\
2906 \n\
2907 Return True if S starts with the specified prefix, False otherwise.\n\
2908 With optional start, test S beginning at that position.\n\
2909 With optional end, stop comparing S at that position.\n\
2910 prefix can also be a tuple of strings to try.");
2911 
2912 static PyObject *
string_startswith(PyStringObject * self,PyObject * args)2913 string_startswith(PyStringObject *self, PyObject *args)
2914 {
2915     Py_ssize_t start = 0;
2916     Py_ssize_t end = PY_SSIZE_T_MAX;
2917     PyObject *subobj;
2918     int result;
2919 
2920     if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
2921         return NULL;
2922     if (PyTuple_Check(subobj)) {
2923         Py_ssize_t i;
2924         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2925             result = _string_tailmatch(self,
2926                             PyTuple_GET_ITEM(subobj, i),
2927                             start, end, -1);
2928             if (result == -1)
2929                 return NULL;
2930             else if (result) {
2931                 Py_RETURN_TRUE;
2932             }
2933         }
2934         Py_RETURN_FALSE;
2935     }
2936     result = _string_tailmatch(self, subobj, start, end, -1);
2937     if (result == -1) {
2938         if (PyErr_ExceptionMatches(PyExc_TypeError))
2939             PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2940                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2941         return NULL;
2942     }
2943     else
2944         return PyBool_FromLong(result);
2945 }
2946 
2947 
2948 PyDoc_STRVAR(endswith__doc__,
2949 "S.endswith(suffix[, start[, end]]) -> bool\n\
2950 \n\
2951 Return True if S ends with the specified suffix, False otherwise.\n\
2952 With optional start, test S beginning at that position.\n\
2953 With optional end, stop comparing S at that position.\n\
2954 suffix can also be a tuple of strings to try.");
2955 
2956 static PyObject *
string_endswith(PyStringObject * self,PyObject * args)2957 string_endswith(PyStringObject *self, PyObject *args)
2958 {
2959     Py_ssize_t start = 0;
2960     Py_ssize_t end = PY_SSIZE_T_MAX;
2961     PyObject *subobj;
2962     int result;
2963 
2964     if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
2965         return NULL;
2966     if (PyTuple_Check(subobj)) {
2967         Py_ssize_t i;
2968         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2969             result = _string_tailmatch(self,
2970                             PyTuple_GET_ITEM(subobj, i),
2971                             start, end, +1);
2972             if (result == -1)
2973                 return NULL;
2974             else if (result) {
2975                 Py_RETURN_TRUE;
2976             }
2977         }
2978         Py_RETURN_FALSE;
2979     }
2980     result = _string_tailmatch(self, subobj, start, end, +1);
2981     if (result == -1) {
2982         if (PyErr_ExceptionMatches(PyExc_TypeError))
2983             PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2984                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2985         return NULL;
2986     }
2987     else
2988         return PyBool_FromLong(result);
2989 }
2990 
2991 
2992 PyDoc_STRVAR(encode__doc__,
2993 "S.encode([encoding[,errors]]) -> object\n\
2994 \n\
2995 Encodes S using the codec registered for encoding. encoding defaults\n\
2996 to the default encoding. errors may be given to set a different error\n\
2997 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2998 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2999 'xmlcharrefreplace' as well as any other name registered with\n\
3000 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3001 
3002 static PyObject *
string_encode(PyStringObject * self,PyObject * args,PyObject * kwargs)3003 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3004 {
3005     static char *kwlist[] = {"encoding", "errors", 0};
3006     char *encoding = NULL;
3007     char *errors = NULL;
3008     PyObject *v;
3009 
3010     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3011                                      kwlist, &encoding, &errors))
3012         return NULL;
3013     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3014     if (v == NULL)
3015         goto onError;
3016     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3017         PyErr_Format(PyExc_TypeError,
3018                      "encoder did not return a string/unicode object "
3019                      "(type=%.400s)",
3020                      Py_TYPE(v)->tp_name);
3021         Py_DECREF(v);
3022         return NULL;
3023     }
3024     return v;
3025 
3026  onError:
3027     return NULL;
3028 }
3029 
3030 
3031 PyDoc_STRVAR(decode__doc__,
3032 "S.decode([encoding[,errors]]) -> object\n\
3033 \n\
3034 Decodes S using the codec registered for encoding. encoding defaults\n\
3035 to the default encoding. errors may be given to set a different error\n\
3036 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3037 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3038 as well as any other name registered with codecs.register_error that is\n\
3039 able to handle UnicodeDecodeErrors.");
3040 
3041 static PyObject *
string_decode(PyStringObject * self,PyObject * args,PyObject * kwargs)3042 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3043 {
3044     static char *kwlist[] = {"encoding", "errors", 0};
3045     char *encoding = NULL;
3046     char *errors = NULL;
3047     PyObject *v;
3048 
3049     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3050                                      kwlist, &encoding, &errors))
3051         return NULL;
3052     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3053     if (v == NULL)
3054         goto onError;
3055     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3056         PyErr_Format(PyExc_TypeError,
3057                      "decoder did not return a string/unicode object "
3058                      "(type=%.400s)",
3059                      Py_TYPE(v)->tp_name);
3060         Py_DECREF(v);
3061         return NULL;
3062     }
3063     return v;
3064 
3065  onError:
3066     return NULL;
3067 }
3068 
3069 
3070 PyDoc_STRVAR(expandtabs__doc__,
3071 "S.expandtabs([tabsize]) -> string\n\
3072 \n\
3073 Return a copy of S where all tab characters are expanded using spaces.\n\
3074 If tabsize is not given, a tab size of 8 characters is assumed.");
3075 
3076 static PyObject*
string_expandtabs(PyStringObject * self,PyObject * args)3077 string_expandtabs(PyStringObject *self, PyObject *args)
3078 {
3079     const char *e, *p, *qe;
3080     char *q;
3081     Py_ssize_t i, j, incr;
3082     PyObject *u;
3083     int tabsize = 8;
3084 
3085     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3086         return NULL;
3087 
3088     /* First pass: determine size of output string */
3089     i = 0; /* chars up to and including most recent \n or \r */
3090     j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3091     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3092     for (p = PyString_AS_STRING(self); p < e; p++) {
3093         if (*p == '\t') {
3094             if (tabsize > 0) {
3095                 incr = tabsize - (j % tabsize);
3096                 if (j > PY_SSIZE_T_MAX - incr)
3097                     goto overflow1;
3098                 j += incr;
3099             }
3100         }
3101         else {
3102             if (j > PY_SSIZE_T_MAX - 1)
3103                 goto overflow1;
3104             j++;
3105             if (*p == '\n' || *p == '\r') {
3106                 if (i > PY_SSIZE_T_MAX - j)
3107                     goto overflow1;
3108                 i += j;
3109                 j = 0;
3110             }
3111         }
3112     }
3113 
3114     if (i > PY_SSIZE_T_MAX - j)
3115         goto overflow1;
3116 
3117     /* Second pass: create output string and fill it */
3118     u = PyString_FromStringAndSize(NULL, i + j);
3119     if (!u)
3120         return NULL;
3121 
3122     j = 0; /* same as in first pass */
3123     q = PyString_AS_STRING(u); /* next output char */
3124     qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3125 
3126     for (p = PyString_AS_STRING(self); p < e; p++) {
3127         if (*p == '\t') {
3128             if (tabsize > 0) {
3129                 i = tabsize - (j % tabsize);
3130                 j += i;
3131                 while (i--) {
3132                     if (q >= qe)
3133                         goto overflow2;
3134                     *q++ = ' ';
3135                 }
3136             }
3137         }
3138         else {
3139             if (q >= qe)
3140                 goto overflow2;
3141             *q++ = *p;
3142             j++;
3143             if (*p == '\n' || *p == '\r')
3144                 j = 0;
3145         }
3146     }
3147 
3148     return u;
3149 
3150   overflow2:
3151     Py_DECREF(u);
3152   overflow1:
3153     PyErr_SetString(PyExc_OverflowError, "new string is too long");
3154     return NULL;
3155 }
3156 
3157 Py_LOCAL_INLINE(PyObject *)
pad(PyStringObject * self,Py_ssize_t left,Py_ssize_t right,char fill)3158 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3159 {
3160     PyObject *u;
3161 
3162     if (left < 0)
3163         left = 0;
3164     if (right < 0)
3165         right = 0;
3166 
3167     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3168         Py_INCREF(self);
3169         return (PyObject *)self;
3170     }
3171 
3172     u = PyString_FromStringAndSize(NULL,
3173                                    left + PyString_GET_SIZE(self) + right);
3174     if (u) {
3175         if (left)
3176             memset(PyString_AS_STRING(u), fill, left);
3177         Py_MEMCPY(PyString_AS_STRING(u) + left,
3178                PyString_AS_STRING(self),
3179                PyString_GET_SIZE(self));
3180         if (right)
3181             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3182                fill, right);
3183     }
3184 
3185     return u;
3186 }
3187 
3188 PyDoc_STRVAR(ljust__doc__,
3189 "S.ljust(width[, fillchar]) -> string\n"
3190 "\n"
3191 "Return S left-justified in a string of length width. Padding is\n"
3192 "done using the specified fill character (default is a space).");
3193 
3194 static PyObject *
string_ljust(PyStringObject * self,PyObject * args)3195 string_ljust(PyStringObject *self, PyObject *args)
3196 {
3197     Py_ssize_t width;
3198     char fillchar = ' ';
3199 
3200     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3201         return NULL;
3202 
3203     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3204         Py_INCREF(self);
3205         return (PyObject*) self;
3206     }
3207 
3208     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3209 }
3210 
3211 
3212 PyDoc_STRVAR(rjust__doc__,
3213 "S.rjust(width[, fillchar]) -> string\n"
3214 "\n"
3215 "Return S right-justified in a string of length width. Padding is\n"
3216 "done using the specified fill character (default is a space)");
3217 
3218 static PyObject *
string_rjust(PyStringObject * self,PyObject * args)3219 string_rjust(PyStringObject *self, PyObject *args)
3220 {
3221     Py_ssize_t width;
3222     char fillchar = ' ';
3223 
3224     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3225         return NULL;
3226 
3227     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3228         Py_INCREF(self);
3229         return (PyObject*) self;
3230     }
3231 
3232     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3233 }
3234 
3235 
3236 PyDoc_STRVAR(center__doc__,
3237 "S.center(width[, fillchar]) -> string\n"
3238 "\n"
3239 "Return S centered in a string of length width. Padding is\n"
3240 "done using the specified fill character (default is a space)");
3241 
3242 static PyObject *
string_center(PyStringObject * self,PyObject * args)3243 string_center(PyStringObject *self, PyObject *args)
3244 {
3245     Py_ssize_t marg, left;
3246     Py_ssize_t width;
3247     char fillchar = ' ';
3248 
3249     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3250         return NULL;
3251 
3252     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3253         Py_INCREF(self);
3254         return (PyObject*) self;
3255     }
3256 
3257     marg = width - PyString_GET_SIZE(self);
3258     left = marg / 2 + (marg & width & 1);
3259 
3260     return pad(self, left, marg - left, fillchar);
3261 }
3262 
3263 PyDoc_STRVAR(zfill__doc__,
3264 "S.zfill(width) -> string\n"
3265 "\n"
3266 "Pad a numeric string S with zeros on the left, to fill a field\n"
3267 "of the specified width.  The string S is never truncated.");
3268 
3269 static PyObject *
string_zfill(PyStringObject * self,PyObject * args)3270 string_zfill(PyStringObject *self, PyObject *args)
3271 {
3272     Py_ssize_t fill;
3273     PyObject *s;
3274     char *p;
3275     Py_ssize_t width;
3276 
3277     if (!PyArg_ParseTuple(args, "n:zfill", &width))
3278         return NULL;
3279 
3280     if (PyString_GET_SIZE(self) >= width) {
3281         if (PyString_CheckExact(self)) {
3282             Py_INCREF(self);
3283             return (PyObject*) self;
3284         }
3285         else
3286             return PyString_FromStringAndSize(
3287                 PyString_AS_STRING(self),
3288                 PyString_GET_SIZE(self)
3289             );
3290     }
3291 
3292     fill = width - PyString_GET_SIZE(self);
3293 
3294     s = pad(self, fill, 0, '0');
3295 
3296     if (s == NULL)
3297         return NULL;
3298 
3299     p = PyString_AS_STRING(s);
3300     if (p[fill] == '+' || p[fill] == '-') {
3301         /* move sign to beginning of string */
3302         p[0] = p[fill];
3303         p[fill] = '0';
3304     }
3305 
3306     return (PyObject*) s;
3307 }
3308 
3309 PyDoc_STRVAR(isspace__doc__,
3310 "S.isspace() -> bool\n\
3311 \n\
3312 Return True if all characters in S are whitespace\n\
3313 and there is at least one character in S, False otherwise.");
3314 
3315 static PyObject*
string_isspace(PyStringObject * self)3316 string_isspace(PyStringObject *self)
3317 {
3318     register const unsigned char *p
3319         = (unsigned char *) PyString_AS_STRING(self);
3320     register const unsigned char *e;
3321 
3322     /* Shortcut for single character strings */
3323     if (PyString_GET_SIZE(self) == 1 &&
3324         isspace(*p))
3325         return PyBool_FromLong(1);
3326 
3327     /* Special case for empty strings */
3328     if (PyString_GET_SIZE(self) == 0)
3329         return PyBool_FromLong(0);
3330 
3331     e = p + PyString_GET_SIZE(self);
3332     for (; p < e; p++) {
3333         if (!isspace(*p))
3334             return PyBool_FromLong(0);
3335     }
3336     return PyBool_FromLong(1);
3337 }
3338 
3339 
3340 PyDoc_STRVAR(isalpha__doc__,
3341 "S.isalpha() -> bool\n\
3342 \n\
3343 Return True if all characters in S are alphabetic\n\
3344 and there is at least one character in S, False otherwise.");
3345 
3346 static PyObject*
string_isalpha(PyStringObject * self)3347 string_isalpha(PyStringObject *self)
3348 {
3349     register const unsigned char *p
3350         = (unsigned char *) PyString_AS_STRING(self);
3351     register const unsigned char *e;
3352 
3353     /* Shortcut for single character strings */
3354     if (PyString_GET_SIZE(self) == 1 &&
3355         isalpha(*p))
3356         return PyBool_FromLong(1);
3357 
3358     /* Special case for empty strings */
3359     if (PyString_GET_SIZE(self) == 0)
3360         return PyBool_FromLong(0);
3361 
3362     e = p + PyString_GET_SIZE(self);
3363     for (; p < e; p++) {
3364         if (!isalpha(*p))
3365             return PyBool_FromLong(0);
3366     }
3367     return PyBool_FromLong(1);
3368 }
3369 
3370 
3371 PyDoc_STRVAR(isalnum__doc__,
3372 "S.isalnum() -> bool\n\
3373 \n\
3374 Return True if all characters in S are alphanumeric\n\
3375 and there is at least one character in S, False otherwise.");
3376 
3377 static PyObject*
string_isalnum(PyStringObject * self)3378 string_isalnum(PyStringObject *self)
3379 {
3380     register const unsigned char *p
3381         = (unsigned char *) PyString_AS_STRING(self);
3382     register const unsigned char *e;
3383 
3384     /* Shortcut for single character strings */
3385     if (PyString_GET_SIZE(self) == 1 &&
3386         isalnum(*p))
3387         return PyBool_FromLong(1);
3388 
3389     /* Special case for empty strings */
3390     if (PyString_GET_SIZE(self) == 0)
3391         return PyBool_FromLong(0);
3392 
3393     e = p + PyString_GET_SIZE(self);
3394     for (; p < e; p++) {
3395         if (!isalnum(*p))
3396             return PyBool_FromLong(0);
3397     }
3398     return PyBool_FromLong(1);
3399 }
3400 
3401 
3402 PyDoc_STRVAR(isdigit__doc__,
3403 "S.isdigit() -> bool\n\
3404 \n\
3405 Return True if all characters in S are digits\n\
3406 and there is at least one character in S, False otherwise.");
3407 
3408 static PyObject*
string_isdigit(PyStringObject * self)3409 string_isdigit(PyStringObject *self)
3410 {
3411     register const unsigned char *p
3412         = (unsigned char *) PyString_AS_STRING(self);
3413     register const unsigned char *e;
3414 
3415     /* Shortcut for single character strings */
3416     if (PyString_GET_SIZE(self) == 1 &&
3417         isdigit(*p))
3418         return PyBool_FromLong(1);
3419 
3420     /* Special case for empty strings */
3421     if (PyString_GET_SIZE(self) == 0)
3422         return PyBool_FromLong(0);
3423 
3424     e = p + PyString_GET_SIZE(self);
3425     for (; p < e; p++) {
3426         if (!isdigit(*p))
3427             return PyBool_FromLong(0);
3428     }
3429     return PyBool_FromLong(1);
3430 }
3431 
3432 
3433 PyDoc_STRVAR(islower__doc__,
3434 "S.islower() -> bool\n\
3435 \n\
3436 Return True if all cased characters in S are lowercase and there is\n\
3437 at least one cased character in S, False otherwise.");
3438 
3439 static PyObject*
string_islower(PyStringObject * self)3440 string_islower(PyStringObject *self)
3441 {
3442     register const unsigned char *p
3443         = (unsigned char *) PyString_AS_STRING(self);
3444     register const unsigned char *e;
3445     int cased;
3446 
3447     /* Shortcut for single character strings */
3448     if (PyString_GET_SIZE(self) == 1)
3449         return PyBool_FromLong(islower(*p) != 0);
3450 
3451     /* Special case for empty strings */
3452     if (PyString_GET_SIZE(self) == 0)
3453         return PyBool_FromLong(0);
3454 
3455     e = p + PyString_GET_SIZE(self);
3456     cased = 0;
3457     for (; p < e; p++) {
3458         if (isupper(*p))
3459             return PyBool_FromLong(0);
3460         else if (!cased && islower(*p))
3461             cased = 1;
3462     }
3463     return PyBool_FromLong(cased);
3464 }
3465 
3466 
3467 PyDoc_STRVAR(isupper__doc__,
3468 "S.isupper() -> bool\n\
3469 \n\
3470 Return True if all cased characters in S are uppercase and there is\n\
3471 at least one cased character in S, False otherwise.");
3472 
3473 static PyObject*
string_isupper(PyStringObject * self)3474 string_isupper(PyStringObject *self)
3475 {
3476     register const unsigned char *p
3477         = (unsigned char *) PyString_AS_STRING(self);
3478     register const unsigned char *e;
3479     int cased;
3480 
3481     /* Shortcut for single character strings */
3482     if (PyString_GET_SIZE(self) == 1)
3483         return PyBool_FromLong(isupper(*p) != 0);
3484 
3485     /* Special case for empty strings */
3486     if (PyString_GET_SIZE(self) == 0)
3487         return PyBool_FromLong(0);
3488 
3489     e = p + PyString_GET_SIZE(self);
3490     cased = 0;
3491     for (; p < e; p++) {
3492         if (islower(*p))
3493             return PyBool_FromLong(0);
3494         else if (!cased && isupper(*p))
3495             cased = 1;
3496     }
3497     return PyBool_FromLong(cased);
3498 }
3499 
3500 
3501 PyDoc_STRVAR(istitle__doc__,
3502 "S.istitle() -> bool\n\
3503 \n\
3504 Return True if S is a titlecased string and there is at least one\n\
3505 character in S, i.e. uppercase characters may only follow uncased\n\
3506 characters and lowercase characters only cased ones. Return False\n\
3507 otherwise.");
3508 
3509 static PyObject*
string_istitle(PyStringObject * self,PyObject * uncased)3510 string_istitle(PyStringObject *self, PyObject *uncased)
3511 {
3512     register const unsigned char *p
3513         = (unsigned char *) PyString_AS_STRING(self);
3514     register const unsigned char *e;
3515     int cased, previous_is_cased;
3516 
3517     /* Shortcut for single character strings */
3518     if (PyString_GET_SIZE(self) == 1)
3519         return PyBool_FromLong(isupper(*p) != 0);
3520 
3521     /* Special case for empty strings */
3522     if (PyString_GET_SIZE(self) == 0)
3523         return PyBool_FromLong(0);
3524 
3525     e = p + PyString_GET_SIZE(self);
3526     cased = 0;
3527     previous_is_cased = 0;
3528     for (; p < e; p++) {
3529         register const unsigned char ch = *p;
3530 
3531         if (isupper(ch)) {
3532             if (previous_is_cased)
3533                 return PyBool_FromLong(0);
3534             previous_is_cased = 1;
3535             cased = 1;
3536         }
3537         else if (islower(ch)) {
3538             if (!previous_is_cased)
3539                 return PyBool_FromLong(0);
3540             previous_is_cased = 1;
3541             cased = 1;
3542         }
3543         else
3544             previous_is_cased = 0;
3545     }
3546     return PyBool_FromLong(cased);
3547 }
3548 
3549 
3550 PyDoc_STRVAR(splitlines__doc__,
3551 "S.splitlines(keepends=False) -> list of strings\n\
3552 \n\
3553 Return a list of the lines in S, breaking at line boundaries.\n\
3554 Line breaks are not included in the resulting list unless keepends\n\
3555 is given and true.");
3556 
3557 static PyObject*
string_splitlines(PyStringObject * self,PyObject * args)3558 string_splitlines(PyStringObject *self, PyObject *args)
3559 {
3560     int keepends = 0;
3561 
3562     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3563         return NULL;
3564 
3565     return stringlib_splitlines(
3566         (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3567         keepends
3568     );
3569 }
3570 
3571 PyDoc_STRVAR(sizeof__doc__,
3572 "S.__sizeof__() -> size of S in memory, in bytes");
3573 
3574 static PyObject *
string_sizeof(PyStringObject * v)3575 string_sizeof(PyStringObject *v)
3576 {
3577     Py_ssize_t res;
3578     res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3579     return PyInt_FromSsize_t(res);
3580 }
3581 
3582 static PyObject *
string_getnewargs(PyStringObject * v)3583 string_getnewargs(PyStringObject *v)
3584 {
3585     return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3586 }
3587 
3588 
3589 #include "stringlib/string_format.h"
3590 
3591 PyDoc_STRVAR(format__doc__,
3592 "S.format(*args, **kwargs) -> string\n\
3593 \n\
3594 Return a formatted version of S, using substitutions from args and kwargs.\n\
3595 The substitutions are identified by braces ('{' and '}').");
3596 
3597 static PyObject *
string__format__(PyObject * self,PyObject * args)3598 string__format__(PyObject* self, PyObject* args)
3599 {
3600     PyObject *format_spec;
3601     PyObject *result = NULL;
3602     PyObject *tmp = NULL;
3603 
3604     /* If 2.x, convert format_spec to the same type as value */
3605     /* This is to allow things like u''.format('') */
3606     if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3607         goto done;
3608     if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3609         PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3610                      "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3611         goto done;
3612     }
3613     tmp = PyObject_Str(format_spec);
3614     if (tmp == NULL)
3615         goto done;
3616     format_spec = tmp;
3617 
3618     result = _PyBytes_FormatAdvanced(self,
3619                                      PyString_AS_STRING(format_spec),
3620                                      PyString_GET_SIZE(format_spec));
3621 done:
3622     Py_XDECREF(tmp);
3623     return result;
3624 }
3625 
3626 PyDoc_STRVAR(p_format__doc__,
3627 "S.__format__(format_spec) -> string\n\
3628 \n\
3629 Return a formatted version of S as described by format_spec.");
3630 
3631 
/* Method table for the str type; installed through the tp_methods slot of
   PyString_Type.  Each entry is {name, C implementation, calling-convention
   flags, docstring}.  Entries that omit the fourth field have no docstring.
   The table is terminated by an all-NULL sentinel entry. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    /* New-style formatting support.  The two underscore-prefixed
       _formatter_* entries carry no docstring. */
    {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
    {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
    {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
    {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
    {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
     sizeof__doc__},
    {"__getnewargs__",          (PyCFunction)string_getnewargs, METH_NOARGS},
    {NULL,     NULL}                         /* sentinel */
};
3690 
3691 static PyObject *
3692 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3693 
3694 static PyObject *
string_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3695 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3696 {
3697     PyObject *x = NULL;
3698     static char *kwlist[] = {"object", 0};
3699 
3700     if (type != &PyString_Type)
3701         return str_subtype_new(type, args, kwds);
3702     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3703         return NULL;
3704     if (x == NULL)
3705         return PyString_FromString("");
3706     return PyObject_Str(x);
3707 }
3708 
3709 static PyObject *
str_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3710 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3711 {
3712     PyObject *tmp, *pnew;
3713     Py_ssize_t n;
3714 
3715     assert(PyType_IsSubtype(type, &PyString_Type));
3716     tmp = string_new(&PyString_Type, args, kwds);
3717     if (tmp == NULL)
3718         return NULL;
3719     assert(PyString_Check(tmp));
3720     n = PyString_GET_SIZE(tmp);
3721     pnew = type->tp_alloc(type, n);
3722     if (pnew != NULL) {
3723         Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3724         ((PyStringObject *)pnew)->ob_shash =
3725             ((PyStringObject *)tmp)->ob_shash;
3726         ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3727     }
3728     Py_DECREF(tmp);
3729     return pnew;
3730 }
3731 
3732 static PyObject *
basestring_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3733 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3734 {
3735     PyErr_SetString(PyExc_TypeError,
3736                     "The basestring type cannot be instantiated");
3737     return NULL;
3738 }
3739 
3740 static PyObject *
string_mod(PyObject * v,PyObject * w)3741 string_mod(PyObject *v, PyObject *w)
3742 {
3743     if (!PyString_Check(v)) {
3744         Py_INCREF(Py_NotImplemented);
3745         return Py_NotImplemented;
3746     }
3747     return PyString_Format(v, w);
3748 }
3749 
/* Docstring installed in the tp_doc slot of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
3752 
/* Number protocol table for str, installed through tp_as_number of
   PyString_Type.  Only the remainder slot is populated (string_mod).
   Slots after nb_remainder are not listed and are therefore
   zero-initialized. */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
3760 
3761 
/* Type object for basestring.  It defines no behaviour of its own: every
   slot is empty except the flags, the docstring, the base type and tp_new,
   and tp_new (basestring_new) always raises TypeError.  Py_TPFLAGS_BASETYPE
   is set so that other types can derive from it; PyString_Type names it as
   its tp_base. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
3803 
/* Docstring installed in the tp_doc slot of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object='') -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");
3809 
/* Type object for str.  It is a variable-size type: the basic size is
   PyStringObject_SIZE and each item is one char.  It derives from
   PyBaseString_Type, may itself be subclassed (Py_TPFLAGS_BASETYPE), and
   wires in the number, sequence, mapping and buffer protocol tables as
   well as the method table string_methods. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    PyStringObject_SIZE,                        /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3853 
3854 void
PyString_Concat(register PyObject ** pv,register PyObject * w)3855 PyString_Concat(register PyObject **pv, register PyObject *w)
3856 {
3857     register PyObject *v;
3858     if (*pv == NULL)
3859         return;
3860     if (w == NULL || !PyString_Check(*pv)) {
3861         Py_CLEAR(*pv);
3862         return;
3863     }
3864     v = string_concat((PyStringObject *) *pv, w);
3865     Py_SETREF(*pv, v);
3866 }
3867 
3868 void
PyString_ConcatAndDel(register PyObject ** pv,register PyObject * w)3869 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3870 {
3871     PyString_Concat(pv, w);
3872     Py_XDECREF(w);
3873 }
3874 
3875 
3876 /* The following function breaks the notion that strings are immutable:
3877    it changes the size of a string.  We get away with this only if there
3878    is only one module referencing the object.  You can also think of it
3879    as creating a new string object and destroying the old one, only
3880    more efficiently.  In any case, don't use this if the string may
3881    already be known to some other part of the code...
3882    Note that if there's not enough memory to resize the string, the original
3883    string object at *pv is deallocated, *pv is set to NULL, an "out of
3884    memory" exception is set, and -1 is returned.  Else (on success) 0 is
3885    returned, and the value in *pv may or may not be the same as on input.
3886    As always, an extra byte is allocated for a trailing \0 byte (newsize
3887    does *not* include that), and a trailing \0 byte is stored.
3888 */
3889 
/* Resize the string object at *pv in place to hold newsize bytes.

   pv       address of the caller's (only) reference to the string
   newsize  new length in bytes, not counting the trailing NUL

   Returns 0 on success; *pv may have changed because the object is
   reallocated.  Returns -1 with *pv set to NULL and the original object
   released on failure: SystemError (bad internal call) when the object is
   not a string, is shared, is interned or newsize is negative; MemoryError
   when the reallocation fails. */
int
_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
{
    register PyObject *v;
    register PyStringObject *sv;
    v = *pv;
    /* Only an unshared, non-interned string may be modified in place. */
    if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
        PyString_CHECK_INTERNED(v)) {
        *pv = 0;
        Py_DECREF(v);
        PyErr_BadInternalCall();
        return -1;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* Take the object out of the interpreter's reference bookkeeping before
       its address may change; it is registered again below via
       _Py_NewReference().  (Presumably these are no-ops outside debug
       builds -- confirm against object.h.) */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    *pv = (PyObject *)
        PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
    if (*pv == NULL) {
        /* The reallocation failed, so v has not been released yet. */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyStringObject *) *pv;
    Py_SIZE(sv) = newsize;
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;          /* invalidate cached hash value */
    return 0;
}
3920 
3921 /* Helpers for formatstring */
3922 
3923 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)3924 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3925 {
3926     Py_ssize_t argidx = *p_argidx;
3927     if (argidx < arglen) {
3928         (*p_argidx)++;
3929         if (arglen < 0)
3930             return args;
3931         else
3932             return PyTuple_GetItem(args, argidx);
3933     }
3934     PyErr_SetString(PyExc_TypeError,
3935                     "not enough arguments for format string");
3936     return NULL;
3937 }
3938 
/* Format codes
 *
 * Bit flags accumulated in `flags` while PyString_Format() parses a
 * conversion specifier.  Each one records that the flag character shown
 * was present:
 *
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
3951 
3952 /* Returns a new reference to a PyString object, or NULL on failure. */
3953 
3954 static PyObject *
formatfloat(PyObject * v,int flags,int prec,int type)3955 formatfloat(PyObject *v, int flags, int prec, int type)
3956 {
3957     char *p;
3958     PyObject *result;
3959     double x;
3960 
3961     x = PyFloat_AsDouble(v);
3962     if (x == -1.0 && PyErr_Occurred()) {
3963         PyErr_Format(PyExc_TypeError, "float argument required, "
3964                      "not %.200s", Py_TYPE(v)->tp_name);
3965         return NULL;
3966     }
3967 
3968     if (prec < 0)
3969         prec = 6;
3970 
3971     p = PyOS_double_to_string(x, type, prec,
3972                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3973 
3974     if (p == NULL)
3975         return NULL;
3976     result = PyString_FromStringAndSize(p, strlen(p));
3977     PyMem_Free(p);
3978     return result;
3979 }
3980 
3981 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3982  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
3983  * Python's regular ints.
3984  * Return value:  a new PyString*, or NULL if error.
3985  *  .  *pbuf is set to point into it,
3986  *     *plen set to the # of chars following that.
3987  *     Caller must decref it when done using pbuf.
3988  *     The string starting at *pbuf is of the form
3989  *         "-"? ("0x" | "0X")? digit+
3990  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
3991  *         set in flags.  The case of hex digits will be correct,
3992  *     There will be at least prec digits, zero-filled on the left if
3993  *         necessary to get that many.
3994  * val          object to be converted
3995  * flags        bitmask of format flags; only F_ALT is looked at
3996  * prec         minimum number of digits; 0-fill on left if needed
3997  * type         a character in [duoxX]; u acts the same as d
3998  *
3999  * CAUTION:  o, x and X conversions on regular ints can never
4000  * produce a '-' sign, but can for Python's unbounded ints.
4001  */
4002 PyObject*
_PyString_FormatLong(PyObject * val,int flags,int prec,int type,char ** pbuf,int * plen)4003 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4004                      char **pbuf, int *plen)
4005 {
4006     PyObject *result = NULL, *r1;
4007     const char *s;
4008     char *buf;
4009     Py_ssize_t i;
4010     int sign;           /* 1 if '-', else 0 */
4011     int len;            /* number of characters */
4012     Py_ssize_t llen;
4013     int numdigits;      /* len == numnondigits + skipped + numdigits */
4014     int numnondigits, skipped, filled;
4015     const char *method;
4016 
4017     switch (type) {
4018     case 'd':
4019     case 'u':
4020         method = "str";
4021         result = Py_TYPE(val)->tp_str(val);
4022         break;
4023     case 'o':
4024         method = "oct";
4025         result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4026         break;
4027     case 'x':
4028     case 'X':
4029         method = "hex";
4030         result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4031         break;
4032     default:
4033         assert(!"'type' not in [duoxX]");
4034     }
4035     if (!result)
4036         return NULL;
4037 
4038     if (PyString_AsStringAndSize(result, (char **)&s, &llen) < 0) {
4039         Py_DECREF(result);
4040         return NULL;
4041     }
4042     if (llen > INT_MAX) {
4043         PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4044         Py_DECREF(result);
4045         return NULL;
4046     }
4047     len = (int)llen;
4048     if (len > 0 && s[len-1] == 'L') {
4049         --len;
4050         if (len == 0)
4051             goto error;
4052     }
4053     sign = s[0] == '-';
4054     numnondigits = sign;
4055 
4056     /* Need to skip 0x, 0X or 0. */
4057     skipped = 0;
4058     switch (type) {
4059     case 'o':
4060         if (s[sign] != '0')
4061             goto error;
4062         /* If 0 is only digit, leave it alone. */
4063         if ((flags & F_ALT) == 0 && len - sign > 1)
4064             skipped = 1;
4065         break;
4066     case 'x':
4067     case 'X':
4068         if (s[sign] != '0' || (s[sign + 1] != 'x' && s[sign + 1] != 'X'))
4069             goto error;
4070         if ((flags & F_ALT) == 0)
4071             skipped = 2;
4072         else
4073             numnondigits += 2;
4074         break;
4075     }
4076     numdigits = len - numnondigits - skipped;
4077     if (numdigits <= 0)
4078         goto error;
4079 
4080     filled = prec - numdigits;
4081     if (filled < 0)
4082         filled = 0;
4083     len = numnondigits + filled + numdigits;
4084 
4085     /* To modify the string in-place, there can only be one reference. */
4086     if (skipped >= filled &&
4087         PyString_CheckExact(result) &&
4088         Py_REFCNT(result) == 1 &&
4089         !PyString_CHECK_INTERNED(result))
4090     {
4091         r1 = NULL;
4092         buf = (char *)s + skipped - filled;
4093     }
4094     else {
4095         r1 = result;
4096         result = PyString_FromStringAndSize(NULL, len);
4097         if (!result) {
4098             Py_DECREF(r1);
4099             return NULL;
4100         }
4101         buf = PyString_AS_STRING(result);
4102     }
4103 
4104     for (i = numnondigits; --i >= 0;)
4105         buf[i] = s[i];
4106     buf += numnondigits;
4107     s += numnondigits + skipped;
4108     for (i = 0; i < filled; i++)
4109         *buf++ = '0';
4110     if (r1 == NULL) {
4111         assert(buf == s);
4112         buf += numdigits;
4113     }
4114     else {
4115         for (i = 0; i < numdigits; i++)
4116             *buf++ = *s++;
4117     }
4118     *buf = '\0';
4119     buf -= len;
4120     Py_XDECREF(r1);
4121 
4122     /* Fix up case for hex conversions. */
4123     if (type == 'X') {
4124         /* Need to convert all lower case letters to upper case.
4125            and need to convert 0x to 0X (and -0x to -0X). */
4126         for (i = 0; i < len; i++) {
4127             if (buf[i] >= 'a' && buf[i] <= 'z')
4128                 buf[i] -= 'a'-'A';
4129         }
4130     }
4131     *pbuf = buf;
4132     *plen = len;
4133     return result;
4134 
4135 error:
4136     PyErr_Format(PyExc_ValueError,
4137                  "%%%c format: invalid result of __%s__ (type=%.200s)",
4138                  type, method, Py_TYPE(val)->tp_name);
4139     Py_DECREF(result);
4140     return NULL;
4141 }
4142 
4143 Py_LOCAL_INLINE(int)
formatint(char * buf,size_t buflen,int flags,int prec,int type,PyObject * v)4144 formatint(char *buf, size_t buflen, int flags,
4145           int prec, int type, PyObject *v)
4146 {
4147     /* fmt = '%#.' + `prec` + 'l' + `type`
4148        worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4149        + 1 + 1 = 24 */
4150     char fmt[64];       /* plenty big enough! */
4151     char *sign;
4152     long x;
4153 
4154     x = PyInt_AsLong(v);
4155     if (x == -1 && PyErr_Occurred()) {
4156         PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4157                      Py_TYPE(v)->tp_name);
4158         return -1;
4159     }
4160     if (x < 0 && type == 'u') {
4161         type = 'd';
4162     }
4163     if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4164         sign = "-";
4165     else
4166         sign = "";
4167     if (prec < 0)
4168         prec = 1;
4169 
4170     if ((flags & F_ALT) &&
4171         (type == 'x' || type == 'X')) {
4172         /* When converting under %#x or %#X, there are a number
4173          * of issues that cause pain:
4174          * - when 0 is being converted, the C standard leaves off
4175          *   the '0x' or '0X', which is inconsistent with other
4176          *   %#x/%#X conversions and inconsistent with Python's
4177          *   hex() function
4178          * - there are platforms that violate the standard and
4179          *   convert 0 with the '0x' or '0X'
4180          *   (Metrowerks, Compaq Tru64)
4181          * - there are platforms that give '0x' when converting
4182          *   under %#X, but convert 0 in accordance with the
4183          *   standard (OS/2 EMX)
4184          *
4185          * We can achieve the desired consistency by inserting our
4186          * own '0x' or '0X' prefix, and substituting %x/%X in place
4187          * of %#x/%#X.
4188          *
4189          * Note that this is the same approach as used in
4190          * formatint() in unicodeobject.c
4191          */
4192         PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4193                       sign, type, prec, type);
4194     }
4195     else {
4196         PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4197                       sign, (flags&F_ALT) ? "#" : "",
4198                       prec, type);
4199     }
4200 
4201     /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4202      * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4203      */
4204     if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4205         PyErr_SetString(PyExc_OverflowError,
4206             "formatted integer is too long (precision too large?)");
4207         return -1;
4208     }
4209     if (sign[0])
4210         PyOS_snprintf(buf, buflen, fmt, -x);
4211     else
4212         PyOS_snprintf(buf, buflen, fmt, x);
4213     return (int)strlen(buf);
4214 }
4215 
4216 Py_LOCAL_INLINE(int)
formatchar(char * buf,size_t buflen,PyObject * v)4217 formatchar(char *buf, size_t buflen, PyObject *v)
4218 {
4219     /* presume that the buffer is at least 2 characters long */
4220     if (PyString_Check(v)) {
4221         if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4222             return -1;
4223     }
4224     else {
4225         if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4226             return -1;
4227     }
4228     buf[1] = '\0';
4229     return 1;
4230 }
4231 
4232 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4233 
4234    FORMATBUFLEN is the length of the buffer in which the ints &
4235    chars are formatted. XXX This is a magic number. Each formatting
4236    routine does bounds checking to ensure no overflow, but a better
4237    solution may be to malloc a buffer of appropriate size for each
4238    format. For now, the current solution is sufficient.
4239 */
4240 #define FORMATBUFLEN (size_t)120
4241 
4242 PyObject *
PyString_Format(PyObject * format,PyObject * args)4243 PyString_Format(PyObject *format, PyObject *args)
4244 {
4245     char *fmt, *res;
4246     Py_ssize_t arglen, argidx;
4247     Py_ssize_t reslen, rescnt, fmtcnt;
4248     int args_owned = 0;
4249     PyObject *result, *orig_args;
4250 #ifdef Py_USING_UNICODE
4251     PyObject *v, *w;
4252 #endif
4253     PyObject *dict = NULL;
4254     if (format == NULL || !PyString_Check(format) || args == NULL) {
4255         PyErr_BadInternalCall();
4256         return NULL;
4257     }
4258     orig_args = args;
4259     fmt = PyString_AS_STRING(format);
4260     fmtcnt = PyString_GET_SIZE(format);
4261     reslen = rescnt = fmtcnt + 100;
4262     result = PyString_FromStringAndSize((char *)NULL, reslen);
4263     if (result == NULL)
4264         return NULL;
4265     res = PyString_AsString(result);
4266     if (PyTuple_Check(args)) {
4267         arglen = PyTuple_GET_SIZE(args);
4268         argidx = 0;
4269     }
4270     else {
4271         arglen = -1;
4272         argidx = -2;
4273     }
4274     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
4275         !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))
4276         dict = args;
4277     while (--fmtcnt >= 0) {
4278         if (*fmt != '%') {
4279             if (--rescnt < 0) {
4280                 rescnt = fmtcnt + 100;
4281                 reslen += rescnt;
4282                 if (_PyString_Resize(&result, reslen))
4283                     return NULL;
4284                 res = PyString_AS_STRING(result)
4285                     + reslen - rescnt;
4286                 --rescnt;
4287             }
4288             *res++ = *fmt++;
4289         }
4290         else {
4291             /* Got a format specifier */
4292             int flags = 0;
4293             Py_ssize_t width = -1;
4294             int prec = -1;
4295             int c = '\0';
4296             int fill;
4297             int isnumok;
4298             PyObject *v = NULL;
4299             PyObject *temp = NULL;
4300             char *pbuf;
4301             int sign;
4302             Py_ssize_t len;
4303             char formatbuf[FORMATBUFLEN];
4304                  /* For format{int,char}() */
4305 #ifdef Py_USING_UNICODE
4306             char *fmt_start = fmt;
4307             Py_ssize_t argidx_start = argidx;
4308 #endif
4309 
4310             fmt++;
4311             if (*fmt == '(') {
4312                 char *keystart;
4313                 Py_ssize_t keylen;
4314                 PyObject *key;
4315                 int pcount = 1;
4316 
4317                 if (dict == NULL) {
4318                     PyErr_SetString(PyExc_TypeError,
4319                              "format requires a mapping");
4320                     goto error;
4321                 }
4322                 ++fmt;
4323                 --fmtcnt;
4324                 keystart = fmt;
4325                 /* Skip over balanced parentheses */
4326                 while (pcount > 0 && --fmtcnt >= 0) {
4327                     if (*fmt == ')')
4328                         --pcount;
4329                     else if (*fmt == '(')
4330                         ++pcount;
4331                     fmt++;
4332                 }
4333                 keylen = fmt - keystart - 1;
4334                 if (fmtcnt < 0 || pcount > 0) {
4335                     PyErr_SetString(PyExc_ValueError,
4336                                "incomplete format key");
4337                     goto error;
4338                 }
4339                 key = PyString_FromStringAndSize(keystart,
4340                                                  keylen);
4341                 if (key == NULL)
4342                     goto error;
4343                 if (args_owned) {
4344                     Py_DECREF(args);
4345                     args_owned = 0;
4346                 }
4347                 args = PyObject_GetItem(dict, key);
4348                 Py_DECREF(key);
4349                 if (args == NULL) {
4350                     goto error;
4351                 }
4352                 args_owned = 1;
4353                 arglen = -1;
4354                 argidx = -2;
4355             }
4356             while (--fmtcnt >= 0) {
4357                 switch (c = *fmt++) {
4358                 case '-': flags |= F_LJUST; continue;
4359                 case '+': flags |= F_SIGN; continue;
4360                 case ' ': flags |= F_BLANK; continue;
4361                 case '#': flags |= F_ALT; continue;
4362                 case '0': flags |= F_ZERO; continue;
4363                 }
4364                 break;
4365             }
4366             if (c == '*') {
4367                 v = getnextarg(args, arglen, &argidx);
4368                 if (v == NULL)
4369                     goto error;
4370                 if (!PyInt_Check(v)) {
4371                     PyErr_SetString(PyExc_TypeError,
4372                                     "* wants int");
4373                     goto error;
4374                 }
4375                 width = PyInt_AsSsize_t(v);
4376                 if (width == -1 && PyErr_Occurred())
4377                     goto error;
4378                 if (width < 0) {
4379                     flags |= F_LJUST;
4380                     width = -width;
4381                 }
4382                 if (--fmtcnt >= 0)
4383                     c = *fmt++;
4384             }
4385             else if (c >= 0 && isdigit(c)) {
4386                 width = c - '0';
4387                 while (--fmtcnt >= 0) {
4388                     c = Py_CHARMASK(*fmt++);
4389                     if (!isdigit(c))
4390                         break;
4391                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
4392                         PyErr_SetString(
4393                             PyExc_ValueError,
4394                             "width too big");
4395                         goto error;
4396                     }
4397                     width = width*10 + (c - '0');
4398                 }
4399             }
4400             if (c == '.') {
4401                 prec = 0;
4402                 if (--fmtcnt >= 0)
4403                     c = *fmt++;
4404                 if (c == '*') {
4405                     v = getnextarg(args, arglen, &argidx);
4406                     if (v == NULL)
4407                         goto error;
4408                     if (!PyInt_Check(v)) {
4409                         PyErr_SetString(
4410                             PyExc_TypeError,
4411                             "* wants int");
4412                         goto error;
4413                     }
4414                     prec = _PyInt_AsInt(v);
4415                     if (prec == -1 && PyErr_Occurred())
4416                         goto error;
4417                     if (prec < 0)
4418                         prec = 0;
4419                     if (--fmtcnt >= 0)
4420                         c = *fmt++;
4421                 }
4422                 else if (c >= 0 && isdigit(c)) {
4423                     prec = c - '0';
4424                     while (--fmtcnt >= 0) {
4425                         c = Py_CHARMASK(*fmt++);
4426                         if (!isdigit(c))
4427                             break;
4428                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
4429                             PyErr_SetString(
4430                                 PyExc_ValueError,
4431                                 "prec too big");
4432                             goto error;
4433                         }
4434                         prec = prec*10 + (c - '0');
4435                     }
4436                 }
4437             } /* prec */
4438             if (fmtcnt >= 0) {
4439                 if (c == 'h' || c == 'l' || c == 'L') {
4440                     if (--fmtcnt >= 0)
4441                         c = *fmt++;
4442                 }
4443             }
4444             if (fmtcnt < 0) {
4445                 PyErr_SetString(PyExc_ValueError,
4446                                 "incomplete format");
4447                 goto error;
4448             }
4449             if (c != '%') {
4450                 v = getnextarg(args, arglen, &argidx);
4451                 if (v == NULL)
4452                     goto error;
4453             }
4454             sign = 0;
4455             fill = ' ';
4456             switch (c) {
4457             case '%':
4458                 pbuf = "%";
4459                 len = 1;
4460                 break;
4461             case 's':
4462 #ifdef Py_USING_UNICODE
4463                 if (PyUnicode_Check(v)) {
4464                     fmt = fmt_start;
4465                     argidx = argidx_start;
4466                     goto unicode;
4467                 }
4468 #endif
4469                 temp = _PyObject_Str(v);
4470 #ifdef Py_USING_UNICODE
4471                 if (temp != NULL && PyUnicode_Check(temp)) {
4472                     Py_DECREF(temp);
4473                     fmt = fmt_start;
4474                     argidx = argidx_start;
4475                     goto unicode;
4476                 }
4477 #endif
4478                 /* Fall through */
4479             case 'r':
4480                 if (c == 'r')
4481                     temp = PyObject_Repr(v);
4482                 if (temp == NULL)
4483                     goto error;
4484                 if (!PyString_Check(temp)) {
4485                     PyErr_SetString(PyExc_TypeError,
4486                       "%s argument has non-string str()");
4487                     Py_DECREF(temp);
4488                     goto error;
4489                 }
4490                 pbuf = PyString_AS_STRING(temp);
4491                 len = PyString_GET_SIZE(temp);
4492                 if (prec >= 0 && len > prec)
4493                     len = prec;
4494                 break;
4495             case 'i':
4496             case 'd':
4497             case 'u':
4498             case 'o':
4499             case 'x':
4500             case 'X':
4501                 if (c == 'i')
4502                     c = 'd';
4503                 isnumok = 0;
4504                 if (PyNumber_Check(v)) {
4505                     PyObject *iobj=NULL;
4506 
4507                     if (_PyAnyInt_Check(v)) {
4508                         iobj = v;
4509                         Py_INCREF(iobj);
4510                     }
4511                     else {
4512                         iobj = PyNumber_Int(v);
4513                         if (iobj==NULL) {
4514                             PyErr_Clear();
4515                             iobj = PyNumber_Long(v);
4516                         }
4517                     }
4518                     if (iobj!=NULL) {
4519                         if (PyInt_Check(iobj)) {
4520                             isnumok = 1;
4521                             pbuf = formatbuf;
4522                             len = formatint(pbuf,
4523                                             sizeof(formatbuf),
4524                                             flags, prec, c, iobj);
4525                             Py_DECREF(iobj);
4526                             if (len < 0)
4527                                 goto error;
4528                             sign = 1;
4529                         }
4530                         else if (PyLong_Check(iobj)) {
4531                             int ilen;
4532 
4533                             isnumok = 1;
4534                             temp = _PyString_FormatLong(iobj, flags,
4535                                 prec, c, &pbuf, &ilen);
4536                             Py_DECREF(iobj);
4537                             len = ilen;
4538                             if (!temp)
4539                                 goto error;
4540                             sign = 1;
4541                         }
4542                         else {
4543                             Py_DECREF(iobj);
4544                         }
4545                     }
4546                 }
4547                 if (!isnumok) {
4548                     PyErr_Format(PyExc_TypeError,
4549                         "%%%c format: a number is required, "
4550                         "not %.200s", c, Py_TYPE(v)->tp_name);
4551                     goto error;
4552                 }
4553                 if (flags & F_ZERO)
4554                     fill = '0';
4555                 break;
4556             case 'e':
4557             case 'E':
4558             case 'f':
4559             case 'F':
4560             case 'g':
4561             case 'G':
4562                 temp = formatfloat(v, flags, prec, c);
4563                 if (temp == NULL)
4564                     goto error;
4565                 pbuf = PyString_AS_STRING(temp);
4566                 len = PyString_GET_SIZE(temp);
4567                 sign = 1;
4568                 if (flags & F_ZERO)
4569                     fill = '0';
4570                 break;
4571             case 'c':
4572 #ifdef Py_USING_UNICODE
4573                 if (PyUnicode_Check(v)) {
4574                     fmt = fmt_start;
4575                     argidx = argidx_start;
4576                     goto unicode;
4577                 }
4578 #endif
4579                 pbuf = formatbuf;
4580                 len = formatchar(pbuf, sizeof(formatbuf), v);
4581                 if (len < 0)
4582                     goto error;
4583                 break;
4584             default:
4585                 PyErr_Format(PyExc_ValueError,
4586                   "unsupported format character '%c' (0x%x) "
4587                   "at index %zd",
4588                   c, c,
4589                   (Py_ssize_t)(fmt - 1 -
4590                                PyString_AsString(format)));
4591                 goto error;
4592             }
4593             if (sign) {
4594                 if (*pbuf == '-' || *pbuf == '+') {
4595                     sign = *pbuf++;
4596                     len--;
4597                 }
4598                 else if (flags & F_SIGN)
4599                     sign = '+';
4600                 else if (flags & F_BLANK)
4601                     sign = ' ';
4602                 else
4603                     sign = 0;
4604             }
4605             if (width < len)
4606                 width = len;
4607             if (rescnt - (sign != 0) < width) {
4608                 reslen -= rescnt;
4609                 rescnt = width + fmtcnt + 100;
4610                 reslen += rescnt;
4611                 if (reslen < 0) {
4612                     Py_DECREF(result);
4613                     Py_XDECREF(temp);
4614                     return PyErr_NoMemory();
4615                 }
4616                 if (_PyString_Resize(&result, reslen)) {
4617                     Py_XDECREF(temp);
4618                     return NULL;
4619                 }
4620                 res = PyString_AS_STRING(result)
4621                     + reslen - rescnt;
4622             }
4623             if (sign) {
4624                 if (fill != ' ')
4625                     *res++ = sign;
4626                 rescnt--;
4627                 if (width > len)
4628                     width--;
4629             }
4630             if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4631                 assert(pbuf[0] == '0');
4632                 assert(pbuf[1] == c);
4633                 if (fill != ' ') {
4634                     *res++ = *pbuf++;
4635                     *res++ = *pbuf++;
4636                 }
4637                 rescnt -= 2;
4638                 width -= 2;
4639                 if (width < 0)
4640                     width = 0;
4641                 len -= 2;
4642             }
4643             if (width > len && !(flags & F_LJUST)) {
4644                 do {
4645                     --rescnt;
4646                     *res++ = fill;
4647                 } while (--width > len);
4648             }
4649             if (fill == ' ') {
4650                 if (sign)
4651                     *res++ = sign;
4652                 if ((flags & F_ALT) &&
4653                     (c == 'x' || c == 'X')) {
4654                     assert(pbuf[0] == '0');
4655                     assert(pbuf[1] == c);
4656                     *res++ = *pbuf++;
4657                     *res++ = *pbuf++;
4658                 }
4659             }
4660             Py_MEMCPY(res, pbuf, len);
4661             res += len;
4662             rescnt -= len;
4663             while (--width >= len) {
4664                 --rescnt;
4665                 *res++ = ' ';
4666             }
4667             if (dict && (argidx < arglen) && c != '%') {
4668                 PyErr_SetString(PyExc_TypeError,
4669                            "not all arguments converted during string formatting");
4670                 Py_XDECREF(temp);
4671                 goto error;
4672             }
4673             Py_XDECREF(temp);
4674         } /* '%' */
4675     } /* until end */
4676     if (argidx < arglen && !dict) {
4677         PyErr_SetString(PyExc_TypeError,
4678                         "not all arguments converted during string formatting");
4679         goto error;
4680     }
4681     if (args_owned) {
4682         Py_DECREF(args);
4683     }
4684     if (_PyString_Resize(&result, reslen - rescnt))
4685         return NULL;
4686     return result;
4687 
4688 #ifdef Py_USING_UNICODE
4689  unicode:
4690     if (args_owned) {
4691         Py_DECREF(args);
4692         args_owned = 0;
4693     }
4694     /* Fiddle args right (remove the first argidx arguments) */
4695     if (PyTuple_Check(orig_args) && argidx > 0) {
4696         PyObject *v;
4697         Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4698         v = PyTuple_New(n);
4699         if (v == NULL)
4700             goto error;
4701         while (--n >= 0) {
4702             PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4703             Py_INCREF(w);
4704             PyTuple_SET_ITEM(v, n, w);
4705         }
4706         args = v;
4707     } else {
4708         Py_INCREF(orig_args);
4709         args = orig_args;
4710     }
4711     args_owned = 1;
4712     /* Take what we have of the result and let the Unicode formatting
4713        function format the rest of the input. */
4714     rescnt = res - PyString_AS_STRING(result);
4715     if (_PyString_Resize(&result, rescnt))
4716         goto error;
4717     fmtcnt = PyString_GET_SIZE(format) - \
4718              (fmt - PyString_AS_STRING(format));
4719     format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4720     if (format == NULL)
4721         goto error;
4722     v = PyUnicode_Format(format, args);
4723     Py_DECREF(format);
4724     if (v == NULL)
4725         goto error;
4726     /* Paste what we have (result) to what the Unicode formatting
4727        function returned (v) and return the result (or error) */
4728     w = PyUnicode_Concat(result, v);
4729     Py_DECREF(result);
4730     Py_DECREF(v);
4731     Py_DECREF(args);
4732     return w;
4733 #endif /* Py_USING_UNICODE */
4734 
4735  error:
4736     Py_DECREF(result);
4737     if (args_owned) {
4738         Py_DECREF(args);
4739     }
4740     return NULL;
4741 }
4742 
4743 void
PyString_InternInPlace(PyObject ** p)4744 PyString_InternInPlace(PyObject **p)
4745 {
4746     register PyStringObject *s = (PyStringObject *)(*p);
4747     PyObject *t;
4748     if (s == NULL || !PyString_Check(s))
4749         Py_FatalError("PyString_InternInPlace: strings only please!");
4750     /* If it's a string subclass, we don't really know what putting
4751        it in the interned dict might do. */
4752     if (!PyString_CheckExact(s))
4753         return;
4754     if (PyString_CHECK_INTERNED(s))
4755         return;
4756     if (interned == NULL) {
4757         interned = PyDict_New();
4758         if (interned == NULL) {
4759             PyErr_Clear(); /* Don't leave an exception */
4760             return;
4761         }
4762     }
4763     t = PyDict_GetItem(interned, (PyObject *)s);
4764     if (t) {
4765         Py_INCREF(t);
4766         Py_SETREF(*p, t);
4767         return;
4768     }
4769 
4770     if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4771         PyErr_Clear();
4772         return;
4773     }
4774     /* The two references in interned are not counted by refcnt.
4775        The string deallocator will take care of this */
4776     Py_REFCNT(s) -= 2;
4777     PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4778 }
4779 
4780 void
PyString_InternImmortal(PyObject ** p)4781 PyString_InternImmortal(PyObject **p)
4782 {
4783     PyString_InternInPlace(p);
4784     if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4785         PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4786         Py_INCREF(*p);
4787     }
4788 }
4789 
4790 
4791 PyObject *
PyString_InternFromString(const char * cp)4792 PyString_InternFromString(const char *cp)
4793 {
4794     PyObject *s = PyString_FromString(cp);
4795     if (s == NULL)
4796         return NULL;
4797     PyString_InternInPlace(&s);
4798     return s;
4799 }
4800 
4801 void
PyString_Fini(void)4802 PyString_Fini(void)
4803 {
4804     int i;
4805     for (i = 0; i < UCHAR_MAX + 1; i++)
4806         Py_CLEAR(characters[i]);
4807     Py_CLEAR(nullstring);
4808 }
4809 
_Py_ReleaseInternedStrings(void)4810 void _Py_ReleaseInternedStrings(void)
4811 {
4812     PyObject *keys;
4813     PyStringObject *s;
4814     Py_ssize_t i, n;
4815     Py_ssize_t immortal_size = 0, mortal_size = 0;
4816 
4817     if (interned == NULL || !PyDict_Check(interned))
4818         return;
4819     keys = PyDict_Keys(interned);
4820     if (keys == NULL || !PyList_Check(keys)) {
4821         PyErr_Clear();
4822         return;
4823     }
4824 
4825     /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4826        detector, interned strings are not forcibly deallocated; rather, we
4827        give them their stolen references back, and then clear and DECREF
4828        the interned dict. */
4829 
4830     n = PyList_GET_SIZE(keys);
4831     fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4832         n);
4833     for (i = 0; i < n; i++) {
4834         s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4835         switch (s->ob_sstate) {
4836         case SSTATE_NOT_INTERNED:
4837             /* XXX Shouldn't happen */
4838             break;
4839         case SSTATE_INTERNED_IMMORTAL:
4840             Py_REFCNT(s) += 1;
4841             immortal_size += Py_SIZE(s);
4842             break;
4843         case SSTATE_INTERNED_MORTAL:
4844             Py_REFCNT(s) += 2;
4845             mortal_size += Py_SIZE(s);
4846             break;
4847         default:
4848             Py_FatalError("Inconsistent interned string state.");
4849         }
4850         s->ob_sstate = SSTATE_NOT_INTERNED;
4851     }
4852     fprintf(stderr, "total size of all interned strings: "
4853                     "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4854                     "mortal/immortal\n", mortal_size, immortal_size);
4855     Py_DECREF(keys);
4856     PyDict_Clear(interned);
4857     Py_CLEAR(interned);
4858 }
4859