1 /* strop module */
2 
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include <ctype.h>
6 
7 PyDoc_STRVAR(strop_module__doc__,
8 "Common string manipulations, optimized for speed.\n"
9 "\n"
10 "Always use \"import string\" rather than referencing\n"
11 "this module directly.");
12 
13 /* XXX This file assumes that the <ctype.h> is*() functions
14    XXX are defined for all 8-bit characters! */
15 
/* Issue the module's DeprecationWarning and, when the warning call
   reports failure (non-zero), leave the calling function with NULL.
   The do { } while (0) wrapper makes "WARN;" a single complete
   statement, so it can never pair up with an "else" that follows it. */
#define WARN \
    do { \
        if (PyErr_Warn(PyExc_DeprecationWarning, \
                       "strop functions are obsolete; use string methods")) \
            return NULL; \
    } while (0)
19 
20 /* The lstrip(), rstrip() and strip() functions are implemented
21    in do_strip(), which uses an additional parameter to indicate what
22    type of strip should occur. */
23 
/* Selector values for the striptype argument of do_strip(). */
enum {
    LEFTSTRIP = 0,      /* remove leading whitespace only */
    RIGHTSTRIP = 1,     /* remove trailing whitespace only */
    BOTHSTRIP = 2       /* remove whitespace at both ends */
};
27 
28 
29 static PyObject *
split_whitespace(char * s,Py_ssize_t len,Py_ssize_t maxsplit)30 split_whitespace(char *s, Py_ssize_t len, Py_ssize_t maxsplit)
31 {
32     Py_ssize_t i = 0, j;
33     int err;
34     Py_ssize_t countsplit = 0;
35     PyObject* item;
36     PyObject *list = PyList_New(0);
37 
38     if (list == NULL)
39         return NULL;
40 
41     while (i < len) {
42         while (i < len && isspace(Py_CHARMASK(s[i]))) {
43             i = i+1;
44         }
45         j = i;
46         while (i < len && !isspace(Py_CHARMASK(s[i]))) {
47             i = i+1;
48         }
49         if (j < i) {
50             item = PyString_FromStringAndSize(s+j, i-j);
51             if (item == NULL)
52                 goto finally;
53 
54             err = PyList_Append(list, item);
55             Py_DECREF(item);
56             if (err < 0)
57                 goto finally;
58 
59             countsplit++;
60             while (i < len && isspace(Py_CHARMASK(s[i]))) {
61                 i = i+1;
62             }
63             if (maxsplit && (countsplit >= maxsplit) && i < len) {
64                 item = PyString_FromStringAndSize(
65                     s+i, len - i);
66                 if (item == NULL)
67                     goto finally;
68 
69                 err = PyList_Append(list, item);
70                 Py_DECREF(item);
71                 if (err < 0)
72                     goto finally;
73 
74                 i = len;
75             }
76         }
77     }
78     return list;
79   finally:
80     Py_DECREF(list);
81     return NULL;
82 }
83 
84 
85 PyDoc_STRVAR(splitfields__doc__,
86 "split(s [,sep [,maxsplit]]) -> list of strings\n"
87 "splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
88 "\n"
89 "Return a list of the words in the string s, using sep as the\n"
90 "delimiter string.  If maxsplit is nonzero, splits into at most\n"
91 "maxsplit words.  If sep is not specified, any whitespace string\n"
92 "is a separator.  Maxsplit defaults to 0.\n"
93 "\n"
94 "(split and splitfields are synonymous)");
95 
96 static PyObject *
strop_splitfields(PyObject * self,PyObject * args)97 strop_splitfields(PyObject *self, PyObject *args)
98 {
99     Py_ssize_t len, n, i, j, err;
100     Py_ssize_t splitcount, maxsplit;
101     char *s, *sub;
102     PyObject *list, *item;
103 
104     WARN;
105     sub = NULL;
106     n = 0;
107     splitcount = 0;
108     maxsplit = 0;
109     if (!PyArg_ParseTuple(args, "t#|z#n:split", &s, &len, &sub, &n, &maxsplit))
110         return NULL;
111     if (sub == NULL)
112         return split_whitespace(s, len, maxsplit);
113     if (n == 0) {
114         PyErr_SetString(PyExc_ValueError, "empty separator");
115         return NULL;
116     }
117 
118     list = PyList_New(0);
119     if (list == NULL)
120         return NULL;
121 
122     i = j = 0;
123     while (i+n <= len) {
124         if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
125             item = PyString_FromStringAndSize(s+j, i-j);
126             if (item == NULL)
127                 goto fail;
128             err = PyList_Append(list, item);
129             Py_DECREF(item);
130             if (err < 0)
131                 goto fail;
132             i = j = i + n;
133             splitcount++;
134             if (maxsplit && (splitcount >= maxsplit))
135                 break;
136         }
137         else
138             i++;
139     }
140     item = PyString_FromStringAndSize(s+j, len-j);
141     if (item == NULL)
142         goto fail;
143     err = PyList_Append(list, item);
144     Py_DECREF(item);
145     if (err < 0)
146         goto fail;
147 
148     return list;
149 
150  fail:
151     Py_DECREF(list);
152     return NULL;
153 }
154 
155 
156 PyDoc_STRVAR(joinfields__doc__,
157 "join(list [,sep]) -> string\n"
158 "joinfields(list [,sep]) -> string\n"
159 "\n"
160 "Return a string composed of the words in list, with\n"
161 "intervening occurrences of sep.  Sep defaults to a single\n"
162 "space.\n"
163 "\n"
164 "(join and joinfields are synonymous)");
165 
166 static PyObject *
strop_joinfields(PyObject * self,PyObject * args)167 strop_joinfields(PyObject *self, PyObject *args)
168 {
169     PyObject *seq;
170     char *sep = NULL;
171     Py_ssize_t seqlen, seplen = 0;
172     Py_ssize_t i, reslen = 0, slen = 0, sz = 100;
173     PyObject *res = NULL;
174     char* p = NULL;
175     ssizeargfunc getitemfunc;
176 
177     WARN;
178     if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
179         return NULL;
180     if (sep == NULL) {
181         sep = " ";
182         seplen = 1;
183     }
184 
185     seqlen = PySequence_Size(seq);
186     if (seqlen < 0 && PyErr_Occurred())
187         return NULL;
188 
189     if (seqlen == 1) {
190         /* Optimization if there's only one item */
191         PyObject *item = PySequence_GetItem(seq, 0);
192         if (item && !PyString_Check(item)) {
193             PyErr_SetString(PyExc_TypeError,
194                      "first argument must be sequence of strings");
195             Py_DECREF(item);
196             return NULL;
197         }
198         return item;
199     }
200 
201     if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
202         return NULL;
203     p = PyString_AsString(res);
204 
205     /* optimize for lists, since it's the most common case.  all others
206      * (tuples and arbitrary sequences) just use the sequence abstract
207      * interface.
208      */
209     if (PyList_Check(seq)) {
210         for (i = 0; i < seqlen; i++) {
211             PyObject *item = PyList_GET_ITEM(seq, i);
212             if (!PyString_Check(item)) {
213                 PyErr_SetString(PyExc_TypeError,
214                 "first argument must be sequence of strings");
215                 Py_DECREF(res);
216                 return NULL;
217             }
218             slen = PyString_GET_SIZE(item);
219             if (slen > PY_SSIZE_T_MAX - reslen ||
220                 seplen > PY_SSIZE_T_MAX - reslen - seplen) {
221                 PyErr_SetString(PyExc_OverflowError,
222                                 "input too long");
223                 Py_DECREF(res);
224                 return NULL;
225             }
226             while (reslen + slen + seplen >= sz) {
227                 if (_PyString_Resize(&res, sz * 2) < 0)
228                     return NULL;
229                 sz *= 2;
230                 p = PyString_AsString(res) + reslen;
231             }
232             if (i > 0) {
233                 memcpy(p, sep, seplen);
234                 p += seplen;
235                 reslen += seplen;
236             }
237             memcpy(p, PyString_AS_STRING(item), slen);
238             p += slen;
239             reslen += slen;
240         }
241         _PyString_Resize(&res, reslen);
242         return res;
243     }
244 
245     if (seq->ob_type->tp_as_sequence == NULL ||
246              (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
247     {
248         PyErr_SetString(PyExc_TypeError,
249                         "first argument must be a sequence");
250         return NULL;
251     }
252     /* This is now type safe */
253     for (i = 0; i < seqlen; i++) {
254         PyObject *item = getitemfunc(seq, i);
255         if (!item || !PyString_Check(item)) {
256             PyErr_SetString(PyExc_TypeError,
257                      "first argument must be sequence of strings");
258             Py_DECREF(res);
259             Py_XDECREF(item);
260             return NULL;
261         }
262         slen = PyString_GET_SIZE(item);
263         if (slen > PY_SSIZE_T_MAX - reslen ||
264             seplen > PY_SSIZE_T_MAX - reslen - seplen) {
265             PyErr_SetString(PyExc_OverflowError,
266                             "input too long");
267             Py_DECREF(res);
268             Py_XDECREF(item);
269             return NULL;
270         }
271         while (reslen + slen + seplen >= sz) {
272             if (_PyString_Resize(&res, sz * 2) < 0) {
273                 Py_DECREF(item);
274                 return NULL;
275             }
276             sz *= 2;
277             p = PyString_AsString(res) + reslen;
278         }
279         if (i > 0) {
280             memcpy(p, sep, seplen);
281             p += seplen;
282             reslen += seplen;
283         }
284         memcpy(p, PyString_AS_STRING(item), slen);
285         p += slen;
286         reslen += slen;
287         Py_DECREF(item);
288     }
289     _PyString_Resize(&res, reslen);
290     return res;
291 }
292 
293 
294 PyDoc_STRVAR(find__doc__,
295 "find(s, sub [,start [,end]]) -> in\n"
296 "\n"
297 "Return the lowest index in s where substring sub is found,\n"
298 "such that sub is contained within s[start,end].  Optional\n"
299 "arguments start and end are interpreted as in slice notation.\n"
300 "\n"
301 "Return -1 on failure.");
302 
303 static PyObject *
strop_find(PyObject * self,PyObject * args)304 strop_find(PyObject *self, PyObject *args)
305 {
306     char *s, *sub;
307     Py_ssize_t len, n, i = 0, last = PY_SSIZE_T_MAX;
308 
309     WARN;
310     if (!PyArg_ParseTuple(args, "t#t#|nn:find", &s, &len, &sub, &n, &i, &last))
311         return NULL;
312 
313     if (last > len)
314         last = len;
315     if (last < 0)
316         last += len;
317     if (last < 0)
318         last = 0;
319     if (i < 0)
320         i += len;
321     if (i < 0)
322         i = 0;
323 
324     if (n == 0 && i <= last)
325         return PyInt_FromLong((long)i);
326 
327     last -= n;
328     for (; i <= last; ++i)
329         if (s[i] == sub[0] &&
330             (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
331             return PyInt_FromLong((long)i);
332 
333     return PyInt_FromLong(-1L);
334 }
335 
336 
337 PyDoc_STRVAR(rfind__doc__,
338 "rfind(s, sub [,start [,end]]) -> int\n"
339 "\n"
340 "Return the highest index in s where substring sub is found,\n"
341 "such that sub is contained within s[start,end].  Optional\n"
342 "arguments start and end are interpreted as in slice notation.\n"
343 "\n"
344 "Return -1 on failure.");
345 
346 static PyObject *
strop_rfind(PyObject * self,PyObject * args)347 strop_rfind(PyObject *self, PyObject *args)
348 {
349     char *s, *sub;
350     Py_ssize_t len, n, j;
351     Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
352 
353     WARN;
354     if (!PyArg_ParseTuple(args, "t#t#|nn:rfind", &s, &len, &sub, &n, &i, &last))
355         return NULL;
356 
357     if (last > len)
358         last = len;
359     if (last < 0)
360         last += len;
361     if (last < 0)
362         last = 0;
363     if (i < 0)
364         i += len;
365     if (i < 0)
366         i = 0;
367 
368     if (n == 0 && i <= last)
369         return PyInt_FromLong((long)last);
370 
371     for (j = last-n; j >= i; --j)
372         if (s[j] == sub[0] &&
373             (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
374             return PyInt_FromLong((long)j);
375 
376     return PyInt_FromLong(-1L);
377 }
378 
379 
380 static PyObject *
do_strip(PyObject * args,int striptype)381 do_strip(PyObject *args, int striptype)
382 {
383     char *s;
384     Py_ssize_t len, i, j;
385 
386 
387     if (PyString_AsStringAndSize(args, &s, &len))
388         return NULL;
389 
390     i = 0;
391     if (striptype != RIGHTSTRIP) {
392         while (i < len && isspace(Py_CHARMASK(s[i]))) {
393             i++;
394         }
395     }
396 
397     j = len;
398     if (striptype != LEFTSTRIP) {
399         do {
400             j--;
401         } while (j >= i && isspace(Py_CHARMASK(s[j])));
402         j++;
403     }
404 
405     if (i == 0 && j == len) {
406         Py_INCREF(args);
407         return args;
408     }
409     else
410         return PyString_FromStringAndSize(s+i, j-i);
411 }
412 
413 
414 PyDoc_STRVAR(strip__doc__,
415 "strip(s) -> string\n"
416 "\n"
417 "Return a copy of the string s with leading and trailing\n"
418 "whitespace removed.");
419 
420 static PyObject *
strop_strip(PyObject * self,PyObject * args)421 strop_strip(PyObject *self, PyObject *args)
422 {
423     WARN;
424     return do_strip(args, BOTHSTRIP);
425 }
426 
427 
428 PyDoc_STRVAR(lstrip__doc__,
429 "lstrip(s) -> string\n"
430 "\n"
431 "Return a copy of the string s with leading whitespace removed.");
432 
433 static PyObject *
strop_lstrip(PyObject * self,PyObject * args)434 strop_lstrip(PyObject *self, PyObject *args)
435 {
436     WARN;
437     return do_strip(args, LEFTSTRIP);
438 }
439 
440 
441 PyDoc_STRVAR(rstrip__doc__,
442 "rstrip(s) -> string\n"
443 "\n"
444 "Return a copy of the string s with trailing whitespace removed.");
445 
446 static PyObject *
strop_rstrip(PyObject * self,PyObject * args)447 strop_rstrip(PyObject *self, PyObject *args)
448 {
449     WARN;
450     return do_strip(args, RIGHTSTRIP);
451 }
452 
453 
454 PyDoc_STRVAR(lower__doc__,
455 "lower(s) -> string\n"
456 "\n"
457 "Return a copy of the string s converted to lowercase.");
458 
459 static PyObject *
strop_lower(PyObject * self,PyObject * args)460 strop_lower(PyObject *self, PyObject *args)
461 {
462     char *s, *s_new;
463     Py_ssize_t i, n;
464     PyObject *newstr;
465     int changed;
466 
467     WARN;
468     if (PyString_AsStringAndSize(args, &s, &n))
469         return NULL;
470     newstr = PyString_FromStringAndSize(NULL, n);
471     if (newstr == NULL)
472         return NULL;
473     s_new = PyString_AsString(newstr);
474     changed = 0;
475     for (i = 0; i < n; i++) {
476         int c = Py_CHARMASK(*s++);
477         if (isupper(c)) {
478             changed = 1;
479             *s_new = tolower(c);
480         } else
481             *s_new = c;
482         s_new++;
483     }
484     if (!changed) {
485         Py_DECREF(newstr);
486         Py_INCREF(args);
487         return args;
488     }
489     return newstr;
490 }
491 
492 
493 PyDoc_STRVAR(upper__doc__,
494 "upper(s) -> string\n"
495 "\n"
496 "Return a copy of the string s converted to uppercase.");
497 
498 static PyObject *
strop_upper(PyObject * self,PyObject * args)499 strop_upper(PyObject *self, PyObject *args)
500 {
501     char *s, *s_new;
502     Py_ssize_t i, n;
503     PyObject *newstr;
504     int changed;
505 
506     WARN;
507     if (PyString_AsStringAndSize(args, &s, &n))
508         return NULL;
509     newstr = PyString_FromStringAndSize(NULL, n);
510     if (newstr == NULL)
511         return NULL;
512     s_new = PyString_AsString(newstr);
513     changed = 0;
514     for (i = 0; i < n; i++) {
515         int c = Py_CHARMASK(*s++);
516         if (islower(c)) {
517             changed = 1;
518             *s_new = toupper(c);
519         } else
520             *s_new = c;
521         s_new++;
522     }
523     if (!changed) {
524         Py_DECREF(newstr);
525         Py_INCREF(args);
526         return args;
527     }
528     return newstr;
529 }
530 
531 
532 PyDoc_STRVAR(capitalize__doc__,
533 "capitalize(s) -> string\n"
534 "\n"
535 "Return a copy of the string s with only its first character\n"
536 "capitalized.");
537 
538 static PyObject *
strop_capitalize(PyObject * self,PyObject * args)539 strop_capitalize(PyObject *self, PyObject *args)
540 {
541     char *s, *s_new;
542     Py_ssize_t i, n;
543     PyObject *newstr;
544     int changed;
545 
546     WARN;
547     if (PyString_AsStringAndSize(args, &s, &n))
548         return NULL;
549     newstr = PyString_FromStringAndSize(NULL, n);
550     if (newstr == NULL)
551         return NULL;
552     s_new = PyString_AsString(newstr);
553     changed = 0;
554     if (0 < n) {
555         int c = Py_CHARMASK(*s++);
556         if (islower(c)) {
557             changed = 1;
558             *s_new = toupper(c);
559         } else
560             *s_new = c;
561         s_new++;
562     }
563     for (i = 1; i < n; i++) {
564         int c = Py_CHARMASK(*s++);
565         if (isupper(c)) {
566             changed = 1;
567             *s_new = tolower(c);
568         } else
569             *s_new = c;
570         s_new++;
571     }
572     if (!changed) {
573         Py_DECREF(newstr);
574         Py_INCREF(args);
575         return args;
576     }
577     return newstr;
578 }
579 
580 
581 PyDoc_STRVAR(expandtabs__doc__,
582 "expandtabs(string, [tabsize]) -> string\n"
583 "\n"
584 "Expand tabs in a string, i.e. replace them by one or more spaces,\n"
585 "depending on the current column and the given tab size (default 8).\n"
586 "The column number is reset to zero after each newline occurring in the\n"
587 "string.  This doesn't understand other non-printing characters.");
588 
589 static PyObject *
strop_expandtabs(PyObject * self,PyObject * args)590 strop_expandtabs(PyObject *self, PyObject *args)
591 {
592     /* Original by Fredrik Lundh */
593     char* e;
594     char* p;
595     char* q;
596     Py_ssize_t i, j, old_j;
597     PyObject* out;
598     char* string;
599     Py_ssize_t stringlen;
600     int tabsize = 8;
601 
602     WARN;
603     /* Get arguments */
604     if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
605         return NULL;
606     if (tabsize < 1) {
607         PyErr_SetString(PyExc_ValueError,
608                         "tabsize must be at least 1");
609         return NULL;
610     }
611 
612     /* First pass: determine size of output string */
613     i = j = old_j = 0; /* j: current column; i: total of previous lines */
614     e = string + stringlen;
615     for (p = string; p < e; p++) {
616         if (*p == '\t') {
617             j += tabsize - (j%tabsize);
618             if (old_j > j) {
619                 PyErr_SetString(PyExc_OverflowError,
620                                 "new string is too long");
621                 return NULL;
622             }
623             old_j = j;
624         } else {
625             j++;
626             if (*p == '\n') {
627                 i += j;
628                 j = 0;
629             }
630         }
631     }
632 
633     if ((i + j) < 0) {
634         PyErr_SetString(PyExc_OverflowError, "new string is too long");
635         return NULL;
636     }
637 
638     /* Second pass: create output string and fill it */
639     out = PyString_FromStringAndSize(NULL, i+j);
640     if (out == NULL)
641         return NULL;
642 
643     i = 0;
644     q = PyString_AS_STRING(out);
645 
646     for (p = string; p < e; p++) {
647         if (*p == '\t') {
648             j = tabsize - (i%tabsize);
649             i += j;
650             while (j-- > 0)
651                 *q++ = ' ';
652         } else {
653             *q++ = *p;
654             i++;
655             if (*p == '\n')
656                 i = 0;
657         }
658     }
659 
660     return out;
661 }
662 
663 
664 PyDoc_STRVAR(count__doc__,
665 "count(s, sub[, start[, end]]) -> int\n"
666 "\n"
667 "Return the number of occurrences of substring sub in string\n"
668 "s[start:end].  Optional arguments start and end are\n"
669 "interpreted as in slice notation.");
670 
671 static PyObject *
strop_count(PyObject * self,PyObject * args)672 strop_count(PyObject *self, PyObject *args)
673 {
674     char *s, *sub;
675     Py_ssize_t len, n;
676     Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
677     Py_ssize_t m, r;
678 
679     WARN;
680     if (!PyArg_ParseTuple(args, "t#t#|nn:count", &s, &len, &sub, &n, &i, &last))
681         return NULL;
682     if (last > len)
683         last = len;
684     if (last < 0)
685         last += len;
686     if (last < 0)
687         last = 0;
688     if (i < 0)
689         i += len;
690     if (i < 0)
691         i = 0;
692     m = last + 1 - n;
693     if (n == 0)
694         return PyInt_FromLong((long) (m-i));
695 
696     r = 0;
697     while (i < m) {
698         if (!memcmp(s+i, sub, n)) {
699             r++;
700             i += n;
701         } else {
702             i++;
703         }
704     }
705     return PyInt_FromLong((long) r);
706 }
707 
708 
709 PyDoc_STRVAR(swapcase__doc__,
710 "swapcase(s) -> string\n"
711 "\n"
712 "Return a copy of the string s with upper case characters\n"
713 "converted to lowercase and vice versa.");
714 
715 static PyObject *
strop_swapcase(PyObject * self,PyObject * args)716 strop_swapcase(PyObject *self, PyObject *args)
717 {
718     char *s, *s_new;
719     Py_ssize_t i, n;
720     PyObject *newstr;
721     int changed;
722 
723     WARN;
724     if (PyString_AsStringAndSize(args, &s, &n))
725         return NULL;
726     newstr = PyString_FromStringAndSize(NULL, n);
727     if (newstr == NULL)
728         return NULL;
729     s_new = PyString_AsString(newstr);
730     changed = 0;
731     for (i = 0; i < n; i++) {
732         int c = Py_CHARMASK(*s++);
733         if (islower(c)) {
734             changed = 1;
735             *s_new = toupper(c);
736         }
737         else if (isupper(c)) {
738             changed = 1;
739             *s_new = tolower(c);
740         }
741         else
742             *s_new = c;
743         s_new++;
744     }
745     if (!changed) {
746         Py_DECREF(newstr);
747         Py_INCREF(args);
748         return args;
749     }
750     return newstr;
751 }
752 
753 
754 PyDoc_STRVAR(atoi__doc__,
755 "atoi(s [,base]) -> int\n"
756 "\n"
757 "Return the integer represented by the string s in the given\n"
758 "base, which defaults to 10.  The string s must consist of one\n"
759 "or more digits, possibly preceded by a sign.  If base is 0, it\n"
760 "is chosen from the leading characters of s, 0 for octal, 0x or\n"
761 "0X for hexadecimal.  If base is 16, a preceding 0x or 0X is\n"
762 "accepted.");
763 
764 static PyObject *
strop_atoi(PyObject * self,PyObject * args)765 strop_atoi(PyObject *self, PyObject *args)
766 {
767     char *s, *end;
768     int base = 10;
769     long x;
770     char buffer[256]; /* For errors */
771 
772     WARN;
773     if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
774         return NULL;
775 
776     if ((base != 0 && base < 2) || base > 36) {
777         PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
778         return NULL;
779     }
780 
781     while (*s && isspace(Py_CHARMASK(*s)))
782         s++;
783     errno = 0;
784     if (base == 0 && s[0] == '0')
785         x = (long) PyOS_strtoul(s, &end, base);
786     else
787         x = PyOS_strtol(s, &end, base);
788     if (end == s || !isalnum(Py_CHARMASK(end[-1])))
789         goto bad;
790     while (*end && isspace(Py_CHARMASK(*end)))
791         end++;
792     if (*end != '\0') {
793   bad:
794         PyOS_snprintf(buffer, sizeof(buffer),
795                       "invalid literal for atoi(): %.200s", s);
796         PyErr_SetString(PyExc_ValueError, buffer);
797         return NULL;
798     }
799     else if (errno != 0) {
800         PyOS_snprintf(buffer, sizeof(buffer),
801                       "atoi() literal too large: %.200s", s);
802         PyErr_SetString(PyExc_ValueError, buffer);
803         return NULL;
804     }
805     return PyInt_FromLong(x);
806 }
807 
808 
809 PyDoc_STRVAR(atol__doc__,
810 "atol(s [,base]) -> long\n"
811 "\n"
812 "Return the long integer represented by the string s in the\n"
813 "given base, which defaults to 10.  The string s must consist\n"
814 "of one or more digits, possibly preceded by a sign.  If base\n"
815 "is 0, it is chosen from the leading characters of s, 0 for\n"
816 "octal, 0x or 0X for hexadecimal.  If base is 16, a preceding\n"
817 "0x or 0X is accepted.  A trailing L or l is not accepted,\n"
818 "unless base is 0.");
819 
820 static PyObject *
strop_atol(PyObject * self,PyObject * args)821 strop_atol(PyObject *self, PyObject *args)
822 {
823     char *s, *end;
824     int base = 10;
825     PyObject *x;
826     char buffer[256]; /* For errors */
827 
828     WARN;
829     if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
830         return NULL;
831 
832     if ((base != 0 && base < 2) || base > 36) {
833         PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
834         return NULL;
835     }
836 
837     while (*s && isspace(Py_CHARMASK(*s)))
838         s++;
839     if (s[0] == '\0') {
840         PyErr_SetString(PyExc_ValueError, "empty string for atol()");
841         return NULL;
842     }
843     x = PyLong_FromString(s, &end, base);
844     if (x == NULL)
845         return NULL;
846     if (base == 0 && (*end == 'l' || *end == 'L'))
847         end++;
848     while (*end && isspace(Py_CHARMASK(*end)))
849         end++;
850     if (*end != '\0') {
851         PyOS_snprintf(buffer, sizeof(buffer),
852                       "invalid literal for atol(): %.200s", s);
853         PyErr_SetString(PyExc_ValueError, buffer);
854         Py_DECREF(x);
855         return NULL;
856     }
857     return x;
858 }
859 
860 
861 PyDoc_STRVAR(atof__doc__,
862 "atof(s) -> float\n"
863 "\n"
864 "Return the floating point number represented by the string s.");
865 
866 static PyObject *
strop_atof(PyObject * self,PyObject * args)867 strop_atof(PyObject *self, PyObject *args)
868 {
869     char *s, *end;
870     double x;
871     char buffer[256]; /* For errors */
872 
873     WARN;
874     if (!PyArg_ParseTuple(args, "s:atof", &s))
875         return NULL;
876     while (*s && isspace(Py_CHARMASK(*s)))
877         s++;
878     if (s[0] == '\0') {
879         PyErr_SetString(PyExc_ValueError, "empty string for atof()");
880         return NULL;
881     }
882 
883     PyFPE_START_PROTECT("strop_atof", return 0)
884     x = PyOS_string_to_double(s, &end, PyExc_OverflowError);
885     PyFPE_END_PROTECT(x)
886     if (x == -1 && PyErr_Occurred())
887         return NULL;
888     while (*end && isspace(Py_CHARMASK(*end)))
889         end++;
890     if (*end != '\0') {
891         PyOS_snprintf(buffer, sizeof(buffer),
892                       "invalid literal for atof(): %.200s", s);
893         PyErr_SetString(PyExc_ValueError, buffer);
894         return NULL;
895     }
896     return PyFloat_FromDouble(x);
897 }
898 
899 
900 PyDoc_STRVAR(maketrans__doc__,
901 "maketrans(frm, to) -> string\n"
902 "\n"
903 "Return a translation table (a string of 256 bytes long)\n"
904 "suitable for use in string.translate.  The strings frm and to\n"
905 "must be of the same length.");
906 
907 static PyObject *
strop_maketrans(PyObject * self,PyObject * args)908 strop_maketrans(PyObject *self, PyObject *args)
909 {
910     unsigned char *c, *from=NULL, *to=NULL;
911     Py_ssize_t i, fromlen=0, tolen=0;
912     PyObject *result;
913 
914     if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
915         return NULL;
916 
917     if (fromlen != tolen) {
918         PyErr_SetString(PyExc_ValueError,
919                         "maketrans arguments must have same length");
920         return NULL;
921     }
922 
923     result = PyString_FromStringAndSize((char *)NULL, 256);
924     if (result == NULL)
925         return NULL;
926     c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
927     for (i = 0; i < 256; i++)
928         c[i]=(unsigned char)i;
929     for (i = 0; i < fromlen; i++)
930         c[from[i]]=to[i];
931 
932     return result;
933 }
934 
935 
936 PyDoc_STRVAR(translate__doc__,
937 "translate(s,table [,deletechars]) -> string\n"
938 "\n"
939 "Return a copy of the string s, where all characters occurring\n"
940 "in the optional argument deletechars are removed, and the\n"
941 "remaining characters have been mapped through the given\n"
942 "translation table, which must be a string of length 256.");
943 
944 static PyObject *
strop_translate(PyObject * self,PyObject * args)945 strop_translate(PyObject *self, PyObject *args)
946 {
947     register char *input, *table, *output;
948     Py_ssize_t i;
949     int c, changed = 0;
950     PyObject *input_obj;
951     char *table1, *output_start, *del_table=NULL;
952     Py_ssize_t inlen, tablen, dellen = 0;
953     PyObject *result;
954     int trans_table[256];
955 
956     WARN;
957     if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
958                           &table1, &tablen, &del_table, &dellen))
959         return NULL;
960     if (tablen != 256) {
961         PyErr_SetString(PyExc_ValueError,
962                       "translation table must be 256 characters long");
963         return NULL;
964     }
965 
966     table = table1;
967     inlen = PyString_GET_SIZE(input_obj);
968     result = PyString_FromStringAndSize((char *)NULL, inlen);
969     if (result == NULL)
970         return NULL;
971     output_start = output = PyString_AsString(result);
972     input = PyString_AsString(input_obj);
973 
974     if (dellen == 0) {
975         /* If no deletions are required, use faster code */
976         for (i = inlen; --i >= 0; ) {
977             c = Py_CHARMASK(*input++);
978             if (Py_CHARMASK((*output++ = table[c])) != c)
979                 changed = 1;
980         }
981         if (changed)
982             return result;
983         Py_DECREF(result);
984         Py_INCREF(input_obj);
985         return input_obj;
986     }
987 
988     for (i = 0; i < 256; i++)
989         trans_table[i] = Py_CHARMASK(table[i]);
990 
991     for (i = 0; i < dellen; i++)
992         trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
993 
994     for (i = inlen; --i >= 0; ) {
995         c = Py_CHARMASK(*input++);
996         if (trans_table[c] != -1)
997             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
998                 continue;
999         changed = 1;
1000     }
1001     if (!changed) {
1002         Py_DECREF(result);
1003         Py_INCREF(input_obj);
1004         return input_obj;
1005     }
1006     /* Fix the size of the resulting string */
1007     if (inlen > 0)
1008         _PyString_Resize(&result, output - output_start);
1009     return result;
1010 }
1011 
1012 
1013 /* What follows is used for implementing replace().  Perry Stoll. */
1014 
1015 /*
1016   mymemfind
1017 
1018   strstr replacement for arbitrary blocks of memory.
1019 
1020   Locates the first occurrence in the memory pointed to by MEM of the
1021   contents of memory pointed to by PAT.  Returns the index into MEM if
1022   found, or -1 if not found.  If len of PAT is greater than length of
1023   MEM, the function returns -1.
1024 */
1025 static Py_ssize_t
mymemfind(const char * mem,Py_ssize_t len,const char * pat,Py_ssize_t pat_len)1026 mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
1027 {
1028     register Py_ssize_t ii;
1029 
1030     /* pattern can not occur in the last pat_len-1 chars */
1031     len -= pat_len;
1032 
1033     for (ii = 0; ii <= len; ii++) {
1034         if (mem[ii] == pat[0] &&
1035             (pat_len == 1 ||
1036              memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
1037             return ii;
1038         }
1039     }
1040     return -1;
1041 }
1042 
1043 /*
1044   mymemcnt
1045 
1046    Return the number of distinct times PAT is found in MEM.
1047    meaning mem=1111 and pat==11 returns 2.
1048        mem=11111 and pat==11 also return 2.
1049  */
1050 static Py_ssize_t
mymemcnt(const char * mem,Py_ssize_t len,const char * pat,Py_ssize_t pat_len)1051 mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
1052 {
1053     register Py_ssize_t offset = 0;
1054     Py_ssize_t nfound = 0;
1055 
1056     while (len >= 0) {
1057         offset = mymemfind(mem, len, pat, pat_len);
1058         if (offset == -1)
1059             break;
1060         mem += offset + pat_len;
1061         len -= offset + pat_len;
1062         nfound++;
1063     }
1064     return nfound;
1065 }
1066 
1067 /*
1068    mymemreplace
1069 
1070    Return a string in which all occurrences of PAT in memory STR are
1071    replaced with SUB.
1072 
1073    If length of PAT is less than length of STR or there are no occurrences
1074    of PAT in STR, then the original string is returned. Otherwise, a new
1075    string is allocated here and returned.
1076 
1077    on return, out_len is:
1078        the length of output string, or
1079        -1 if the input string is returned, or
1080        unchanged if an error occurs (no memory).
1081 
1082    return value is:
1083        the new string allocated locally, or
1084        NULL if an error occurred.
1085 */
1086 static char *
mymemreplace(const char * str,Py_ssize_t len,const char * pat,Py_ssize_t pat_len,const char * sub,Py_ssize_t sub_len,Py_ssize_t count,Py_ssize_t * out_len)1087 mymemreplace(const char *str, Py_ssize_t len,           /* input string */
1088          const char *pat, Py_ssize_t pat_len,           /* pattern string to find */
1089          const char *sub, Py_ssize_t sub_len,           /* substitution string */
1090          Py_ssize_t count,                              /* number of replacements */
1091          Py_ssize_t *out_len)
1092 {
1093     char *out_s;
1094     char *new_s;
1095     Py_ssize_t nfound, offset, new_len;
1096 
1097     if (len == 0 || pat_len > len)
1098         goto return_same;
1099 
1100     /* find length of output string */
1101     nfound = mymemcnt(str, len, pat, pat_len);
1102     if (count < 0)
1103         count = PY_SSIZE_T_MAX;
1104     else if (nfound > count)
1105         nfound = count;
1106     if (nfound == 0)
1107         goto return_same;
1108 
1109     new_len = len + nfound*(sub_len - pat_len);
1110     if (new_len == 0) {
1111         /* Have to allocate something for the caller to free(). */
1112         out_s = (char *)PyMem_MALLOC(1);
1113         if (out_s == NULL)
1114             return NULL;
1115         out_s[0] = '\0';
1116     }
1117     else {
1118         assert(new_len > 0);
1119         new_s = (char *)PyMem_MALLOC(new_len);
1120         if (new_s == NULL)
1121             return NULL;
1122         out_s = new_s;
1123 
1124         for (; count > 0 && len > 0; --count) {
1125             /* find index of next instance of pattern */
1126             offset = mymemfind(str, len, pat, pat_len);
1127             if (offset == -1)
1128                 break;
1129 
1130             /* copy non matching part of input string */
1131             memcpy(new_s, str, offset);
1132             str += offset + pat_len;
1133             len -= offset + pat_len;
1134 
1135             /* copy substitute into the output string */
1136             new_s += offset;
1137             memcpy(new_s, sub, sub_len);
1138             new_s += sub_len;
1139         }
1140         /* copy any remaining values into output string */
1141         if (len > 0)
1142             memcpy(new_s, str, len);
1143     }
1144     *out_len = new_len;
1145     return out_s;
1146 
1147   return_same:
1148     *out_len = -1;
1149     return (char *)str; /* cast away const */
1150 }
1151 
1152 
1153 PyDoc_STRVAR(replace__doc__,
1154 "replace (str, old, new[, maxsplit]) -> string\n"
1155 "\n"
1156 "Return a copy of string str with all occurrences of substring\n"
1157 "old replaced by new. If the optional argument maxsplit is\n"
1158 "given, only the first maxsplit occurrences are replaced.");
1159 
1160 static PyObject *
strop_replace(PyObject * self,PyObject * args)1161 strop_replace(PyObject *self, PyObject *args)
1162 {
1163     char *str, *pat,*sub,*new_s;
1164     Py_ssize_t len,pat_len,sub_len,out_len;
1165     Py_ssize_t count = -1;
1166     PyObject *newstr;
1167 
1168     WARN;
1169     if (!PyArg_ParseTuple(args, "t#t#t#|n:replace",
1170                           &str, &len, &pat, &pat_len, &sub, &sub_len,
1171                           &count))
1172         return NULL;
1173     if (pat_len <= 0) {
1174         PyErr_SetString(PyExc_ValueError, "empty pattern string");
1175         return NULL;
1176     }
1177     /* CAUTION:  strop treats a replace count of 0 as infinity, unlke
1178      * current (2.1) string.py and string methods.  Preserve this for
1179      * ... well, hard to say for what <wink>.
1180      */
1181     if (count == 0)
1182         count = -1;
1183     new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1184     if (new_s == NULL) {
1185         PyErr_NoMemory();
1186         return NULL;
1187     }
1188     if (out_len == -1) {
1189         /* we're returning another reference to the input string */
1190         newstr = PyTuple_GetItem(args, 0);
1191         Py_XINCREF(newstr);
1192     }
1193     else {
1194         newstr = PyString_FromStringAndSize(new_s, out_len);
1195         PyMem_FREE(new_s);
1196     }
1197     return newstr;
1198 }
1199 
1200 
1201 /* List of functions defined in the module */
1202 
1203 static PyMethodDef
1204 strop_methods[] = {
1205     {"atof",            strop_atof,        METH_VARARGS, atof__doc__},
1206     {"atoi",            strop_atoi,        METH_VARARGS, atoi__doc__},
1207     {"atol",            strop_atol,        METH_VARARGS, atol__doc__},
1208     {"capitalize",      strop_capitalize,  METH_O,       capitalize__doc__},
1209     {"count",           strop_count,       METH_VARARGS, count__doc__},
1210     {"expandtabs",      strop_expandtabs,  METH_VARARGS, expandtabs__doc__},
1211     {"find",            strop_find,        METH_VARARGS, find__doc__},
1212     {"join",            strop_joinfields,  METH_VARARGS, joinfields__doc__},
1213     {"joinfields",      strop_joinfields,  METH_VARARGS, joinfields__doc__},
1214     {"lstrip",          strop_lstrip,      METH_O,       lstrip__doc__},
1215     {"lower",           strop_lower,       METH_O,       lower__doc__},
1216     {"maketrans",       strop_maketrans,   METH_VARARGS, maketrans__doc__},
1217     {"replace",         strop_replace,     METH_VARARGS, replace__doc__},
1218     {"rfind",           strop_rfind,       METH_VARARGS, rfind__doc__},
1219     {"rstrip",          strop_rstrip,      METH_O,       rstrip__doc__},
1220     {"split",           strop_splitfields, METH_VARARGS, splitfields__doc__},
1221     {"splitfields",     strop_splitfields, METH_VARARGS, splitfields__doc__},
1222     {"strip",           strop_strip,       METH_O,       strip__doc__},
1223     {"swapcase",        strop_swapcase,    METH_O,       swapcase__doc__},
1224     {"translate",       strop_translate,   METH_VARARGS, translate__doc__},
1225     {"upper",           strop_upper,       METH_O,       upper__doc__},
1226     {NULL,              NULL}   /* sentinel */
1227 };
1228 
1229 
1230 PyMODINIT_FUNC
initstrop(void)1231 initstrop(void)
1232 {
1233     PyObject *m, *s;
1234     char buf[256];
1235     int c, n;
1236     m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1237                        (PyObject*)NULL, PYTHON_API_VERSION);
1238     if (m == NULL)
1239         return;
1240 
1241     /* Create 'whitespace' object */
1242     n = 0;
1243     for (c = 0; c < 256; c++) {
1244         if (isspace(c))
1245             buf[n++] = c;
1246     }
1247     s = PyString_FromStringAndSize(buf, n);
1248     if (s)
1249         PyModule_AddObject(m, "whitespace", s);
1250 
1251     /* Create 'lowercase' object */
1252     n = 0;
1253     for (c = 0; c < 256; c++) {
1254         if (islower(c))
1255             buf[n++] = c;
1256     }
1257     s = PyString_FromStringAndSize(buf, n);
1258     if (s)
1259         PyModule_AddObject(m, "lowercase", s);
1260 
1261     /* Create 'uppercase' object */
1262     n = 0;
1263     for (c = 0; c < 256; c++) {
1264         if (isupper(c))
1265             buf[n++] = c;
1266     }
1267     s = PyString_FromStringAndSize(buf, n);
1268     if (s)
1269         PyModule_AddObject(m, "uppercase", s);
1270 }
1271