1 #define PY_SSIZE_T_CLEAN
2 #include "Python.h"
3 #include "bytes_methods.h"
4 
5 PyDoc_STRVAR_shared(_Py_isspace__doc__,
6 "B.isspace() -> bool\n\
7 \n\
8 Return True if all characters in B are whitespace\n\
9 and there is at least one character in B, False otherwise.");
10 
11 PyObject*
_Py_bytes_isspace(const char * cptr,Py_ssize_t len)12 _Py_bytes_isspace(const char *cptr, Py_ssize_t len)
13 {
14     const unsigned char *p
15         = (unsigned char *) cptr;
16     const unsigned char *e;
17 
18     /* Shortcut for single character strings */
19     if (len == 1 && Py_ISSPACE(*p))
20         Py_RETURN_TRUE;
21 
22     /* Special case for empty strings */
23     if (len == 0)
24         Py_RETURN_FALSE;
25 
26     e = p + len;
27     for (; p < e; p++) {
28         if (!Py_ISSPACE(*p))
29             Py_RETURN_FALSE;
30     }
31     Py_RETURN_TRUE;
32 }
33 
34 
35 PyDoc_STRVAR_shared(_Py_isalpha__doc__,
36 "B.isalpha() -> bool\n\
37 \n\
38 Return True if all characters in B are alphabetic\n\
39 and there is at least one character in B, False otherwise.");
40 
41 PyObject*
_Py_bytes_isalpha(const char * cptr,Py_ssize_t len)42 _Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
43 {
44     const unsigned char *p
45         = (unsigned char *) cptr;
46     const unsigned char *e;
47 
48     /* Shortcut for single character strings */
49     if (len == 1 && Py_ISALPHA(*p))
50         Py_RETURN_TRUE;
51 
52     /* Special case for empty strings */
53     if (len == 0)
54         Py_RETURN_FALSE;
55 
56     e = p + len;
57     for (; p < e; p++) {
58         if (!Py_ISALPHA(*p))
59             Py_RETURN_FALSE;
60     }
61     Py_RETURN_TRUE;
62 }
63 
64 
65 PyDoc_STRVAR_shared(_Py_isalnum__doc__,
66 "B.isalnum() -> bool\n\
67 \n\
68 Return True if all characters in B are alphanumeric\n\
69 and there is at least one character in B, False otherwise.");
70 
71 PyObject*
_Py_bytes_isalnum(const char * cptr,Py_ssize_t len)72 _Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
73 {
74     const unsigned char *p
75         = (unsigned char *) cptr;
76     const unsigned char *e;
77 
78     /* Shortcut for single character strings */
79     if (len == 1 && Py_ISALNUM(*p))
80         Py_RETURN_TRUE;
81 
82     /* Special case for empty strings */
83     if (len == 0)
84         Py_RETURN_FALSE;
85 
86     e = p + len;
87     for (; p < e; p++) {
88         if (!Py_ISALNUM(*p))
89             Py_RETURN_FALSE;
90     }
91     Py_RETURN_TRUE;
92 }
93 
94 
95 PyDoc_STRVAR_shared(_Py_isascii__doc__,
96 "B.isascii() -> bool\n\
97 \n\
98 Return True if B is empty or all characters in B are ASCII,\n\
99 False otherwise.");
100 
101 // Optimization is copied from ascii_decode in unicodeobject.c
102 /* Mask to quickly check whether a C 'long' contains a
103    non-ASCII, UTF8-encoded char. */
104 #if (SIZEOF_LONG == 8)
105 # define ASCII_CHAR_MASK 0x8080808080808080UL
106 #elif (SIZEOF_LONG == 4)
107 # define ASCII_CHAR_MASK 0x80808080UL
108 #else
109 # error C 'long' size should be either 4 or 8!
110 #endif
111 
/* Return True when no byte in cptr[0:len] has its high bit (0x80) set,
   False otherwise.  The loop body never runs for an empty buffer, so the
   result is True in that case.

   Bytes are examined one at a time until p satisfies
   _Py_IS_ALIGNED(p, SIZEOF_LONG); from there whole 'unsigned long' words
   are tested against ASCII_CHAR_MASK, and the remaining tail is again
   examined byte by byte. */
PyObject*
_Py_bytes_isascii(const char *cptr, Py_ssize_t len)
{
    const char *p = cptr;
    const char *end = p + len;
    /* Upper bound for the word-wise loop below; presumably 'end' rounded
       down to a multiple of SIZEOF_LONG -- confirm against
       _Py_ALIGN_DOWN. */
    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);

    while (p < end) {
        /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
           for an explanation. */
        if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
            /* Help allocation */
            const char *_p = p;
            while (_p < aligned_end) {
                /* Test SIZEOF_LONG bytes at once: ASCII_CHAR_MASK has
                   0x80 set in every byte of the word. */
                unsigned long value = *(unsigned long *) _p;
                if (value & ASCII_CHAR_MASK) {
                    Py_RETURN_FALSE;
                }
                _p += SIZEOF_LONG;
            }
            p = _p;
            /* The word-wise loop consumed the whole buffer. */
            if (_p == end)
                break;
        }
        /* Slow path: check the high bit of a single byte. */
        if ((unsigned char)*p & 0x80) {
            Py_RETURN_FALSE;
        }
        p++;
    }
    Py_RETURN_TRUE;
}
143 
144 #undef ASCII_CHAR_MASK
145 
146 
147 PyDoc_STRVAR_shared(_Py_isdigit__doc__,
148 "B.isdigit() -> bool\n\
149 \n\
150 Return True if all characters in B are digits\n\
151 and there is at least one character in B, False otherwise.");
152 
153 PyObject*
_Py_bytes_isdigit(const char * cptr,Py_ssize_t len)154 _Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
155 {
156     const unsigned char *p
157         = (unsigned char *) cptr;
158     const unsigned char *e;
159 
160     /* Shortcut for single character strings */
161     if (len == 1 && Py_ISDIGIT(*p))
162         Py_RETURN_TRUE;
163 
164     /* Special case for empty strings */
165     if (len == 0)
166         Py_RETURN_FALSE;
167 
168     e = p + len;
169     for (; p < e; p++) {
170         if (!Py_ISDIGIT(*p))
171             Py_RETURN_FALSE;
172     }
173     Py_RETURN_TRUE;
174 }
175 
176 
177 PyDoc_STRVAR_shared(_Py_islower__doc__,
178 "B.islower() -> bool\n\
179 \n\
180 Return True if all cased characters in B are lowercase and there is\n\
181 at least one cased character in B, False otherwise.");
182 
183 PyObject*
_Py_bytes_islower(const char * cptr,Py_ssize_t len)184 _Py_bytes_islower(const char *cptr, Py_ssize_t len)
185 {
186     const unsigned char *p
187         = (unsigned char *) cptr;
188     const unsigned char *e;
189     int cased;
190 
191     /* Shortcut for single character strings */
192     if (len == 1)
193         return PyBool_FromLong(Py_ISLOWER(*p));
194 
195     /* Special case for empty strings */
196     if (len == 0)
197         Py_RETURN_FALSE;
198 
199     e = p + len;
200     cased = 0;
201     for (; p < e; p++) {
202         if (Py_ISUPPER(*p))
203             Py_RETURN_FALSE;
204         else if (!cased && Py_ISLOWER(*p))
205             cased = 1;
206     }
207     return PyBool_FromLong(cased);
208 }
209 
210 
211 PyDoc_STRVAR_shared(_Py_isupper__doc__,
212 "B.isupper() -> bool\n\
213 \n\
214 Return True if all cased characters in B are uppercase and there is\n\
215 at least one cased character in B, False otherwise.");
216 
217 PyObject*
_Py_bytes_isupper(const char * cptr,Py_ssize_t len)218 _Py_bytes_isupper(const char *cptr, Py_ssize_t len)
219 {
220     const unsigned char *p
221         = (unsigned char *) cptr;
222     const unsigned char *e;
223     int cased;
224 
225     /* Shortcut for single character strings */
226     if (len == 1)
227         return PyBool_FromLong(Py_ISUPPER(*p));
228 
229     /* Special case for empty strings */
230     if (len == 0)
231         Py_RETURN_FALSE;
232 
233     e = p + len;
234     cased = 0;
235     for (; p < e; p++) {
236         if (Py_ISLOWER(*p))
237             Py_RETURN_FALSE;
238         else if (!cased && Py_ISUPPER(*p))
239             cased = 1;
240     }
241     return PyBool_FromLong(cased);
242 }
243 
244 
245 PyDoc_STRVAR_shared(_Py_istitle__doc__,
246 "B.istitle() -> bool\n\
247 \n\
248 Return True if B is a titlecased string and there is at least one\n\
249 character in B, i.e. uppercase characters may only follow uncased\n\
250 characters and lowercase characters only cased ones. Return False\n\
251 otherwise.");
252 
253 PyObject*
_Py_bytes_istitle(const char * cptr,Py_ssize_t len)254 _Py_bytes_istitle(const char *cptr, Py_ssize_t len)
255 {
256     const unsigned char *p
257         = (unsigned char *) cptr;
258     const unsigned char *e;
259     int cased, previous_is_cased;
260 
261     /* Shortcut for single character strings */
262     if (len == 1)
263         return PyBool_FromLong(Py_ISUPPER(*p));
264 
265     /* Special case for empty strings */
266     if (len == 0)
267         Py_RETURN_FALSE;
268 
269     e = p + len;
270     cased = 0;
271     previous_is_cased = 0;
272     for (; p < e; p++) {
273         const unsigned char ch = *p;
274 
275         if (Py_ISUPPER(ch)) {
276             if (previous_is_cased)
277                 Py_RETURN_FALSE;
278             previous_is_cased = 1;
279             cased = 1;
280         }
281         else if (Py_ISLOWER(ch)) {
282             if (!previous_is_cased)
283                 Py_RETURN_FALSE;
284             previous_is_cased = 1;
285             cased = 1;
286         }
287         else
288             previous_is_cased = 0;
289     }
290     return PyBool_FromLong(cased);
291 }
292 
293 
294 PyDoc_STRVAR_shared(_Py_lower__doc__,
295 "B.lower() -> copy of B\n\
296 \n\
297 Return a copy of B with all ASCII characters converted to lowercase.");
298 
299 void
_Py_bytes_lower(char * result,const char * cptr,Py_ssize_t len)300 _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
301 {
302     Py_ssize_t i;
303 
304     for (i = 0; i < len; i++) {
305         result[i] = Py_TOLOWER((unsigned char) cptr[i]);
306     }
307 }
308 
309 
310 PyDoc_STRVAR_shared(_Py_upper__doc__,
311 "B.upper() -> copy of B\n\
312 \n\
313 Return a copy of B with all ASCII characters converted to uppercase.");
314 
315 void
_Py_bytes_upper(char * result,const char * cptr,Py_ssize_t len)316 _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
317 {
318     Py_ssize_t i;
319 
320     for (i = 0; i < len; i++) {
321         result[i] = Py_TOUPPER((unsigned char) cptr[i]);
322     }
323 }
324 
325 
326 PyDoc_STRVAR_shared(_Py_title__doc__,
327 "B.title() -> copy of B\n\
328 \n\
329 Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
330 characters, all remaining cased characters have lowercase.");
331 
332 void
_Py_bytes_title(char * result,const char * s,Py_ssize_t len)333 _Py_bytes_title(char *result, const char *s, Py_ssize_t len)
334 {
335     Py_ssize_t i;
336     int previous_is_cased = 0;
337 
338     for (i = 0; i < len; i++) {
339         int c = Py_CHARMASK(*s++);
340         if (Py_ISLOWER(c)) {
341             if (!previous_is_cased)
342                 c = Py_TOUPPER(c);
343             previous_is_cased = 1;
344         } else if (Py_ISUPPER(c)) {
345             if (previous_is_cased)
346                 c = Py_TOLOWER(c);
347             previous_is_cased = 1;
348         } else
349             previous_is_cased = 0;
350         *result++ = c;
351     }
352 }
353 
354 
355 PyDoc_STRVAR_shared(_Py_capitalize__doc__,
356 "B.capitalize() -> copy of B\n\
357 \n\
358 Return a copy of B with only its first character capitalized (ASCII)\n\
359 and the rest lower-cased.");
360 
361 void
_Py_bytes_capitalize(char * result,const char * s,Py_ssize_t len)362 _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
363 {
364     Py_ssize_t i;
365 
366     if (0 < len) {
367         int c = Py_CHARMASK(*s++);
368         if (Py_ISLOWER(c))
369             *result = Py_TOUPPER(c);
370         else
371             *result = c;
372         result++;
373     }
374     for (i = 1; i < len; i++) {
375         int c = Py_CHARMASK(*s++);
376         if (Py_ISUPPER(c))
377             *result = Py_TOLOWER(c);
378         else
379             *result = c;
380         result++;
381     }
382 }
383 
384 
385 PyDoc_STRVAR_shared(_Py_swapcase__doc__,
386 "B.swapcase() -> copy of B\n\
387 \n\
388 Return a copy of B with uppercase ASCII characters converted\n\
389 to lowercase ASCII and vice versa.");
390 
391 void
_Py_bytes_swapcase(char * result,const char * s,Py_ssize_t len)392 _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
393 {
394     Py_ssize_t i;
395 
396     for (i = 0; i < len; i++) {
397         int c = Py_CHARMASK(*s++);
398         if (Py_ISLOWER(c)) {
399             *result = Py_TOUPPER(c);
400         }
401         else if (Py_ISUPPER(c)) {
402             *result = Py_TOLOWER(c);
403         }
404         else
405             *result = c;
406         result++;
407     }
408 }
409 
410 
411 PyDoc_STRVAR_shared(_Py_maketrans__doc__,
412 "B.maketrans(frm, to) -> translation table\n\
413 \n\
414 Return a translation table (a bytes object of length 256) suitable\n\
415 for use in the bytes or bytearray translate method where each byte\n\
416 in frm is mapped to the byte at the same position in to.\n\
417 The bytes objects frm and to must be of the same length.");
418 
419 PyObject *
_Py_bytes_maketrans(Py_buffer * frm,Py_buffer * to)420 _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
421 {
422     PyObject *res = NULL;
423     Py_ssize_t i;
424     char *p;
425 
426     if (frm->len != to->len) {
427         PyErr_Format(PyExc_ValueError,
428                      "maketrans arguments must have same length");
429         return NULL;
430     }
431     res = PyBytes_FromStringAndSize(NULL, 256);
432     if (!res)
433         return NULL;
434     p = PyBytes_AS_STRING(res);
435     for (i = 0; i < 256; i++)
436         p[i] = (char) i;
437     for (i = 0; i < frm->len; i++) {
438         p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
439     }
440 
441     return res;
442 }
443 
444 #define FASTSEARCH fastsearch
445 #define STRINGLIB(F) stringlib_##F
446 #define STRINGLIB_CHAR char
447 #define STRINGLIB_SIZEOF_CHAR 1
448 
449 #include "stringlib/fastsearch.h"
450 #include "stringlib/count.h"
451 #include "stringlib/find.h"
452 
453 /*
454 Wraps stringlib_parse_args_finds() and additionally checks the first
455 argument type.
456 
457 In case the first argument is a bytes-like object, sets it to subobj,
458 and doesn't touch the byte parameter.
459 In case it is an integer in range(0, 256), writes the integer value
460 to byte, and sets subobj to NULL.
461 
462 The other parameters are similar to those of
463 stringlib_parse_args_finds().
464 */
465 
466 Py_LOCAL_INLINE(int)
parse_args_finds_byte(const char * function_name,PyObject * args,PyObject ** subobj,char * byte,Py_ssize_t * start,Py_ssize_t * end)467 parse_args_finds_byte(const char *function_name, PyObject *args,
468                       PyObject **subobj, char *byte,
469                       Py_ssize_t *start, Py_ssize_t *end)
470 {
471     PyObject *tmp_subobj;
472     Py_ssize_t ival;
473 
474     if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
475                                    start, end))
476         return 0;
477 
478     if (PyObject_CheckBuffer(tmp_subobj)) {
479         *subobj = tmp_subobj;
480         return 1;
481     }
482 
483     if (!PyIndex_Check(tmp_subobj)) {
484         PyErr_Format(PyExc_TypeError,
485                      "argument should be integer or bytes-like object, "
486                      "not '%.200s'",
487                      Py_TYPE(tmp_subobj)->tp_name);
488         return 0;
489     }
490 
491     ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
492     if (ival == -1 && PyErr_Occurred()) {
493         return 0;
494     }
495     if (ival < 0 || ival > 255) {
496         PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
497         return 0;
498     }
499 
500     *subobj = NULL;
501     *byte = (char)ival;
502     return 1;
503 }
504 
/* Helper macro to fix up start/end slice values in place for a sequence
   of length len:
     - end greater than len is capped at len;
     - a negative end or start has len added and is then floored at 0.
   Note that start is NOT capped at len; callers must cope with
   start > len themselves.

   start and end must be modifiable lvalues, and every argument is
   evaluated more than once, so pass plain variables only.  The body is
   wrapped in do { } while (0) so that the expansion is exactly one
   statement and can safely be used as the body of an if/else. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
519 
520 Py_LOCAL_INLINE(Py_ssize_t)
find_internal(const char * str,Py_ssize_t len,const char * function_name,PyObject * args,int dir)521 find_internal(const char *str, Py_ssize_t len,
522               const char *function_name, PyObject *args, int dir)
523 {
524     PyObject *subobj;
525     char byte;
526     Py_buffer subbuf;
527     const char *sub;
528     Py_ssize_t sub_len;
529     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
530     Py_ssize_t res;
531 
532     if (!parse_args_finds_byte(function_name, args,
533                                &subobj, &byte, &start, &end))
534         return -2;
535 
536     if (subobj) {
537         if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
538             return -2;
539 
540         sub = subbuf.buf;
541         sub_len = subbuf.len;
542     }
543     else {
544         sub = &byte;
545         sub_len = 1;
546     }
547 
548     ADJUST_INDICES(start, end, len);
549     if (end - start < sub_len)
550         res = -1;
551     else if (sub_len == 1) {
552         if (dir > 0)
553             res = stringlib_find_char(
554                 str + start, end - start,
555                 *sub);
556         else
557             res = stringlib_rfind_char(
558                 str + start, end - start,
559                 *sub);
560         if (res >= 0)
561             res += start;
562     }
563     else {
564         if (dir > 0)
565             res = stringlib_find_slice(
566                 str, len,
567                 sub, sub_len, start, end);
568         else
569             res = stringlib_rfind_slice(
570                 str, len,
571                 sub, sub_len, start, end);
572     }
573 
574     if (subobj)
575         PyBuffer_Release(&subbuf);
576 
577     return res;
578 }
579 
580 PyDoc_STRVAR_shared(_Py_find__doc__,
581 "B.find(sub[, start[, end]]) -> int\n\
582 \n\
583 Return the lowest index in B where subsection sub is found,\n\
such that sub is contained within B[start:end].  Optional\n\
585 arguments start and end are interpreted as in slice notation.\n\
586 \n\
587 Return -1 on failure.");
588 
589 PyObject *
_Py_bytes_find(const char * str,Py_ssize_t len,PyObject * args)590 _Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
591 {
592     Py_ssize_t result = find_internal(str, len, "find", args, +1);
593     if (result == -2)
594         return NULL;
595     return PyLong_FromSsize_t(result);
596 }
597 
598 PyDoc_STRVAR_shared(_Py_index__doc__,
599 "B.index(sub[, start[, end]]) -> int\n\
600 \n\
601 Return the lowest index in B where subsection sub is found,\n\
such that sub is contained within B[start:end].  Optional\n\
603 arguments start and end are interpreted as in slice notation.\n\
604 \n\
Raise ValueError when the subsection is not found.");
606 
607 PyObject *
_Py_bytes_index(const char * str,Py_ssize_t len,PyObject * args)608 _Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
609 {
610     Py_ssize_t result = find_internal(str, len, "index", args, +1);
611     if (result == -2)
612         return NULL;
613     if (result == -1) {
614         PyErr_SetString(PyExc_ValueError,
615                         "subsection not found");
616         return NULL;
617     }
618     return PyLong_FromSsize_t(result);
619 }
620 
621 PyDoc_STRVAR_shared(_Py_rfind__doc__,
622 "B.rfind(sub[, start[, end]]) -> int\n\
623 \n\
624 Return the highest index in B where subsection sub is found,\n\
such that sub is contained within B[start:end].  Optional\n\
626 arguments start and end are interpreted as in slice notation.\n\
627 \n\
628 Return -1 on failure.");
629 
630 PyObject *
_Py_bytes_rfind(const char * str,Py_ssize_t len,PyObject * args)631 _Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
632 {
633     Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
634     if (result == -2)
635         return NULL;
636     return PyLong_FromSsize_t(result);
637 }
638 
639 PyDoc_STRVAR_shared(_Py_rindex__doc__,
640 "B.rindex(sub[, start[, end]]) -> int\n\
641 \n\
642 Return the highest index in B where subsection sub is found,\n\
such that sub is contained within B[start:end].  Optional\n\
644 arguments start and end are interpreted as in slice notation.\n\
645 \n\
646 Raise ValueError when the subsection is not found.");
647 
648 PyObject *
_Py_bytes_rindex(const char * str,Py_ssize_t len,PyObject * args)649 _Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
650 {
651     Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
652     if (result == -2)
653         return NULL;
654     if (result == -1) {
655         PyErr_SetString(PyExc_ValueError,
656                         "subsection not found");
657         return NULL;
658     }
659     return PyLong_FromSsize_t(result);
660 }
661 
662 PyDoc_STRVAR_shared(_Py_count__doc__,
663 "B.count(sub[, start[, end]]) -> int\n\
664 \n\
665 Return the number of non-overlapping occurrences of subsection sub in\n\
666 bytes B[start:end].  Optional arguments start and end are interpreted\n\
667 as in slice notation.");
668 
669 PyObject *
_Py_bytes_count(const char * str,Py_ssize_t len,PyObject * args)670 _Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
671 {
672     PyObject *sub_obj;
673     const char *sub;
674     Py_ssize_t sub_len;
675     char byte;
676     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
677 
678     Py_buffer vsub;
679     PyObject *count_obj;
680 
681     if (!parse_args_finds_byte("count", args,
682                                &sub_obj, &byte, &start, &end))
683         return NULL;
684 
685     if (sub_obj) {
686         if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
687             return NULL;
688 
689         sub = vsub.buf;
690         sub_len = vsub.len;
691     }
692     else {
693         sub = &byte;
694         sub_len = 1;
695     }
696 
697     ADJUST_INDICES(start, end, len);
698 
699     count_obj = PyLong_FromSsize_t(
700         stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
701         );
702 
703     if (sub_obj)
704         PyBuffer_Release(&vsub);
705 
706     return count_obj;
707 }
708 
709 int
_Py_bytes_contains(const char * str,Py_ssize_t len,PyObject * arg)710 _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
711 {
712     Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
713     if (ival == -1 && PyErr_Occurred()) {
714         Py_buffer varg;
715         Py_ssize_t pos;
716         PyErr_Clear();
717         if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
718             return -1;
719         pos = stringlib_find(str, len,
720                              varg.buf, varg.len, 0);
721         PyBuffer_Release(&varg);
722         return pos >= 0;
723     }
724     if (ival < 0 || ival >= 256) {
725         PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
726         return -1;
727     }
728 
729     return memchr(str, (int) ival, len) != NULL;
730 }
731 
732 
733 /* Matches the end (direction >= 0) or start (direction < 0) of the buffer
734  * against substr, using the start and end arguments. Returns
735  * -1 on error, 0 if not found and 1 if found.
736  */
737 static int
tailmatch(const char * str,Py_ssize_t len,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)738 tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
739           Py_ssize_t start, Py_ssize_t end, int direction)
740 {
741     Py_buffer sub_view = {NULL, NULL};
742     const char *sub;
743     Py_ssize_t slen;
744 
745     if (PyBytes_Check(substr)) {
746         sub = PyBytes_AS_STRING(substr);
747         slen = PyBytes_GET_SIZE(substr);
748     }
749     else {
750         if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
751             return -1;
752         sub = sub_view.buf;
753         slen = sub_view.len;
754     }
755 
756     ADJUST_INDICES(start, end, len);
757 
758     if (direction < 0) {
759         /* startswith */
760         if (start + slen > len)
761             goto notfound;
762     } else {
763         /* endswith */
764         if (end - start < slen || start > len)
765             goto notfound;
766 
767         if (end - slen > start)
768             start = end - slen;
769     }
770     if (end - start < slen)
771         goto notfound;
772     if (memcmp(str + start, sub, slen) != 0)
773         goto notfound;
774 
775     PyBuffer_Release(&sub_view);
776     return 1;
777 
778 notfound:
779     PyBuffer_Release(&sub_view);
780     return 0;
781 }
782 
783 static PyObject *
_Py_bytes_tailmatch(const char * str,Py_ssize_t len,const char * function_name,PyObject * args,int direction)784 _Py_bytes_tailmatch(const char *str, Py_ssize_t len,
785                     const char *function_name, PyObject *args,
786                     int direction)
787 {
788     Py_ssize_t start = 0;
789     Py_ssize_t end = PY_SSIZE_T_MAX;
790     PyObject *subobj;
791     int result;
792 
793     if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
794         return NULL;
795     if (PyTuple_Check(subobj)) {
796         Py_ssize_t i;
797         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
798             result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
799                                start, end, direction);
800             if (result == -1)
801                 return NULL;
802             else if (result) {
803                 Py_RETURN_TRUE;
804             }
805         }
806         Py_RETURN_FALSE;
807     }
808     result = tailmatch(str, len, subobj, start, end, direction);
809     if (result == -1) {
810         if (PyErr_ExceptionMatches(PyExc_TypeError))
811             PyErr_Format(PyExc_TypeError,
812                          "%s first arg must be bytes or a tuple of bytes, "
813                          "not %s",
814                          function_name, Py_TYPE(subobj)->tp_name);
815         return NULL;
816     }
817     else
818         return PyBool_FromLong(result);
819 }
820 
821 PyDoc_STRVAR_shared(_Py_startswith__doc__,
822 "B.startswith(prefix[, start[, end]]) -> bool\n\
823 \n\
824 Return True if B starts with the specified prefix, False otherwise.\n\
825 With optional start, test B beginning at that position.\n\
826 With optional end, stop comparing B at that position.\n\
827 prefix can also be a tuple of bytes to try.");
828 
829 PyObject *
_Py_bytes_startswith(const char * str,Py_ssize_t len,PyObject * args)830 _Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
831 {
832     return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
833 }
834 
835 PyDoc_STRVAR_shared(_Py_endswith__doc__,
836 "B.endswith(suffix[, start[, end]]) -> bool\n\
837 \n\
838 Return True if B ends with the specified suffix, False otherwise.\n\
839 With optional start, test B beginning at that position.\n\
840 With optional end, stop comparing B at that position.\n\
841 suffix can also be a tuple of bytes to try.");
842 
843 PyObject *
_Py_bytes_endswith(const char * str,Py_ssize_t len,PyObject * args)844 _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
845 {
846     return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
847 }
848 
849 PyDoc_STRVAR_shared(_Py_expandtabs__doc__,
850 "B.expandtabs(tabsize=8) -> copy of B\n\
851 \n\
852 Return a copy of B where all tab characters are expanded using spaces.\n\
853 If tabsize is not given, a tab size of 8 characters is assumed.");
854 
855 PyDoc_STRVAR_shared(_Py_ljust__doc__,
856 "B.ljust(width[, fillchar]) -> copy of B\n"
857 "\n"
858 "Return B left justified in a string of length width. Padding is\n"
859 "done using the specified fill character (default is a space).");
860 
861 PyDoc_STRVAR_shared(_Py_rjust__doc__,
862 "B.rjust(width[, fillchar]) -> copy of B\n"
863 "\n"
864 "Return B right justified in a string of length width. Padding is\n"
"done using the specified fill character (default is a space).");
866 
867 PyDoc_STRVAR_shared(_Py_center__doc__,
868 "B.center(width[, fillchar]) -> copy of B\n"
869 "\n"
870 "Return B centered in a string of length width.  Padding is\n"
871 "done using the specified fill character (default is a space).");
872 
873 PyDoc_STRVAR_shared(_Py_zfill__doc__,
874 "B.zfill(width) -> copy of B\n"
875 "\n"
876 "Pad a numeric string B with zeros on the left, to fill a field\n"
877 "of the specified width.  B is never truncated.");
878