1 /*
2  * Secret Labs' Regular Expression Engine
3  *
4  * regular expression matching engine
5  *
6  * partial history:
7  * 1999-10-24 fl   created (based on existing template matcher code)
8  * 2000-03-06 fl   first alpha, sort of
9  * 2000-08-01 fl   fixes for 1.6b1
10  * 2000-08-07 fl   use PyOS_CheckStack() if available
11  * 2000-09-20 fl   added expand method
12  * 2001-03-20 fl   lots of fixes for 2.1b2
13  * 2001-04-15 fl   export copyright as Python attribute, not global
14  * 2001-04-28 fl   added __copy__ methods (work in progress)
15  * 2001-05-14 fl   fixes for 1.5.2 compatibility
16  * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
17  * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
18  * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
19  * 2001-10-21 fl   added sub/subn primitive
20  * 2001-10-24 fl   added finditer primitive (for 2.2 only)
21  * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
22  * 2002-11-09 fl   fixed empty sub/subn return type
23  * 2003-04-18 mvl  fully support 4-byte codes
24  * 2003-10-17 gn   implemented non recursive scheme
25  * 2013-02-04 mrab added fullmatch primitive
26  *
27  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
28  *
29  * This version of the SRE library can be redistributed under CNRI's
30  * Python 1.6 license.  For any other use, please contact Secret Labs
31  * AB (info@pythonware.com).
32  *
33  * Portions of this engine have been developed in cooperation with
34  * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
35  * other compatibility work.
36  */
37 
/* Version/copyright banner for the SRE engine. */
static const char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40 
41 #define PY_SSIZE_T_CLEAN
42 
43 #include "Python.h"
44 #include "structmember.h" /* offsetof */
45 
46 #include "sre.h"
47 
48 #define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49 
50 #include <ctype.h>
51 
52 /* name of this module, minus the leading underscore */
53 #if !defined(SRE_MODULE)
54 #define SRE_MODULE "sre"
55 #endif
56 
57 #define SRE_PY_MODULE "re"
58 
59 /* defining this one enables tracing */
60 #undef VERBOSE
61 
62 /* -------------------------------------------------------------------- */
63 
/* LOCAL(type) is the declaration prefix used for the small file-local
   helper functions in this module; its expansion is compiler-specific. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
/* every other compiler: an ordinary static inline function */
#define LOCAL(type) static inline type
#endif
72 
/* error codes */
/* All codes are negative.  pattern_error() turns a code into a Python
   exception: RECURSION_LIMIT, MEMORY and INTERRUPTED are handled
   individually, every other code is reported as an internal error. */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
79 
/* TRACE(v): debug tracing.  `v` is a complete, parenthesised printf
   argument list, so call sites use double parentheses: TRACE(("...", x)).
   Expands to nothing unless VERBOSE is defined. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
85 
86 /* -------------------------------------------------------------------- */
87 /* search engine state */
88 
/* ASCII-only character predicates: any code point >= 128 never matches.
   Except for SRE_IS_LINEBREAK, the argument is evaluated more than once,
   so it must not have side effects. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 && Py_ISSPACE(ch))
/* only '\n' counts as a line break here */
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 && Py_ISALNUM(ch))
/* "word" character: ASCII alphanumeric or underscore */
#define SRE_IS_WORD(ch)\
    ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
99 
/* Lower-case `ch` using ASCII rules only; values >= 128 are returned
   unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
104 
/* Upper-case `ch` using ASCII rules only; values >= 128 are returned
   unchanged. */
static unsigned int sre_upper_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOUPPER(ch);
}
109 
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
/* Only values 0..255 are handed to the C library's isalnum(); anything
   larger yields 0.  The argument is evaluated more than once. */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
/* "word" character under the current locale: alphanumeric or underscore */
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
115 
/* Lower-case `ch` with the C library's locale-aware tolower().  Only
   values below 256 are converted; larger values pass through unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)tolower((int)ch);
}
120 
/* Upper-case `ch` with the C library's locale-aware toupper().  Only
   values below 256 are converted; larger values pass through unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)toupper((int)ch);
}
125 
/* unicode-specific character predicates */
/* Thin aliases for the Py_UNICODE_* classification macros. */

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
/* "word" character: Unicode alphanumeric or underscore (evaluates ch twice) */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
133 
/* Lower-case `ch` via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
138 
/* Upper-case `ch` via Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int uppered = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return uppered;
}
143 
144 LOCAL(int)
sre_category(SRE_CODE category,unsigned int ch)145 sre_category(SRE_CODE category, unsigned int ch)
146 {
147     switch (category) {
148 
149     case SRE_CATEGORY_DIGIT:
150         return SRE_IS_DIGIT(ch);
151     case SRE_CATEGORY_NOT_DIGIT:
152         return !SRE_IS_DIGIT(ch);
153     case SRE_CATEGORY_SPACE:
154         return SRE_IS_SPACE(ch);
155     case SRE_CATEGORY_NOT_SPACE:
156         return !SRE_IS_SPACE(ch);
157     case SRE_CATEGORY_WORD:
158         return SRE_IS_WORD(ch);
159     case SRE_CATEGORY_NOT_WORD:
160         return !SRE_IS_WORD(ch);
161     case SRE_CATEGORY_LINEBREAK:
162         return SRE_IS_LINEBREAK(ch);
163     case SRE_CATEGORY_NOT_LINEBREAK:
164         return !SRE_IS_LINEBREAK(ch);
165 
166     case SRE_CATEGORY_LOC_WORD:
167         return SRE_LOC_IS_WORD(ch);
168     case SRE_CATEGORY_LOC_NOT_WORD:
169         return !SRE_LOC_IS_WORD(ch);
170 
171     case SRE_CATEGORY_UNI_DIGIT:
172         return SRE_UNI_IS_DIGIT(ch);
173     case SRE_CATEGORY_UNI_NOT_DIGIT:
174         return !SRE_UNI_IS_DIGIT(ch);
175     case SRE_CATEGORY_UNI_SPACE:
176         return SRE_UNI_IS_SPACE(ch);
177     case SRE_CATEGORY_UNI_NOT_SPACE:
178         return !SRE_UNI_IS_SPACE(ch);
179     case SRE_CATEGORY_UNI_WORD:
180         return SRE_UNI_IS_WORD(ch);
181     case SRE_CATEGORY_UNI_NOT_WORD:
182         return !SRE_UNI_IS_WORD(ch);
183     case SRE_CATEGORY_UNI_LINEBREAK:
184         return SRE_UNI_IS_LINEBREAK(ch);
185     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
186         return !SRE_UNI_IS_LINEBREAK(ch);
187     }
188     return 0;
189 }
190 
191 LOCAL(int)
char_loc_ignore(SRE_CODE pattern,SRE_CODE ch)192 char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
193 {
194     return ch == pattern
195         || (SRE_CODE) sre_lower_locale(ch) == pattern
196         || (SRE_CODE) sre_upper_locale(ch) == pattern;
197 }
198 
199 
200 /* helpers */
201 
202 static void
data_stack_dealloc(SRE_STATE * state)203 data_stack_dealloc(SRE_STATE* state)
204 {
205     if (state->data_stack) {
206         PyMem_FREE(state->data_stack);
207         state->data_stack = NULL;
208     }
209     state->data_stack_size = state->data_stack_base = 0;
210 }
211 
212 static int
data_stack_grow(SRE_STATE * state,Py_ssize_t size)213 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
214 {
215     Py_ssize_t minsize, cursize;
216     minsize = state->data_stack_base+size;
217     cursize = state->data_stack_size;
218     if (cursize < minsize) {
219         void* stack;
220         cursize = minsize+minsize/4+1024;
221         TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
222         stack = PyMem_REALLOC(state->data_stack, cursize);
223         if (!stack) {
224             data_stack_dealloc(state);
225             return SRE_ERROR_MEMORY;
226         }
227         state->data_stack = (char *)stack;
228         state->data_stack_size = cursize;
229     }
230     return 0;
231 }
232 
/* The matching engine in "sre_lib.h" is included three times, once per
   character width.  Before each inclusion SRE_CHAR selects the character
   type, SIZEOF_SRE_CHAR its size in bytes, and SRE(F) the name prefix, so
   the helpers below can call e.g. sre_ucs1_match / sre_ucs2_match /
   sre_ucs4_match.
   NOTE(review): the macros are redefined here without #undef, so
   sre_lib.h presumably #undefs them at its end -- confirm. */

/* generate 8-bit version */

#define SRE_CHAR Py_UCS1
#define SIZEOF_SRE_CHAR 1
#define SRE(F) sre_ucs1_##F
#include "sre_lib.h"

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UCS2
#define SIZEOF_SRE_CHAR 2
#define SRE(F) sre_ucs2_##F
#include "sre_lib.h"

/* generate 32-bit unicode version */

#define SRE_CHAR Py_UCS4
#define SIZEOF_SRE_CHAR 4
#define SRE(F) sre_ucs4_##F
#include "sre_lib.h"
253 
254 /* -------------------------------------------------------------------- */
255 /* factories and destructors */
256 
/* see sre.h for object declarations */
/* Forward declarations for helpers used before their definitions.
   pattern_new_match is called below with (pattern, state, engine status);
   pattern_scanner with (pattern, string, pos, endpos). */
static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
260 
261 
262 /*[clinic input]
263 module _sre
264 class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
265 class _sre.SRE_Match "MatchObject *" "&Match_Type"
266 class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
267 [clinic start generated code]*/
268 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
269 
/* Forward declarations of the three type objects named in the clinic
   block above; presumably defined with initialisers later in the file. */
static PyTypeObject Pattern_Type;
static PyTypeObject Match_Type;
static PyTypeObject Scanner_Type;
273 
274 /*[clinic input]
275 _sre.getcodesize -> int
276 [clinic start generated code]*/
277 
278 static int
_sre_getcodesize_impl(PyObject * module)279 _sre_getcodesize_impl(PyObject *module)
280 /*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
281 {
282     return sizeof(SRE_CODE);
283 }
284 
285 /*[clinic input]
286 _sre.ascii_iscased -> bool
287 
288     character: int
289     /
290 
291 [clinic start generated code]*/
292 
293 static int
_sre_ascii_iscased_impl(PyObject * module,int character)294 _sre_ascii_iscased_impl(PyObject *module, int character)
295 /*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
296 {
297     unsigned int ch = (unsigned int)character;
298     return ch != sre_lower_ascii(ch) || ch != sre_upper_ascii(ch);
299 }
300 
301 /*[clinic input]
302 _sre.unicode_iscased -> bool
303 
304     character: int
305     /
306 
307 [clinic start generated code]*/
308 
309 static int
_sre_unicode_iscased_impl(PyObject * module,int character)310 _sre_unicode_iscased_impl(PyObject *module, int character)
311 /*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
312 {
313     unsigned int ch = (unsigned int)character;
314     return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
315 }
316 
317 /*[clinic input]
318 _sre.ascii_tolower -> int
319 
320     character: int
321     /
322 
323 [clinic start generated code]*/
324 
325 static int
_sre_ascii_tolower_impl(PyObject * module,int character)326 _sre_ascii_tolower_impl(PyObject *module, int character)
327 /*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
328 {
329     return sre_lower_ascii(character);
330 }
331 
332 /*[clinic input]
333 _sre.unicode_tolower -> int
334 
335     character: int
336     /
337 
338 [clinic start generated code]*/
339 
340 static int
_sre_unicode_tolower_impl(PyObject * module,int character)341 _sre_unicode_tolower_impl(PyObject *module, int character)
342 /*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
343 {
344     return sre_lower_unicode(character);
345 }
346 
347 LOCAL(void)
state_reset(SRE_STATE * state)348 state_reset(SRE_STATE* state)
349 {
350     /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
351     /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
352 
353     state->lastmark = -1;
354     state->lastindex = -1;
355 
356     state->repeat = NULL;
357 
358     data_stack_dealloc(state);
359 }
360 
361 static void*
getstring(PyObject * string,Py_ssize_t * p_length,int * p_isbytes,int * p_charsize,Py_buffer * view)362 getstring(PyObject* string, Py_ssize_t* p_length,
363           int* p_isbytes, int* p_charsize,
364           Py_buffer *view)
365 {
366     /* given a python object, return a data pointer, a length (in
367        characters), and a character size.  return NULL if the object
368        is not a string (or not compatible) */
369 
370     /* Unicode objects do not support the buffer API. So, get the data
371        directly instead. */
372     if (PyUnicode_Check(string)) {
373         if (PyUnicode_READY(string) == -1)
374             return NULL;
375         *p_length = PyUnicode_GET_LENGTH(string);
376         *p_charsize = PyUnicode_KIND(string);
377         *p_isbytes = 0;
378         return PyUnicode_DATA(string);
379     }
380 
381     /* get pointer to byte string buffer */
382     if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
383         PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
384         return NULL;
385     }
386 
387     *p_length = view->len;
388     *p_charsize = 1;
389     *p_isbytes = 1;
390 
391     if (view->buf == NULL) {
392         PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
393         PyBuffer_Release(view);
394         view->buf = NULL;
395         return NULL;
396     }
397     return view->buf;
398 }
399 
400 LOCAL(PyObject*)
state_init(SRE_STATE * state,PatternObject * pattern,PyObject * string,Py_ssize_t start,Py_ssize_t end)401 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
402            Py_ssize_t start, Py_ssize_t end)
403 {
404     /* prepare state object */
405 
406     Py_ssize_t length;
407     int isbytes, charsize;
408     void* ptr;
409 
410     memset(state, 0, sizeof(SRE_STATE));
411 
412     state->mark = PyMem_New(void *, pattern->groups * 2);
413     if (!state->mark) {
414         PyErr_NoMemory();
415         goto err;
416     }
417     state->lastmark = -1;
418     state->lastindex = -1;
419 
420     state->buffer.buf = NULL;
421     ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
422     if (!ptr)
423         goto err;
424 
425     if (isbytes && pattern->isbytes == 0) {
426         PyErr_SetString(PyExc_TypeError,
427                         "cannot use a string pattern on a bytes-like object");
428         goto err;
429     }
430     if (!isbytes && pattern->isbytes > 0) {
431         PyErr_SetString(PyExc_TypeError,
432                         "cannot use a bytes pattern on a string-like object");
433         goto err;
434     }
435 
436     /* adjust boundaries */
437     if (start < 0)
438         start = 0;
439     else if (start > length)
440         start = length;
441 
442     if (end < 0)
443         end = 0;
444     else if (end > length)
445         end = length;
446 
447     state->isbytes = isbytes;
448     state->charsize = charsize;
449     state->match_all = 0;
450     state->must_advance = 0;
451 
452     state->beginning = ptr;
453 
454     state->start = (void*) ((char*) ptr + start * state->charsize);
455     state->end = (void*) ((char*) ptr + end * state->charsize);
456 
457     Py_INCREF(string);
458     state->string = string;
459     state->pos = start;
460     state->endpos = end;
461 
462     return string;
463   err:
464     PyMem_Del(state->mark);
465     state->mark = NULL;
466     if (state->buffer.buf)
467         PyBuffer_Release(&state->buffer);
468     return NULL;
469 }
470 
471 LOCAL(void)
state_fini(SRE_STATE * state)472 state_fini(SRE_STATE* state)
473 {
474     if (state->buffer.buf)
475         PyBuffer_Release(&state->buffer);
476     Py_XDECREF(state->string);
477     data_stack_dealloc(state);
478     PyMem_Del(state->mark);
479     state->mark = NULL;
480 }
481 
/* calculate offset from start of string */
/* Converts the pointer `member` into a character index: the byte distance
   from state->beginning divided by state->charsize.  `state` is a pointer
   expression and is evaluated twice. */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
485 
486 LOCAL(PyObject*)
getslice(int isbytes,const void * ptr,PyObject * string,Py_ssize_t start,Py_ssize_t end)487 getslice(int isbytes, const void *ptr,
488          PyObject* string, Py_ssize_t start, Py_ssize_t end)
489 {
490     if (isbytes) {
491         if (PyBytes_CheckExact(string) &&
492             start == 0 && end == PyBytes_GET_SIZE(string)) {
493             Py_INCREF(string);
494             return string;
495         }
496         return PyBytes_FromStringAndSize(
497                 (const char *)ptr + start, end - start);
498     }
499     else {
500         return PyUnicode_Substring(string, start, end);
501     }
502 }
503 
504 LOCAL(PyObject*)
state_getslice(SRE_STATE * state,Py_ssize_t index,PyObject * string,int empty)505 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
506 {
507     Py_ssize_t i, j;
508 
509     index = (index - 1) * 2;
510 
511     if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
512         if (empty)
513             /* want empty string */
514             i = j = 0;
515         else {
516             Py_RETURN_NONE;
517         }
518     } else {
519         i = STATE_OFFSET(state, state->mark[index]);
520         j = STATE_OFFSET(state, state->mark[index+1]);
521     }
522 
523     return getslice(state->isbytes, state->beginning, string, i, j);
524 }
525 
526 static void
pattern_error(Py_ssize_t status)527 pattern_error(Py_ssize_t status)
528 {
529     switch (status) {
530     case SRE_ERROR_RECURSION_LIMIT:
531         /* This error code seems to be unused. */
532         PyErr_SetString(
533             PyExc_RecursionError,
534             "maximum recursion limit exceeded"
535             );
536         break;
537     case SRE_ERROR_MEMORY:
538         PyErr_NoMemory();
539         break;
540     case SRE_ERROR_INTERRUPTED:
541     /* An exception has already been raised, so let it fly */
542         break;
543     default:
544         /* other error codes indicate compiler/engine bugs */
545         PyErr_SetString(
546             PyExc_RuntimeError,
547             "internal error in regular expression engine"
548             );
549     }
550 }
551 
552 static void
pattern_dealloc(PatternObject * self)553 pattern_dealloc(PatternObject* self)
554 {
555     if (self->weakreflist != NULL)
556         PyObject_ClearWeakRefs((PyObject *) self);
557     Py_XDECREF(self->pattern);
558     Py_XDECREF(self->groupindex);
559     Py_XDECREF(self->indexgroup);
560     PyObject_DEL(self);
561 }
562 
563 LOCAL(Py_ssize_t)
sre_match(SRE_STATE * state,SRE_CODE * pattern)564 sre_match(SRE_STATE* state, SRE_CODE* pattern)
565 {
566     if (state->charsize == 1)
567         return sre_ucs1_match(state, pattern, 1);
568     if (state->charsize == 2)
569         return sre_ucs2_match(state, pattern, 1);
570     assert(state->charsize == 4);
571     return sre_ucs4_match(state, pattern, 1);
572 }
573 
574 LOCAL(Py_ssize_t)
sre_search(SRE_STATE * state,SRE_CODE * pattern)575 sre_search(SRE_STATE* state, SRE_CODE* pattern)
576 {
577     if (state->charsize == 1)
578         return sre_ucs1_search(state, pattern);
579     if (state->charsize == 2)
580         return sre_ucs2_search(state, pattern);
581     assert(state->charsize == 4);
582     return sre_ucs4_search(state, pattern);
583 }
584 
585 /*[clinic input]
586 _sre.SRE_Pattern.match
587 
588     string: object
589     pos: Py_ssize_t = 0
590     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
591 
592 Matches zero or more characters at the beginning of the string.
593 [clinic start generated code]*/
594 
595 static PyObject *
_sre_SRE_Pattern_match_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)596 _sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
597                             Py_ssize_t pos, Py_ssize_t endpos)
598 /*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
599 {
600     SRE_STATE state;
601     Py_ssize_t status;
602     PyObject *match;
603 
604     if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
605         return NULL;
606 
607     state.ptr = state.start;
608 
609     TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
610 
611     status = sre_match(&state, PatternObject_GetCode(self));
612 
613     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
614     if (PyErr_Occurred()) {
615         state_fini(&state);
616         return NULL;
617     }
618 
619     match = pattern_new_match(self, &state, status);
620     state_fini(&state);
621     return match;
622 }
623 
624 /*[clinic input]
625 _sre.SRE_Pattern.fullmatch
626 
627     string: object
628     pos: Py_ssize_t = 0
629     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
630 
631 Matches against all of the string.
632 [clinic start generated code]*/
633 
634 static PyObject *
_sre_SRE_Pattern_fullmatch_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)635 _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
636                                 Py_ssize_t pos, Py_ssize_t endpos)
637 /*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
638 {
639     SRE_STATE state;
640     Py_ssize_t status;
641     PyObject *match;
642 
643     if (!state_init(&state, self, string, pos, endpos))
644         return NULL;
645 
646     state.ptr = state.start;
647 
648     TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
649 
650     state.match_all = 1;
651     status = sre_match(&state, PatternObject_GetCode(self));
652 
653     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
654     if (PyErr_Occurred()) {
655         state_fini(&state);
656         return NULL;
657     }
658 
659     match = pattern_new_match(self, &state, status);
660     state_fini(&state);
661     return match;
662 }
663 
664 /*[clinic input]
665 _sre.SRE_Pattern.search
666 
667     string: object
668     pos: Py_ssize_t = 0
669     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
670 
671 Scan through string looking for a match, and return a corresponding match object instance.
672 
673 Return None if no position in the string matches.
674 [clinic start generated code]*/
675 
676 static PyObject *
_sre_SRE_Pattern_search_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)677 _sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
678                              Py_ssize_t pos, Py_ssize_t endpos)
679 /*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
680 {
681     SRE_STATE state;
682     Py_ssize_t status;
683     PyObject *match;
684 
685     if (!state_init(&state, self, string, pos, endpos))
686         return NULL;
687 
688     TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
689 
690     status = sre_search(&state, PatternObject_GetCode(self));
691 
692     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
693 
694     if (PyErr_Occurred()) {
695         state_fini(&state);
696         return NULL;
697     }
698 
699     match = pattern_new_match(self, &state, status);
700     state_fini(&state);
701     return match;
702 }
703 
704 static PyObject*
call(const char * module,const char * function,PyObject * args)705 call(const char* module, const char* function, PyObject* args)
706 {
707     PyObject* name;
708     PyObject* mod;
709     PyObject* func;
710     PyObject* result;
711 
712     if (!args)
713         return NULL;
714     name = PyUnicode_FromString(module);
715     if (!name)
716         return NULL;
717     mod = PyImport_Import(name);
718     Py_DECREF(name);
719     if (!mod)
720         return NULL;
721     func = PyObject_GetAttrString(mod, function);
722     Py_DECREF(mod);
723     if (!func)
724         return NULL;
725     result = PyObject_CallObject(func, args);
726     Py_DECREF(func);
727     Py_DECREF(args);
728     return result;
729 }
730 
731 /*[clinic input]
732 _sre.SRE_Pattern.findall
733 
734     string: object
735     pos: Py_ssize_t = 0
736     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
737 
738 Return a list of all non-overlapping matches of pattern in string.
739 [clinic start generated code]*/
740 
741 static PyObject *
_sre_SRE_Pattern_findall_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)742 _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
743                               Py_ssize_t pos, Py_ssize_t endpos)
744 /*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
745 {
746     SRE_STATE state;
747     PyObject* list;
748     Py_ssize_t status;
749     Py_ssize_t i, b, e;
750 
751     if (!state_init(&state, self, string, pos, endpos))
752         return NULL;
753 
754     list = PyList_New(0);
755     if (!list) {
756         state_fini(&state);
757         return NULL;
758     }
759 
760     while (state.start <= state.end) {
761 
762         PyObject* item;
763 
764         state_reset(&state);
765 
766         state.ptr = state.start;
767 
768         status = sre_search(&state, PatternObject_GetCode(self));
769         if (PyErr_Occurred())
770             goto error;
771 
772         if (status <= 0) {
773             if (status == 0)
774                 break;
775             pattern_error(status);
776             goto error;
777         }
778 
779         /* don't bother to build a match object */
780         switch (self->groups) {
781         case 0:
782             b = STATE_OFFSET(&state, state.start);
783             e = STATE_OFFSET(&state, state.ptr);
784             item = getslice(state.isbytes, state.beginning,
785                             string, b, e);
786             if (!item)
787                 goto error;
788             break;
789         case 1:
790             item = state_getslice(&state, 1, string, 1);
791             if (!item)
792                 goto error;
793             break;
794         default:
795             item = PyTuple_New(self->groups);
796             if (!item)
797                 goto error;
798             for (i = 0; i < self->groups; i++) {
799                 PyObject* o = state_getslice(&state, i+1, string, 1);
800                 if (!o) {
801                     Py_DECREF(item);
802                     goto error;
803                 }
804                 PyTuple_SET_ITEM(item, i, o);
805             }
806             break;
807         }
808 
809         status = PyList_Append(list, item);
810         Py_DECREF(item);
811         if (status < 0)
812             goto error;
813 
814         state.must_advance = (state.ptr == state.start);
815         state.start = state.ptr;
816     }
817 
818     state_fini(&state);
819     return list;
820 
821 error:
822     Py_DECREF(list);
823     state_fini(&state);
824     return NULL;
825 
826 }
827 
828 /*[clinic input]
829 _sre.SRE_Pattern.finditer
830 
831     string: object
832     pos: Py_ssize_t = 0
833     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
834 
835 Return an iterator over all non-overlapping matches for the RE pattern in string.
836 
837 For each match, the iterator returns a match object.
838 [clinic start generated code]*/
839 
840 static PyObject *
_sre_SRE_Pattern_finditer_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)841 _sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
842                                Py_ssize_t pos, Py_ssize_t endpos)
843 /*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
844 {
845     PyObject* scanner;
846     PyObject* search;
847     PyObject* iterator;
848 
849     scanner = pattern_scanner(self, string, pos, endpos);
850     if (!scanner)
851         return NULL;
852 
853     search = PyObject_GetAttrString(scanner, "search");
854     Py_DECREF(scanner);
855     if (!search)
856         return NULL;
857 
858     iterator = PyCallIter_New(search, Py_None);
859     Py_DECREF(search);
860 
861     return iterator;
862 }
863 
864 /*[clinic input]
865 _sre.SRE_Pattern.scanner
866 
867     string: object
868     pos: Py_ssize_t = 0
869     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
870 
871 [clinic start generated code]*/
872 
873 static PyObject *
_sre_SRE_Pattern_scanner_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)874 _sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
875                               Py_ssize_t pos, Py_ssize_t endpos)
876 /*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
877 {
878     return pattern_scanner(self, string, pos, endpos);
879 }
880 
881 /*[clinic input]
882 _sre.SRE_Pattern.split
883 
884     string: object
885     maxsplit: Py_ssize_t = 0
886 
887 Split string by the occurrences of pattern.
888 [clinic start generated code]*/
889 
890 static PyObject *
_sre_SRE_Pattern_split_impl(PatternObject * self,PyObject * string,Py_ssize_t maxsplit)891 _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
892                             Py_ssize_t maxsplit)
893 /*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
894 {
895     SRE_STATE state;
896     PyObject* list;
897     PyObject* item;
898     Py_ssize_t status;
899     Py_ssize_t n;
900     Py_ssize_t i;
901     void* last;
902 
903     assert(self->codesize != 0);
904 
905     if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
906         return NULL;
907 
908     list = PyList_New(0);
909     if (!list) {
910         state_fini(&state);
911         return NULL;
912     }
913 
914     n = 0;
915     last = state.start;
916 
917     while (!maxsplit || n < maxsplit) {
918 
919         state_reset(&state);
920 
921         state.ptr = state.start;
922 
923         status = sre_search(&state, PatternObject_GetCode(self));
924         if (PyErr_Occurred())
925             goto error;
926 
927         if (status <= 0) {
928             if (status == 0)
929                 break;
930             pattern_error(status);
931             goto error;
932         }
933 
934         /* get segment before this match */
935         item = getslice(state.isbytes, state.beginning,
936             string, STATE_OFFSET(&state, last),
937             STATE_OFFSET(&state, state.start)
938             );
939         if (!item)
940             goto error;
941         status = PyList_Append(list, item);
942         Py_DECREF(item);
943         if (status < 0)
944             goto error;
945 
946         /* add groups (if any) */
947         for (i = 0; i < self->groups; i++) {
948             item = state_getslice(&state, i+1, string, 0);
949             if (!item)
950                 goto error;
951             status = PyList_Append(list, item);
952             Py_DECREF(item);
953             if (status < 0)
954                 goto error;
955         }
956 
957         n = n + 1;
958         state.must_advance = (state.ptr == state.start);
959         last = state.start = state.ptr;
960 
961     }
962 
963     /* get segment following last match (even if empty) */
964     item = getslice(state.isbytes, state.beginning,
965         string, STATE_OFFSET(&state, last), state.endpos
966         );
967     if (!item)
968         goto error;
969     status = PyList_Append(list, item);
970     Py_DECREF(item);
971     if (status < 0)
972         goto error;
973 
974     state_fini(&state);
975     return list;
976 
977 error:
978     Py_DECREF(list);
979     state_fini(&state);
980     return NULL;
981 
982 }
983 
984 static PyObject*
pattern_subx(PatternObject * self,PyObject * ptemplate,PyObject * string,Py_ssize_t count,Py_ssize_t subn)985 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
986              Py_ssize_t count, Py_ssize_t subn)
987 {
988     SRE_STATE state;
989     PyObject* list;
990     PyObject* joiner;
991     PyObject* item;
992     PyObject* filter;
993     PyObject* match;
994     void* ptr;
995     Py_ssize_t status;
996     Py_ssize_t n;
997     Py_ssize_t i, b, e;
998     int isbytes, charsize;
999     int filter_is_callable;
1000     Py_buffer view;
1001 
1002     if (PyCallable_Check(ptemplate)) {
1003         /* sub/subn takes either a function or a template */
1004         filter = ptemplate;
1005         Py_INCREF(filter);
1006         filter_is_callable = 1;
1007     } else {
1008         /* if not callable, check if it's a literal string */
1009         int literal;
1010         view.buf = NULL;
1011         ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1012         b = charsize;
1013         if (ptr) {
1014             if (charsize == 1)
1015                 literal = memchr(ptr, '\\', n) == NULL;
1016             else
1017                 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1018         } else {
1019             PyErr_Clear();
1020             literal = 0;
1021         }
1022         if (view.buf)
1023             PyBuffer_Release(&view);
1024         if (literal) {
1025             filter = ptemplate;
1026             Py_INCREF(filter);
1027             filter_is_callable = 0;
1028         } else {
1029             /* not a literal; hand it over to the template compiler */
1030             filter = call(
1031                 SRE_PY_MODULE, "_subx",
1032                 PyTuple_Pack(2, self, ptemplate)
1033                 );
1034             if (!filter)
1035                 return NULL;
1036             filter_is_callable = PyCallable_Check(filter);
1037         }
1038     }
1039 
1040     if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1041         Py_DECREF(filter);
1042         return NULL;
1043     }
1044 
1045     list = PyList_New(0);
1046     if (!list) {
1047         Py_DECREF(filter);
1048         state_fini(&state);
1049         return NULL;
1050     }
1051 
1052     n = i = 0;
1053 
1054     while (!count || n < count) {
1055 
1056         state_reset(&state);
1057 
1058         state.ptr = state.start;
1059 
1060         status = sre_search(&state, PatternObject_GetCode(self));
1061         if (PyErr_Occurred())
1062             goto error;
1063 
1064         if (status <= 0) {
1065             if (status == 0)
1066                 break;
1067             pattern_error(status);
1068             goto error;
1069         }
1070 
1071         b = STATE_OFFSET(&state, state.start);
1072         e = STATE_OFFSET(&state, state.ptr);
1073 
1074         if (i < b) {
1075             /* get segment before this match */
1076             item = getslice(state.isbytes, state.beginning,
1077                 string, i, b);
1078             if (!item)
1079                 goto error;
1080             status = PyList_Append(list, item);
1081             Py_DECREF(item);
1082             if (status < 0)
1083                 goto error;
1084 
1085         }
1086 
1087         if (filter_is_callable) {
1088             /* pass match object through filter */
1089             match = pattern_new_match(self, &state, 1);
1090             if (!match)
1091                 goto error;
1092             item = PyObject_CallFunctionObjArgs(filter, match, NULL);
1093             Py_DECREF(match);
1094             if (!item)
1095                 goto error;
1096         } else {
1097             /* filter is literal string */
1098             item = filter;
1099             Py_INCREF(item);
1100         }
1101 
1102         /* add to list */
1103         if (item != Py_None) {
1104             status = PyList_Append(list, item);
1105             Py_DECREF(item);
1106             if (status < 0)
1107                 goto error;
1108         }
1109 
1110         i = e;
1111         n = n + 1;
1112         state.must_advance = (state.ptr == state.start);
1113         state.start = state.ptr;
1114     }
1115 
1116     /* get segment following last match */
1117     if (i < state.endpos) {
1118         item = getslice(state.isbytes, state.beginning,
1119                         string, i, state.endpos);
1120         if (!item)
1121             goto error;
1122         status = PyList_Append(list, item);
1123         Py_DECREF(item);
1124         if (status < 0)
1125             goto error;
1126     }
1127 
1128     state_fini(&state);
1129 
1130     Py_DECREF(filter);
1131 
1132     /* convert list to single string (also removes list) */
1133     joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1134     if (!joiner) {
1135         Py_DECREF(list);
1136         return NULL;
1137     }
1138     if (PyList_GET_SIZE(list) == 0) {
1139         Py_DECREF(list);
1140         item = joiner;
1141     }
1142     else {
1143         if (state.isbytes)
1144             item = _PyBytes_Join(joiner, list);
1145         else
1146             item = PyUnicode_Join(joiner, list);
1147         Py_DECREF(joiner);
1148         Py_DECREF(list);
1149         if (!item)
1150             return NULL;
1151     }
1152 
1153     if (subn)
1154         return Py_BuildValue("Nn", item, n);
1155 
1156     return item;
1157 
1158 error:
1159     Py_DECREF(list);
1160     state_fini(&state);
1161     Py_DECREF(filter);
1162     return NULL;
1163 
1164 }
1165 
1166 /*[clinic input]
1167 _sre.SRE_Pattern.sub
1168 
1169     repl: object
1170     string: object
1171     count: Py_ssize_t = 0
1172 
1173 Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1174 [clinic start generated code]*/
1175 
1176 static PyObject *
_sre_SRE_Pattern_sub_impl(PatternObject * self,PyObject * repl,PyObject * string,Py_ssize_t count)1177 _sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1178                           PyObject *string, Py_ssize_t count)
1179 /*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1180 {
1181     return pattern_subx(self, repl, string, count, 0);
1182 }
1183 
1184 /*[clinic input]
1185 _sre.SRE_Pattern.subn
1186 
1187     repl: object
1188     string: object
1189     count: Py_ssize_t = 0
1190 
1191 Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1192 [clinic start generated code]*/
1193 
1194 static PyObject *
_sre_SRE_Pattern_subn_impl(PatternObject * self,PyObject * repl,PyObject * string,Py_ssize_t count)1195 _sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1196                            PyObject *string, Py_ssize_t count)
1197 /*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1198 {
1199     return pattern_subx(self, repl, string, count, 1);
1200 }
1201 
1202 /*[clinic input]
1203 _sre.SRE_Pattern.__copy__
1204 
1205 [clinic start generated code]*/
1206 
1207 static PyObject *
_sre_SRE_Pattern___copy___impl(PatternObject * self)1208 _sre_SRE_Pattern___copy___impl(PatternObject *self)
1209 /*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1210 {
1211     Py_INCREF(self);
1212     return (PyObject *)self;
1213 }
1214 
1215 /*[clinic input]
1216 _sre.SRE_Pattern.__deepcopy__
1217 
1218     memo: object
1219     /
1220 
1221 [clinic start generated code]*/
1222 
1223 static PyObject *
_sre_SRE_Pattern___deepcopy__(PatternObject * self,PyObject * memo)1224 _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1225 /*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
1226 {
1227     Py_INCREF(self);
1228     return (PyObject *)self;
1229 }
1230 
1231 static PyObject *
pattern_repr(PatternObject * obj)1232 pattern_repr(PatternObject *obj)
1233 {
1234     static const struct {
1235         const char *name;
1236         int value;
1237     } flag_names[] = {
1238         {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1239         {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1240         {"re.LOCALE", SRE_FLAG_LOCALE},
1241         {"re.MULTILINE", SRE_FLAG_MULTILINE},
1242         {"re.DOTALL", SRE_FLAG_DOTALL},
1243         {"re.UNICODE", SRE_FLAG_UNICODE},
1244         {"re.VERBOSE", SRE_FLAG_VERBOSE},
1245         {"re.DEBUG", SRE_FLAG_DEBUG},
1246         {"re.ASCII", SRE_FLAG_ASCII},
1247     };
1248     PyObject *result = NULL;
1249     PyObject *flag_items;
1250     size_t i;
1251     int flags = obj->flags;
1252 
1253     /* Omit re.UNICODE for valid string patterns. */
1254     if (obj->isbytes == 0 &&
1255         (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1256          SRE_FLAG_UNICODE)
1257         flags &= ~SRE_FLAG_UNICODE;
1258 
1259     flag_items = PyList_New(0);
1260     if (!flag_items)
1261         return NULL;
1262 
1263     for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1264         if (flags & flag_names[i].value) {
1265             PyObject *item = PyUnicode_FromString(flag_names[i].name);
1266             if (!item)
1267                 goto done;
1268 
1269             if (PyList_Append(flag_items, item) < 0) {
1270                 Py_DECREF(item);
1271                 goto done;
1272             }
1273             Py_DECREF(item);
1274             flags &= ~flag_names[i].value;
1275         }
1276     }
1277     if (flags) {
1278         PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1279         if (!item)
1280             goto done;
1281 
1282         if (PyList_Append(flag_items, item) < 0) {
1283             Py_DECREF(item);
1284             goto done;
1285         }
1286         Py_DECREF(item);
1287     }
1288 
1289     if (PyList_Size(flag_items) > 0) {
1290         PyObject *flags_result;
1291         PyObject *sep = PyUnicode_FromString("|");
1292         if (!sep)
1293             goto done;
1294         flags_result = PyUnicode_Join(sep, flag_items);
1295         Py_DECREF(sep);
1296         if (!flags_result)
1297             goto done;
1298         result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1299                                       obj->pattern, flags_result);
1300         Py_DECREF(flags_result);
1301     }
1302     else {
1303         result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1304     }
1305 
1306 done:
1307     Py_DECREF(flag_items);
1308     return result;
1309 }
1310 
/* Docstring text for compiled pattern objects.
   NOTE(review): presumably referenced by the Pattern type definition,
   which lies outside this part of the file -- confirm. */
PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1312 
1313 /* PatternObject's 'groupindex' method. */
1314 static PyObject *
pattern_groupindex(PatternObject * self,void * Py_UNUSED (ignored))1315 pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
1316 {
1317     if (self->groupindex == NULL)
1318         return PyDict_New();
1319     return PyDictProxy_New(self->groupindex);
1320 }
1321 
1322 static int _validate(PatternObject *self); /* Forward */
1323 
1324 /*[clinic input]
1325 _sre.compile
1326 
1327     pattern: object
1328     flags: int
1329     code: object(subclass_of='&PyList_Type')
1330     groups: Py_ssize_t
1331     groupindex: object(subclass_of='&PyDict_Type')
1332     indexgroup: object(subclass_of='&PyTuple_Type')
1333 
1334 [clinic start generated code]*/
1335 
1336 static PyObject *
_sre_compile_impl(PyObject * module,PyObject * pattern,int flags,PyObject * code,Py_ssize_t groups,PyObject * groupindex,PyObject * indexgroup)1337 _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1338                   PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1339                   PyObject *indexgroup)
1340 /*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1341 {
1342     /* "compile" pattern descriptor to pattern object */
1343 
1344     PatternObject* self;
1345     Py_ssize_t i, n;
1346 
1347     n = PyList_GET_SIZE(code);
1348     /* coverity[ampersand_in_size] */
1349     self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1350     if (!self)
1351         return NULL;
1352     self->weakreflist = NULL;
1353     self->pattern = NULL;
1354     self->groupindex = NULL;
1355     self->indexgroup = NULL;
1356 
1357     self->codesize = n;
1358 
1359     for (i = 0; i < n; i++) {
1360         PyObject *o = PyList_GET_ITEM(code, i);
1361         unsigned long value = PyLong_AsUnsignedLong(o);
1362         self->code[i] = (SRE_CODE) value;
1363         if ((unsigned long) self->code[i] != value) {
1364             PyErr_SetString(PyExc_OverflowError,
1365                             "regular expression code size limit exceeded");
1366             break;
1367         }
1368     }
1369 
1370     if (PyErr_Occurred()) {
1371         Py_DECREF(self);
1372         return NULL;
1373     }
1374 
1375     if (pattern == Py_None) {
1376         self->isbytes = -1;
1377     }
1378     else {
1379         Py_ssize_t p_length;
1380         int charsize;
1381         Py_buffer view;
1382         view.buf = NULL;
1383         if (!getstring(pattern, &p_length, &self->isbytes,
1384                        &charsize, &view)) {
1385             Py_DECREF(self);
1386             return NULL;
1387         }
1388         if (view.buf)
1389             PyBuffer_Release(&view);
1390     }
1391 
1392     Py_INCREF(pattern);
1393     self->pattern = pattern;
1394 
1395     self->flags = flags;
1396 
1397     self->groups = groups;
1398 
1399     if (PyDict_GET_SIZE(groupindex) > 0) {
1400         Py_INCREF(groupindex);
1401         self->groupindex = groupindex;
1402         if (PyTuple_GET_SIZE(indexgroup) > 0) {
1403             Py_INCREF(indexgroup);
1404             self->indexgroup = indexgroup;
1405         }
1406     }
1407 
1408     if (!_validate(self)) {
1409         Py_DECREF(self);
1410         return NULL;
1411     }
1412 
1413     return (PyObject*) self;
1414 }
1415 
1416 /* -------------------------------------------------------------------- */
1417 /* Code validation */
1418 
1419 /* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
1421    for conformance with the code patterns generated there.
1422 
1423    The nice thing about the generated code is that it is position-independent:
1424    all jumps are relative jumps forward.  Also, jumps don't cross each other:
1425    the target of a later jump is always earlier than the target of an earlier
1426    jump.  IOW, this is okay:
1427 
1428    J---------J-------T--------T
1429     \         \_____/        /
1430      \______________________/
1431 
1432    but this is not:
1433 
1434    J---------J-------T--------T
1435     \_________\_____/        /
1436                \____________/
1437 
1438    It also helps that SRE_CODE is always an unsigned type.
1439 */
1440 
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete,
   parenthesized printf argument list, e.g. VTRACE(("x=%d\n", x));
   it expands to nothing unless VVERBOSE is defined. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: trace the source line and return 0 from the function
   in which the macro is used. */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on variables of the enclosing function: the
   cursor 'code', the limit 'end', and the destination 'op', 'arg' or
   'skip'.  Each one FAILs (i.e. returns 0 from the enclosing function)
   when the cursor has already reached 'end'. */

/* GET_OP: op = next code word; advances the cursor by one. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
/* GET_ARG: arg = next code word; advances the cursor by one. */
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* GET_SKIP_ADJ(adj): skip = next code word.  Before the cursor is
   advanced, skip-adj is checked against the number of words left
   between the skip word itself and 'end'; a larger value FAILs.  The
   subtraction is done on the unsigned skip value, so a skip smaller
   than adj wraps around and is rejected as well. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-adj > (uintptr_t)(end - code))      \
            FAIL;                                       \
        code++;                                         \
    } while (0)
/* GET_SKIP: the common case, no adjustment. */
#define GET_SKIP GET_SKIP_ADJ(0)
1481 
1482 static int
_validate_charset(SRE_CODE * code,SRE_CODE * end)1483 _validate_charset(SRE_CODE *code, SRE_CODE *end)
1484 {
1485     /* Some variables are manipulated by the macros above */
1486     SRE_CODE op;
1487     SRE_CODE arg;
1488     SRE_CODE offset;
1489     int i;
1490 
1491     while (code < end) {
1492         GET_OP;
1493         switch (op) {
1494 
1495         case SRE_OP_NEGATE:
1496             break;
1497 
1498         case SRE_OP_LITERAL:
1499             GET_ARG;
1500             break;
1501 
1502         case SRE_OP_RANGE:
1503         case SRE_OP_RANGE_UNI_IGNORE:
1504             GET_ARG;
1505             GET_ARG;
1506             break;
1507 
1508         case SRE_OP_CHARSET:
1509             offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1510             if (offset > (uintptr_t)(end - code))
1511                 FAIL;
1512             code += offset;
1513             break;
1514 
1515         case SRE_OP_BIGCHARSET:
1516             GET_ARG; /* Number of blocks */
1517             offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1518             if (offset > (uintptr_t)(end - code))
1519                 FAIL;
1520             /* Make sure that each byte points to a valid block */
1521             for (i = 0; i < 256; i++) {
1522                 if (((unsigned char *)code)[i] >= arg)
1523                     FAIL;
1524             }
1525             code += offset;
1526             offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1527             if (offset > (uintptr_t)(end - code))
1528                 FAIL;
1529             code += offset;
1530             break;
1531 
1532         case SRE_OP_CATEGORY:
1533             GET_ARG;
1534             switch (arg) {
1535             case SRE_CATEGORY_DIGIT:
1536             case SRE_CATEGORY_NOT_DIGIT:
1537             case SRE_CATEGORY_SPACE:
1538             case SRE_CATEGORY_NOT_SPACE:
1539             case SRE_CATEGORY_WORD:
1540             case SRE_CATEGORY_NOT_WORD:
1541             case SRE_CATEGORY_LINEBREAK:
1542             case SRE_CATEGORY_NOT_LINEBREAK:
1543             case SRE_CATEGORY_LOC_WORD:
1544             case SRE_CATEGORY_LOC_NOT_WORD:
1545             case SRE_CATEGORY_UNI_DIGIT:
1546             case SRE_CATEGORY_UNI_NOT_DIGIT:
1547             case SRE_CATEGORY_UNI_SPACE:
1548             case SRE_CATEGORY_UNI_NOT_SPACE:
1549             case SRE_CATEGORY_UNI_WORD:
1550             case SRE_CATEGORY_UNI_NOT_WORD:
1551             case SRE_CATEGORY_UNI_LINEBREAK:
1552             case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1553                 break;
1554             default:
1555                 FAIL;
1556             }
1557             break;
1558 
1559         default:
1560             FAIL;
1561 
1562         }
1563     }
1564 
1565     return 1;
1566 }
1567 
1568 static int
_validate_inner(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)1569 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1570 {
1571     /* Some variables are manipulated by the macros above */
1572     SRE_CODE op;
1573     SRE_CODE arg;
1574     SRE_CODE skip;
1575 
1576     VTRACE(("code=%p, end=%p\n", code, end));
1577 
1578     if (code > end)
1579         FAIL;
1580 
1581     while (code < end) {
1582         GET_OP;
1583         switch (op) {
1584 
1585         case SRE_OP_MARK:
1586             /* We don't check whether marks are properly nested; the
1587                sre_match() code is robust even if they don't, and the worst
1588                you can get is nonsensical match results. */
1589             GET_ARG;
1590             if (arg > 2 * (size_t)groups + 1) {
1591                 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1592                 FAIL;
1593             }
1594             break;
1595 
1596         case SRE_OP_LITERAL:
1597         case SRE_OP_NOT_LITERAL:
1598         case SRE_OP_LITERAL_IGNORE:
1599         case SRE_OP_NOT_LITERAL_IGNORE:
1600         case SRE_OP_LITERAL_UNI_IGNORE:
1601         case SRE_OP_NOT_LITERAL_UNI_IGNORE:
1602         case SRE_OP_LITERAL_LOC_IGNORE:
1603         case SRE_OP_NOT_LITERAL_LOC_IGNORE:
1604             GET_ARG;
1605             /* The arg is just a character, nothing to check */
1606             break;
1607 
1608         case SRE_OP_SUCCESS:
1609         case SRE_OP_FAILURE:
1610             /* Nothing to check; these normally end the matching process */
1611             break;
1612 
1613         case SRE_OP_AT:
1614             GET_ARG;
1615             switch (arg) {
1616             case SRE_AT_BEGINNING:
1617             case SRE_AT_BEGINNING_STRING:
1618             case SRE_AT_BEGINNING_LINE:
1619             case SRE_AT_END:
1620             case SRE_AT_END_LINE:
1621             case SRE_AT_END_STRING:
1622             case SRE_AT_BOUNDARY:
1623             case SRE_AT_NON_BOUNDARY:
1624             case SRE_AT_LOC_BOUNDARY:
1625             case SRE_AT_LOC_NON_BOUNDARY:
1626             case SRE_AT_UNI_BOUNDARY:
1627             case SRE_AT_UNI_NON_BOUNDARY:
1628                 break;
1629             default:
1630                 FAIL;
1631             }
1632             break;
1633 
1634         case SRE_OP_ANY:
1635         case SRE_OP_ANY_ALL:
1636             /* These have no operands */
1637             break;
1638 
1639         case SRE_OP_IN:
1640         case SRE_OP_IN_IGNORE:
1641         case SRE_OP_IN_UNI_IGNORE:
1642         case SRE_OP_IN_LOC_IGNORE:
1643             GET_SKIP;
1644             /* Stop 1 before the end; we check the FAILURE below */
1645             if (!_validate_charset(code, code+skip-2))
1646                 FAIL;
1647             if (code[skip-2] != SRE_OP_FAILURE)
1648                 FAIL;
1649             code += skip-1;
1650             break;
1651 
1652         case SRE_OP_INFO:
1653             {
1654                 /* A minimal info field is
1655                    <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1656                    If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1657                    more follows. */
1658                 SRE_CODE flags, i;
1659                 SRE_CODE *newcode;
1660                 GET_SKIP;
1661                 newcode = code+skip-1;
1662                 GET_ARG; flags = arg;
1663                 GET_ARG;
1664                 GET_ARG;
1665                 /* Check that only valid flags are present */
1666                 if ((flags & ~(SRE_INFO_PREFIX |
1667                                SRE_INFO_LITERAL |
1668                                SRE_INFO_CHARSET)) != 0)
1669                     FAIL;
1670                 /* PREFIX and CHARSET are mutually exclusive */
1671                 if ((flags & SRE_INFO_PREFIX) &&
1672                     (flags & SRE_INFO_CHARSET))
1673                     FAIL;
1674                 /* LITERAL implies PREFIX */
1675                 if ((flags & SRE_INFO_LITERAL) &&
1676                     !(flags & SRE_INFO_PREFIX))
1677                     FAIL;
1678                 /* Validate the prefix */
1679                 if (flags & SRE_INFO_PREFIX) {
1680                     SRE_CODE prefix_len;
1681                     GET_ARG; prefix_len = arg;
1682                     GET_ARG;
1683                     /* Here comes the prefix string */
1684                     if (prefix_len > (uintptr_t)(newcode - code))
1685                         FAIL;
1686                     code += prefix_len;
1687                     /* And here comes the overlap table */
1688                     if (prefix_len > (uintptr_t)(newcode - code))
1689                         FAIL;
1690                     /* Each overlap value should be < prefix_len */
1691                     for (i = 0; i < prefix_len; i++) {
1692                         if (code[i] >= prefix_len)
1693                             FAIL;
1694                     }
1695                     code += prefix_len;
1696                 }
1697                 /* Validate the charset */
1698                 if (flags & SRE_INFO_CHARSET) {
1699                     if (!_validate_charset(code, newcode-1))
1700                         FAIL;
1701                     if (newcode[-1] != SRE_OP_FAILURE)
1702                         FAIL;
1703                     code = newcode;
1704                 }
1705                 else if (code != newcode) {
1706                   VTRACE(("code=%p, newcode=%p\n", code, newcode));
1707                     FAIL;
1708                 }
1709             }
1710             break;
1711 
1712         case SRE_OP_BRANCH:
1713             {
1714                 SRE_CODE *target = NULL;
1715                 for (;;) {
1716                     GET_SKIP;
1717                     if (skip == 0)
1718                         break;
1719                     /* Stop 2 before the end; we check the JUMP below */
1720                     if (!_validate_inner(code, code+skip-3, groups))
1721                         FAIL;
1722                     code += skip-3;
1723                     /* Check that it ends with a JUMP, and that each JUMP
1724                        has the same target */
1725                     GET_OP;
1726                     if (op != SRE_OP_JUMP)
1727                         FAIL;
1728                     GET_SKIP;
1729                     if (target == NULL)
1730                         target = code+skip-1;
1731                     else if (code+skip-1 != target)
1732                         FAIL;
1733                 }
1734             }
1735             break;
1736 
1737         case SRE_OP_REPEAT_ONE:
1738         case SRE_OP_MIN_REPEAT_ONE:
1739             {
1740                 SRE_CODE min, max;
1741                 GET_SKIP;
1742                 GET_ARG; min = arg;
1743                 GET_ARG; max = arg;
1744                 if (min > max)
1745                     FAIL;
1746                 if (max > SRE_MAXREPEAT)
1747                     FAIL;
1748                 if (!_validate_inner(code, code+skip-4, groups))
1749                     FAIL;
1750                 code += skip-4;
1751                 GET_OP;
1752                 if (op != SRE_OP_SUCCESS)
1753                     FAIL;
1754             }
1755             break;
1756 
1757         case SRE_OP_REPEAT:
1758             {
1759                 SRE_CODE min, max;
1760                 GET_SKIP;
1761                 GET_ARG; min = arg;
1762                 GET_ARG; max = arg;
1763                 if (min > max)
1764                     FAIL;
1765                 if (max > SRE_MAXREPEAT)
1766                     FAIL;
1767                 if (!_validate_inner(code, code+skip-3, groups))
1768                     FAIL;
1769                 code += skip-3;
1770                 GET_OP;
1771                 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1772                     FAIL;
1773             }
1774             break;
1775 
1776         case SRE_OP_GROUPREF:
1777         case SRE_OP_GROUPREF_IGNORE:
1778         case SRE_OP_GROUPREF_UNI_IGNORE:
1779         case SRE_OP_GROUPREF_LOC_IGNORE:
1780             GET_ARG;
1781             if (arg >= (size_t)groups)
1782                 FAIL;
1783             break;
1784 
1785         case SRE_OP_GROUPREF_EXISTS:
1786             /* The regex syntax for this is: '(?(group)then|else)', where
1787                'group' is either an integer group number or a group name,
1788                'then' and 'else' are sub-regexes, and 'else' is optional. */
1789             GET_ARG;
1790             if (arg >= (size_t)groups)
1791                 FAIL;
1792             GET_SKIP_ADJ(1);
1793             code--; /* The skip is relative to the first arg! */
1794             /* There are two possibilities here: if there is both a 'then'
1795                part and an 'else' part, the generated code looks like:
1796 
1797                GROUPREF_EXISTS
1798                <group>
1799                <skipyes>
1800                ...then part...
1801                JUMP
1802                <skipno>
1803                (<skipyes> jumps here)
1804                ...else part...
1805                (<skipno> jumps here)
1806 
1807                If there is only a 'then' part, it looks like:
1808 
1809                GROUPREF_EXISTS
1810                <group>
1811                <skip>
1812                ...then part...
1813                (<skip> jumps here)
1814 
1815                There is no direct way to decide which it is, and we don't want
1816                to allow arbitrary jumps anywhere in the code; so we just look
1817                for a JUMP opcode preceding our skip target.
1818             */
1819             if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
1820                 code[skip-3] == SRE_OP_JUMP)
1821             {
1822                 VTRACE(("both then and else parts present\n"));
1823                 if (!_validate_inner(code+1, code+skip-3, groups))
1824                     FAIL;
1825                 code += skip-2; /* Position after JUMP, at <skipno> */
1826                 GET_SKIP;
1827                 if (!_validate_inner(code, code+skip-1, groups))
1828                     FAIL;
1829                 code += skip-1;
1830             }
1831             else {
1832                 VTRACE(("only a then part present\n"));
1833                 if (!_validate_inner(code+1, code+skip-1, groups))
1834                     FAIL;
1835                 code += skip-1;
1836             }
1837             break;
1838 
1839         case SRE_OP_ASSERT:
1840         case SRE_OP_ASSERT_NOT:
1841             GET_SKIP;
1842             GET_ARG; /* 0 for lookahead, width for lookbehind */
1843             code--; /* Back up over arg to simplify math below */
1844             if (arg & 0x80000000)
1845                 FAIL; /* Width too large */
1846             /* Stop 1 before the end; we check the SUCCESS below */
1847             if (!_validate_inner(code+1, code+skip-2, groups))
1848                 FAIL;
1849             code += skip-2;
1850             GET_OP;
1851             if (op != SRE_OP_SUCCESS)
1852                 FAIL;
1853             break;
1854 
1855         default:
1856             FAIL;
1857 
1858         }
1859     }
1860 
1861     VTRACE(("okay\n"));
1862     return 1;
1863 }
1864 
1865 static int
_validate_outer(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)1866 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1867 {
1868     if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1869         code >= end || end[-1] != SRE_OP_SUCCESS)
1870         FAIL;
1871     return _validate_inner(code, end-1, groups);
1872 }
1873 
1874 static int
_validate(PatternObject * self)1875 _validate(PatternObject *self)
1876 {
1877     if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1878     {
1879         PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1880         return 0;
1881     }
1882     else
1883         VTRACE(("Success!\n"));
1884     return 1;
1885 }
1886 
1887 /* -------------------------------------------------------------------- */
1888 /* match methods */
1889 
1890 static void
match_dealloc(MatchObject * self)1891 match_dealloc(MatchObject* self)
1892 {
1893     Py_XDECREF(self->regs);
1894     Py_XDECREF(self->string);
1895     Py_DECREF(self->pattern);
1896     PyObject_DEL(self);
1897 }
1898 
1899 static PyObject*
match_getslice_by_index(MatchObject * self,Py_ssize_t index,PyObject * def)1900 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
1901 {
1902     Py_ssize_t length;
1903     int isbytes, charsize;
1904     Py_buffer view;
1905     PyObject *result;
1906     void* ptr;
1907     Py_ssize_t i, j;
1908 
1909     if (index < 0 || index >= self->groups) {
1910         /* raise IndexError if we were given a bad group number */
1911         PyErr_SetString(
1912             PyExc_IndexError,
1913             "no such group"
1914             );
1915         return NULL;
1916     }
1917 
1918     index *= 2;
1919 
1920     if (self->string == Py_None || self->mark[index] < 0) {
1921         /* return default value if the string or group is undefined */
1922         Py_INCREF(def);
1923         return def;
1924     }
1925 
1926     ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
1927     if (ptr == NULL)
1928         return NULL;
1929 
1930     i = self->mark[index];
1931     j = self->mark[index+1];
1932     i = Py_MIN(i, length);
1933     j = Py_MIN(j, length);
1934     result = getslice(isbytes, ptr, self->string, i, j);
1935     if (isbytes && view.buf != NULL)
1936         PyBuffer_Release(&view);
1937     return result;
1938 }
1939 
1940 static Py_ssize_t
match_getindex(MatchObject * self,PyObject * index)1941 match_getindex(MatchObject* self, PyObject* index)
1942 {
1943     Py_ssize_t i;
1944 
1945     if (index == NULL)
1946         /* Default value */
1947         return 0;
1948 
1949     if (PyIndex_Check(index)) {
1950         return PyNumber_AsSsize_t(index, NULL);
1951     }
1952 
1953     i = -1;
1954 
1955     if (self->pattern->groupindex) {
1956         index = PyDict_GetItem(self->pattern->groupindex, index);
1957         if (index && PyLong_Check(index)) {
1958             i = PyLong_AsSsize_t(index);
1959         }
1960     }
1961 
1962     return i;
1963 }
1964 
1965 static PyObject*
match_getslice(MatchObject * self,PyObject * index,PyObject * def)1966 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
1967 {
1968     return match_getslice_by_index(self, match_getindex(self, index), def);
1969 }
1970 
1971 /*[clinic input]
1972 _sre.SRE_Match.expand
1973 
1974     template: object
1975 
1976 Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
1977 [clinic start generated code]*/
1978 
1979 static PyObject *
_sre_SRE_Match_expand_impl(MatchObject * self,PyObject * template)1980 _sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
1981 /*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
1982 {
1983     /* delegate to Python code */
1984     return call(
1985         SRE_PY_MODULE, "_expand",
1986         PyTuple_Pack(3, self->pattern, self, template)
1987         );
1988 }
1989 
1990 static PyObject*
match_group(MatchObject * self,PyObject * args)1991 match_group(MatchObject* self, PyObject* args)
1992 {
1993     PyObject* result;
1994     Py_ssize_t i, size;
1995 
1996     size = PyTuple_GET_SIZE(args);
1997 
1998     switch (size) {
1999     case 0:
2000         result = match_getslice(self, _PyLong_Zero, Py_None);
2001         break;
2002     case 1:
2003         result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2004         break;
2005     default:
2006         /* fetch multiple items */
2007         result = PyTuple_New(size);
2008         if (!result)
2009             return NULL;
2010         for (i = 0; i < size; i++) {
2011             PyObject* item = match_getslice(
2012                 self, PyTuple_GET_ITEM(args, i), Py_None
2013                 );
2014             if (!item) {
2015                 Py_DECREF(result);
2016                 return NULL;
2017             }
2018             PyTuple_SET_ITEM(result, i, item);
2019         }
2020         break;
2021     }
2022     return result;
2023 }
2024 
2025 static PyObject*
match_getitem(MatchObject * self,PyObject * name)2026 match_getitem(MatchObject* self, PyObject* name)
2027 {
2028     return match_getslice(self, name, Py_None);
2029 }
2030 
2031 /*[clinic input]
2032 _sre.SRE_Match.groups
2033 
2034     default: object = None
2035         Is used for groups that did not participate in the match.
2036 
2037 Return a tuple containing all the subgroups of the match, from 1.
2038 [clinic start generated code]*/
2039 
2040 static PyObject *
_sre_SRE_Match_groups_impl(MatchObject * self,PyObject * default_value)2041 _sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2042 /*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2043 {
2044     PyObject* result;
2045     Py_ssize_t index;
2046 
2047     result = PyTuple_New(self->groups-1);
2048     if (!result)
2049         return NULL;
2050 
2051     for (index = 1; index < self->groups; index++) {
2052         PyObject* item;
2053         item = match_getslice_by_index(self, index, default_value);
2054         if (!item) {
2055             Py_DECREF(result);
2056             return NULL;
2057         }
2058         PyTuple_SET_ITEM(result, index-1, item);
2059     }
2060 
2061     return result;
2062 }
2063 
2064 /*[clinic input]
2065 _sre.SRE_Match.groupdict
2066 
2067     default: object = None
2068         Is used for groups that did not participate in the match.
2069 
2070 Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2071 [clinic start generated code]*/
2072 
2073 static PyObject *
_sre_SRE_Match_groupdict_impl(MatchObject * self,PyObject * default_value)2074 _sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2075 /*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
2076 {
2077     PyObject *result;
2078     PyObject *key;
2079     PyObject *value;
2080     Py_ssize_t pos = 0;
2081     Py_hash_t hash;
2082 
2083     result = PyDict_New();
2084     if (!result || !self->pattern->groupindex)
2085         return result;
2086 
2087     while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2088         int status;
2089         Py_INCREF(key);
2090         value = match_getslice(self, key, default_value);
2091         if (!value) {
2092             Py_DECREF(key);
2093             goto failed;
2094         }
2095         status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2096         Py_DECREF(value);
2097         Py_DECREF(key);
2098         if (status < 0)
2099             goto failed;
2100     }
2101 
2102     return result;
2103 
2104 failed:
2105     Py_DECREF(result);
2106     return NULL;
2107 }
2108 
2109 /*[clinic input]
2110 _sre.SRE_Match.start -> Py_ssize_t
2111 
2112     group: object(c_default="NULL") = 0
2113     /
2114 
2115 Return index of the start of the substring matched by group.
2116 [clinic start generated code]*/
2117 
2118 static Py_ssize_t
_sre_SRE_Match_start_impl(MatchObject * self,PyObject * group)2119 _sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2120 /*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2121 {
2122     Py_ssize_t index = match_getindex(self, group);
2123 
2124     if (index < 0 || index >= self->groups) {
2125         PyErr_SetString(
2126             PyExc_IndexError,
2127             "no such group"
2128             );
2129         return -1;
2130     }
2131 
2132     /* mark is -1 if group is undefined */
2133     return self->mark[index*2];
2134 }
2135 
2136 /*[clinic input]
2137 _sre.SRE_Match.end -> Py_ssize_t
2138 
2139     group: object(c_default="NULL") = 0
2140     /
2141 
2142 Return index of the end of the substring matched by group.
2143 [clinic start generated code]*/
2144 
2145 static Py_ssize_t
_sre_SRE_Match_end_impl(MatchObject * self,PyObject * group)2146 _sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2147 /*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2148 {
2149     Py_ssize_t index = match_getindex(self, group);
2150 
2151     if (index < 0 || index >= self->groups) {
2152         PyErr_SetString(
2153             PyExc_IndexError,
2154             "no such group"
2155             );
2156         return -1;
2157     }
2158 
2159     /* mark is -1 if group is undefined */
2160     return self->mark[index*2+1];
2161 }
2162 
2163 LOCAL(PyObject*)
_pair(Py_ssize_t i1,Py_ssize_t i2)2164 _pair(Py_ssize_t i1, Py_ssize_t i2)
2165 {
2166     PyObject* pair;
2167     PyObject* item;
2168 
2169     pair = PyTuple_New(2);
2170     if (!pair)
2171         return NULL;
2172 
2173     item = PyLong_FromSsize_t(i1);
2174     if (!item)
2175         goto error;
2176     PyTuple_SET_ITEM(pair, 0, item);
2177 
2178     item = PyLong_FromSsize_t(i2);
2179     if (!item)
2180         goto error;
2181     PyTuple_SET_ITEM(pair, 1, item);
2182 
2183     return pair;
2184 
2185   error:
2186     Py_DECREF(pair);
2187     return NULL;
2188 }
2189 
2190 /*[clinic input]
2191 _sre.SRE_Match.span
2192 
2193     group: object(c_default="NULL") = 0
2194     /
2195 
2196 For match object m, return the 2-tuple (m.start(group), m.end(group)).
2197 [clinic start generated code]*/
2198 
2199 static PyObject *
_sre_SRE_Match_span_impl(MatchObject * self,PyObject * group)2200 _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2201 /*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2202 {
2203     Py_ssize_t index = match_getindex(self, group);
2204 
2205     if (index < 0 || index >= self->groups) {
2206         PyErr_SetString(
2207             PyExc_IndexError,
2208             "no such group"
2209             );
2210         return NULL;
2211     }
2212 
2213     /* marks are -1 if group is undefined */
2214     return _pair(self->mark[index*2], self->mark[index*2+1]);
2215 }
2216 
2217 static PyObject*
match_regs(MatchObject * self)2218 match_regs(MatchObject* self)
2219 {
2220     PyObject* regs;
2221     PyObject* item;
2222     Py_ssize_t index;
2223 
2224     regs = PyTuple_New(self->groups);
2225     if (!regs)
2226         return NULL;
2227 
2228     for (index = 0; index < self->groups; index++) {
2229         item = _pair(self->mark[index*2], self->mark[index*2+1]);
2230         if (!item) {
2231             Py_DECREF(regs);
2232             return NULL;
2233         }
2234         PyTuple_SET_ITEM(regs, index, item);
2235     }
2236 
2237     Py_INCREF(regs);
2238     self->regs = regs;
2239 
2240     return regs;
2241 }
2242 
2243 /*[clinic input]
2244 _sre.SRE_Match.__copy__
2245 
2246 [clinic start generated code]*/
2247 
2248 static PyObject *
_sre_SRE_Match___copy___impl(MatchObject * self)2249 _sre_SRE_Match___copy___impl(MatchObject *self)
2250 /*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2251 {
2252     Py_INCREF(self);
2253     return (PyObject *)self;
2254 }
2255 
2256 /*[clinic input]
2257 _sre.SRE_Match.__deepcopy__
2258 
2259     memo: object
2260     /
2261 
2262 [clinic start generated code]*/
2263 
2264 static PyObject *
_sre_SRE_Match___deepcopy__(MatchObject * self,PyObject * memo)2265 _sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2266 /*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
2267 {
2268     Py_INCREF(self);
2269     return (PyObject *)self;
2270 }
2271 
/* Docstring of the match type; referenced from Match_Type's tp_doc slot. */
2272 PyDoc_STRVAR(match_doc,
2273 "The result of re.match() and re.search().\n\
2274 Match objects always have a boolean value of True.");
2275 
/* Docstring of the "group" method; referenced from the hand-written
   "group" entry in match_methods. */
2276 PyDoc_STRVAR(match_group_doc,
2277 "group([group1, ...]) -> str or tuple.\n\
2278     Return subgroup(s) of the match by indices or names.\n\
2279     For 0 returns the entire match.");
2280 
2281 static PyObject *
match_lastindex_get(MatchObject * self,void * Py_UNUSED (ignored))2282 match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
2283 {
2284     if (self->lastindex >= 0)
2285         return PyLong_FromSsize_t(self->lastindex);
2286     Py_RETURN_NONE;
2287 }
2288 
2289 static PyObject *
match_lastgroup_get(MatchObject * self,void * Py_UNUSED (ignored))2290 match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
2291 {
2292     if (self->pattern->indexgroup &&
2293         self->lastindex >= 0 &&
2294         self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2295     {
2296         PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2297                                             self->lastindex);
2298         Py_INCREF(result);
2299         return result;
2300     }
2301     Py_RETURN_NONE;
2302 }
2303 
2304 static PyObject *
match_regs_get(MatchObject * self,void * Py_UNUSED (ignored))2305 match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
2306 {
2307     if (self->regs) {
2308         Py_INCREF(self->regs);
2309         return self->regs;
2310     } else
2311         return match_regs(self);
2312 }
2313 
2314 static PyObject *
match_repr(MatchObject * self)2315 match_repr(MatchObject *self)
2316 {
2317     PyObject *result;
2318     PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2319     if (group0 == NULL)
2320         return NULL;
2321     result = PyUnicode_FromFormat(
2322             "<%s object; span=(%zd, %zd), match=%.50R>",
2323             Py_TYPE(self)->tp_name,
2324             self->mark[0], self->mark[1], group0);
2325     Py_DECREF(group0);
2326     return result;
2327 }
2328 
2329 
2330 static PyObject*
pattern_new_match(PatternObject * pattern,SRE_STATE * state,Py_ssize_t status)2331 pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
2332 {
2333     /* create match object (from state object) */
2334 
2335     MatchObject* match;
2336     Py_ssize_t i, j;
2337     char* base;
2338     int n;
2339 
2340     if (status > 0) {
2341 
2342         /* create match object (with room for extra group marks) */
2343         /* coverity[ampersand_in_size] */
2344         match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2345                                  2*(pattern->groups+1));
2346         if (!match)
2347             return NULL;
2348 
2349         Py_INCREF(pattern);
2350         match->pattern = pattern;
2351 
2352         Py_INCREF(state->string);
2353         match->string = state->string;
2354 
2355         match->regs = NULL;
2356         match->groups = pattern->groups+1;
2357 
2358         /* fill in group slices */
2359 
2360         base = (char*) state->beginning;
2361         n = state->charsize;
2362 
2363         match->mark[0] = ((char*) state->start - base) / n;
2364         match->mark[1] = ((char*) state->ptr - base) / n;
2365 
2366         for (i = j = 0; i < pattern->groups; i++, j+=2)
2367             if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2368                 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2369                 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2370             } else
2371                 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2372 
2373         match->pos = state->pos;
2374         match->endpos = state->endpos;
2375 
2376         match->lastindex = state->lastindex;
2377 
2378         return (PyObject*) match;
2379 
2380     } else if (status == 0) {
2381 
2382         /* no match */
2383         Py_RETURN_NONE;
2384 
2385     }
2386 
2387     /* internal error */
2388     pattern_error(status);
2389     return NULL;
2390 }
2391 
2392 
2393 /* -------------------------------------------------------------------- */
2394 /* scanner methods (experimental) */
2395 
2396 static void
scanner_dealloc(ScannerObject * self)2397 scanner_dealloc(ScannerObject* self)
2398 {
2399     state_fini(&self->state);
2400     Py_XDECREF(self->pattern);
2401     PyObject_DEL(self);
2402 }
2403 
2404 /*[clinic input]
2405 _sre.SRE_Scanner.match
2406 
2407 [clinic start generated code]*/
2408 
2409 static PyObject *
_sre_SRE_Scanner_match_impl(ScannerObject * self)2410 _sre_SRE_Scanner_match_impl(ScannerObject *self)
2411 /*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
2412 {
2413     SRE_STATE* state = &self->state;
2414     PyObject* match;
2415     Py_ssize_t status;
2416 
2417     if (state->start == NULL)
2418         Py_RETURN_NONE;
2419 
2420     state_reset(state);
2421 
2422     state->ptr = state->start;
2423 
2424     status = sre_match(state, PatternObject_GetCode(self->pattern));
2425     if (PyErr_Occurred())
2426         return NULL;
2427 
2428     match = pattern_new_match((PatternObject*) self->pattern,
2429                                state, status);
2430 
2431     if (status == 0)
2432         state->start = NULL;
2433     else {
2434         state->must_advance = (state->ptr == state->start);
2435         state->start = state->ptr;
2436     }
2437 
2438     return match;
2439 }
2440 
2441 
2442 /*[clinic input]
2443 _sre.SRE_Scanner.search
2444 
2445 [clinic start generated code]*/
2446 
2447 static PyObject *
_sre_SRE_Scanner_search_impl(ScannerObject * self)2448 _sre_SRE_Scanner_search_impl(ScannerObject *self)
2449 /*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
2450 {
2451     SRE_STATE* state = &self->state;
2452     PyObject* match;
2453     Py_ssize_t status;
2454 
2455     if (state->start == NULL)
2456         Py_RETURN_NONE;
2457 
2458     state_reset(state);
2459 
2460     state->ptr = state->start;
2461 
2462     status = sre_search(state, PatternObject_GetCode(self->pattern));
2463     if (PyErr_Occurred())
2464         return NULL;
2465 
2466     match = pattern_new_match((PatternObject*) self->pattern,
2467                                state, status);
2468 
2469     if (status == 0)
2470         state->start = NULL;
2471     else {
2472         state->must_advance = (state->ptr == state->start);
2473         state->start = state->ptr;
2474     }
2475 
2476     return match;
2477 }
2478 
2479 static PyObject *
pattern_scanner(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)2480 pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
2481 {
2482     ScannerObject* scanner;
2483 
2484     /* create scanner object */
2485     scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2486     if (!scanner)
2487         return NULL;
2488     scanner->pattern = NULL;
2489 
2490     /* create search state object */
2491     if (!state_init(&scanner->state, self, string, pos, endpos)) {
2492         Py_DECREF(scanner);
2493         return NULL;
2494     }
2495 
2496     Py_INCREF(self);
2497     scanner->pattern = (PyObject*) self;
2498 
2499     return (PyObject*) scanner;
2500 }
2501 
2502 static Py_hash_t
pattern_hash(PatternObject * self)2503 pattern_hash(PatternObject *self)
2504 {
2505     Py_hash_t hash, hash2;
2506 
2507     hash = PyObject_Hash(self->pattern);
2508     if (hash == -1) {
2509         return -1;
2510     }
2511 
2512     hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2513     hash ^= hash2;
2514 
2515     hash ^= self->flags;
2516     hash ^= self->isbytes;
2517     hash ^= self->codesize;
2518 
2519     if (hash == -1) {
2520         hash = -2;
2521     }
2522     return hash;
2523 }
2524 
2525 static PyObject*
pattern_richcompare(PyObject * lefto,PyObject * righto,int op)2526 pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2527 {
2528     PatternObject *left, *right;
2529     int cmp;
2530 
2531     if (op != Py_EQ && op != Py_NE) {
2532         Py_RETURN_NOTIMPLEMENTED;
2533     }
2534 
2535     if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2536         Py_RETURN_NOTIMPLEMENTED;
2537     }
2538 
2539     if (lefto == righto) {
2540         /* a pattern is equal to itself */
2541         return PyBool_FromLong(op == Py_EQ);
2542     }
2543 
2544     left = (PatternObject *)lefto;
2545     right = (PatternObject *)righto;
2546 
2547     cmp = (left->flags == right->flags
2548            && left->isbytes == right->isbytes
2549            && left->codesize == right->codesize);
2550     if (cmp) {
2551         /* Compare the code and the pattern because the same pattern can
2552            produce different codes depending on the locale used to compile the
2553            pattern when the re.LOCALE flag is used. Don't compare groups,
2554            indexgroup nor groupindex: they are derivated from the pattern. */
2555         cmp = (memcmp(left->code, right->code,
2556                       sizeof(left->code[0]) * left->codesize) == 0);
2557     }
2558     if (cmp) {
2559         cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2560                                        Py_EQ);
2561         if (cmp < 0) {
2562             return NULL;
2563         }
2564     }
2565     if (op == Py_NE) {
2566         cmp = !cmp;
2567     }
2568     return PyBool_FromLong(cmp);
2569 }
2570 
2571 #include "clinic/_sre.c.h"
2572 
/* Method table of the pattern type (installed through Pattern_Type's
   tp_methods slot).  Every entry is a *_METHODDEF macro; presumably these
   are supplied by the generated header clinic/_sre.c.h included just
   before this table -- confirm there.  {NULL, NULL} ends the table. */
2573 static PyMethodDef pattern_methods[] = {
2574     _SRE_SRE_PATTERN_MATCH_METHODDEF
2575     _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2576     _SRE_SRE_PATTERN_SEARCH_METHODDEF
2577     _SRE_SRE_PATTERN_SUB_METHODDEF
2578     _SRE_SRE_PATTERN_SUBN_METHODDEF
2579     _SRE_SRE_PATTERN_FINDALL_METHODDEF
2580     _SRE_SRE_PATTERN_SPLIT_METHODDEF
2581     _SRE_SRE_PATTERN_FINDITER_METHODDEF
2582     _SRE_SRE_PATTERN_SCANNER_METHODDEF
2583     _SRE_SRE_PATTERN___COPY___METHODDEF
2584     _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2585     {NULL, NULL}
2586 };
2587 
/* Computed attributes of the pattern type (Pattern_Type's tp_getset):
   "groupindex" is produced by pattern_groupindex() and has no setter. */
2588 static PyGetSetDef pattern_getset[] = {
2589     {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2590       "A dictionary mapping group names to group numbers."},
2591     {NULL}  /* Sentinel */
2592 };
2593 
/* PAT_OFF(x): byte offset of field x inside PatternObject. */
2594 #define PAT_OFF(x) offsetof(PatternObject, x)
/* Read-only attributes of the pattern type that are read straight from
   PatternObject fields (Pattern_Type's tp_members). */
2595 static PyMemberDef pattern_members[] = {
2596     {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY,
2597      "The pattern string from which the RE object was compiled."},
2598     {"flags",      T_INT,       PAT_OFF(flags),         READONLY,
2599      "The regex matching flags."},
2600     {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY,
2601      "The number of capturing groups in the pattern."},
2602     {NULL}  /* Sentinel */
2603 };
2604 
/* Type object for compiled patterns, exposed as "re.Pattern".
   Instances are variable-sized, with one SRE_CODE per item.  The type
   provides repr, hashing, rich comparison and weak-reference support
   (via the weakreflist field), plus the method, member and getset tables
   defined above.  The initializer is positional: entries must stay in
   PyTypeObject slot order. */
2605 static PyTypeObject Pattern_Type = {
2606     PyVarObject_HEAD_INIT(NULL, 0)
2607     "re.Pattern",
2608     sizeof(PatternObject), sizeof(SRE_CODE),
2609     (destructor)pattern_dealloc,        /* tp_dealloc */
2610     0,                                  /* tp_print */
2611     0,                                  /* tp_getattr */
2612     0,                                  /* tp_setattr */
2613     0,                                  /* tp_reserved */
2614     (reprfunc)pattern_repr,             /* tp_repr */
2615     0,                                  /* tp_as_number */
2616     0,                                  /* tp_as_sequence */
2617     0,                                  /* tp_as_mapping */
2618     (hashfunc)pattern_hash,             /* tp_hash */
2619     0,                                  /* tp_call */
2620     0,                                  /* tp_str */
2621     0,                                  /* tp_getattro */
2622     0,                                  /* tp_setattro */
2623     0,                                  /* tp_as_buffer */
2624     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
2625     pattern_doc,                        /* tp_doc */
2626     0,                                  /* tp_traverse */
2627     0,                                  /* tp_clear */
2628     pattern_richcompare,                /* tp_richcompare */
2629     offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */
2630     0,                                  /* tp_iter */
2631     0,                                  /* tp_iternext */
2632     pattern_methods,                    /* tp_methods */
2633     pattern_members,                    /* tp_members */
2634     pattern_getset,                     /* tp_getset */
2635 };
2636 
2637 /* Match objects do not support length or assignment, but do support
2638    __getitem__. */
/* Mapping protocol table for match objects (Match_Type's tp_as_mapping):
   only the subscript slot is filled, with match_getitem(). */
2639 static PyMappingMethods match_as_mapping = {
2640     NULL,                       /* mp_length */
2641     (binaryfunc)match_getitem,  /* mp_subscript */
2642     NULL                        /* mp_ass_subscript */
2643 };
2644 
/* Method table of the match type (Match_Type's tp_methods).  "group" is a
   hand-written METH_VARARGS entry served by match_group(); the remaining
   entries are *_METHODDEF macros, presumably supplied by the generated
   clinic header -- confirm there.  {NULL, NULL} ends the table. */
2645 static PyMethodDef match_methods[] = {
2646     {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2647     _SRE_SRE_MATCH_START_METHODDEF
2648     _SRE_SRE_MATCH_END_METHODDEF
2649     _SRE_SRE_MATCH_SPAN_METHODDEF
2650     _SRE_SRE_MATCH_GROUPS_METHODDEF
2651     _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2652     _SRE_SRE_MATCH_EXPAND_METHODDEF
2653     _SRE_SRE_MATCH___COPY___METHODDEF
2654     _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2655     {NULL, NULL}
2656 };
2657 
/* Computed, read-only attributes of the match type (Match_Type's
   tp_getset).  "regs" carries no docstring. */
2658 static PyGetSetDef match_getset[] = {
2659     {"lastindex", (getter)match_lastindex_get, (setter)NULL,
2660      "The integer index of the last matched capturing group."},
2661     {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
2662      "The name of the last matched capturing group."},
2663     {"regs",      (getter)match_regs_get,      (setter)NULL},
2664     {NULL}
2665 };
2666 
/* MATCH_OFF(x): byte offset of field x inside MatchObject. */
2667 #define MATCH_OFF(x) offsetof(MatchObject, x)
/* Read-only attributes of the match type that are read straight from
   MatchObject fields (Match_Type's tp_members).  Note that the Python
   attribute "re" maps to the C field `pattern`. */
2668 static PyMemberDef match_members[] = {
2669     {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY,
2670      "The string passed to match() or search()."},
2671     {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY,
2672      "The regular expression object."},
2673     {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY,
2674      "The index into the string at which the RE engine started looking for a match."},
2675     {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY,
2676      "The index into the string beyond which the RE engine will not go."},
2677     {NULL}
2678 };
2679 
2680 /* FIXME: implement setattr("string", None) as a special case (to
2681    detach the associated string, if any) */
2682 
/* Type object for match results, exposed as "re.Match".
   Instances are variable-sized, with one Py_ssize_t per item; these are
   the group marks that pattern_new_match() fills in.  The type provides
   repr and subscripting (through match_as_mapping) plus the method,
   member and getset tables defined above.  The initializer is
   positional: entries must stay in PyTypeObject slot order. */
2683 static PyTypeObject Match_Type = {
2684     PyVarObject_HEAD_INIT(NULL,0)
2685     "re.Match",
2686     sizeof(MatchObject), sizeof(Py_ssize_t),
2687     (destructor)match_dealloc,  /* tp_dealloc */
2688     0,                          /* tp_print */
2689     0,                          /* tp_getattr */
2690     0,                          /* tp_setattr */
2691     0,                          /* tp_reserved */
2692     (reprfunc)match_repr,       /* tp_repr */
2693     0,                          /* tp_as_number */
2694     0,                          /* tp_as_sequence */
2695     &match_as_mapping,          /* tp_as_mapping */
2696     0,                          /* tp_hash */
2697     0,                          /* tp_call */
2698     0,                          /* tp_str */
2699     0,                          /* tp_getattro */
2700     0,                          /* tp_setattro */
2701     0,                          /* tp_as_buffer */
2702     Py_TPFLAGS_DEFAULT,         /* tp_flags */
2703     match_doc,                  /* tp_doc */
2704     0,                          /* tp_traverse */
2705     0,                          /* tp_clear */
2706     0,                          /* tp_richcompare */
2707     0,                          /* tp_weaklistoffset */
2708     0,                          /* tp_iter */
2709     0,                          /* tp_iternext */
2710     match_methods,              /* tp_methods */
2711     match_members,              /* tp_members */
2712     match_getset,               /* tp_getset */
2713 };
2714 
/* Method table of the scanner type (Scanner_Type's tp_methods): the
   match and search entries, given as *_METHODDEF macros (presumably from
   the generated clinic header -- confirm there). */
2715 static PyMethodDef scanner_methods[] = {
2716     _SRE_SRE_SCANNER_MATCH_METHODDEF
2717     _SRE_SRE_SCANNER_SEARCH_METHODDEF
2718     {NULL, NULL}
2719 };
2720 
/* SCAN_OFF(x): byte offset of field x inside ScannerObject. */
2721 #define SCAN_OFF(x) offsetof(ScannerObject, x)
/* Read-only attributes of the scanner type (Scanner_Type's tp_members):
   "pattern" exposes the object stored in the scanner's pattern field. */
2722 static PyMemberDef scanner_members[] = {
2723     {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2724     {NULL}  /* Sentinel */
2725 };
2726 
/* Type object for scanner objects, named "_" SRE_MODULE ".SRE_Scanner".
   Fixed-size instances (item size 0); apart from deallocation the type
   only provides the method and member tables defined above.  The
   initializer is positional: entries must stay in PyTypeObject slot
   order. */
2727 static PyTypeObject Scanner_Type = {
2728     PyVarObject_HEAD_INIT(NULL, 0)
2729     "_" SRE_MODULE ".SRE_Scanner",
2730     sizeof(ScannerObject), 0,
2731     (destructor)scanner_dealloc,/* tp_dealloc */
2732     0,                          /* tp_print */
2733     0,                          /* tp_getattr */
2734     0,                          /* tp_setattr */
2735     0,                          /* tp_reserved */
2736     0,                          /* tp_repr */
2737     0,                          /* tp_as_number */
2738     0,                          /* tp_as_sequence */
2739     0,                          /* tp_as_mapping */
2740     0,                          /* tp_hash */
2741     0,                          /* tp_call */
2742     0,                          /* tp_str */
2743     0,                          /* tp_getattro */
2744     0,                          /* tp_setattro */
2745     0,                          /* tp_as_buffer */
2746     Py_TPFLAGS_DEFAULT,         /* tp_flags */
2747     0,                          /* tp_doc */
2748     0,                          /* tp_traverse */
2749     0,                          /* tp_clear */
2750     0,                          /* tp_richcompare */
2751     0,                          /* tp_weaklistoffset */
2752     0,                          /* tp_iter */
2753     0,                          /* tp_iternext */
2754     scanner_methods,            /* tp_methods */
2755     scanner_members,            /* tp_members */
2756     0,                          /* tp_getset */
2757 };
2758 
/* Module-level function table (referenced from sremodule below), given
   as *_METHODDEF macros (presumably from the generated clinic header --
   confirm there).  {NULL, NULL} ends the table. */
2759 static PyMethodDef _functions[] = {
2760     _SRE_COMPILE_METHODDEF
2761     _SRE_GETCODESIZE_METHODDEF
2762     _SRE_ASCII_ISCASED_METHODDEF
2763     _SRE_UNICODE_ISCASED_METHODDEF
2764     _SRE_ASCII_TOLOWER_METHODDEF
2765     _SRE_UNICODE_TOLOWER_METHODDEF
2766     {NULL, NULL}
2767 };
2768 
/* Module definition passed to PyModule_Create() in PyInit__sre().
   Positional initializer; the field names below follow the documented
   PyModuleDef layout. */
2769 static struct PyModuleDef sremodule = {
2770         PyModuleDef_HEAD_INIT,  /* m_base */
2771         "_" SRE_MODULE,         /* m_name */
2772         NULL,                   /* m_doc */
2773         -1,                     /* m_size */
2774         _functions,             /* m_methods */
2775         NULL,                   /* m_slots */
2776         NULL,                   /* m_traverse */
2777         NULL,                   /* m_clear */
2778         NULL                    /* m_free */
2779 };
2780 
PyInit__sre(void)2781 PyMODINIT_FUNC PyInit__sre(void)
2782 {
2783     PyObject* m;
2784     PyObject* d;
2785     PyObject* x;
2786 
2787     /* Patch object types */
2788     if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2789         PyType_Ready(&Scanner_Type))
2790         return NULL;
2791 
2792     m = PyModule_Create(&sremodule);
2793     if (m == NULL)
2794         return NULL;
2795     d = PyModule_GetDict(m);
2796 
2797     x = PyLong_FromLong(SRE_MAGIC);
2798     if (x) {
2799         PyDict_SetItemString(d, "MAGIC", x);
2800         Py_DECREF(x);
2801     }
2802 
2803     x = PyLong_FromLong(sizeof(SRE_CODE));
2804     if (x) {
2805         PyDict_SetItemString(d, "CODESIZE", x);
2806         Py_DECREF(x);
2807     }
2808 
2809     x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2810     if (x) {
2811         PyDict_SetItemString(d, "MAXREPEAT", x);
2812         Py_DECREF(x);
2813     }
2814 
2815     x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2816     if (x) {
2817         PyDict_SetItemString(d, "MAXGROUPS", x);
2818         Py_DECREF(x);
2819     }
2820 
2821     x = PyUnicode_FromString(copyright);
2822     if (x) {
2823         PyDict_SetItemString(d, "copyright", x);
2824         Py_DECREF(x);
2825     }
2826     return m;
2827 }
2828 
2829 /* vim:ts=4:sw=4:et
2830 */
2831