1 /*
2  * Secret Labs' Regular Expression Engine
3  *
4  * regular expression matching engine
5  *
6  * partial history:
7  * 1999-10-24 fl   created (based on existing template matcher code)
8  * 2000-03-06 fl   first alpha, sort of
9  * 2000-08-01 fl   fixes for 1.6b1
10  * 2000-08-07 fl   use PyOS_CheckStack() if available
11  * 2000-09-20 fl   added expand method
12  * 2001-03-20 fl   lots of fixes for 2.1b2
13  * 2001-04-15 fl   export copyright as Python attribute, not global
14  * 2001-04-28 fl   added __copy__ methods (work in progress)
15  * 2001-05-14 fl   fixes for 1.5.2 compatibility
16  * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
17  * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
18  * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
19  * 2001-10-21 fl   added sub/subn primitive
20  * 2001-10-24 fl   added finditer primitive (for 2.2 only)
21  * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
22  * 2002-11-09 fl   fixed empty sub/subn return type
23  * 2003-04-18 mvl  fully support 4-byte codes
24  * 2003-10-17 gn   implemented non recursive scheme
25  * 2013-02-04 mrab added fullmatch primitive
26  *
27  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
28  *
29  * This version of the SRE library can be redistributed under CNRI's
30  * Python 1.6 license.  For any other use, please contact Secret Labs
31  * AB (info@pythonware.com).
32  *
33  * Portions of this engine have been developed in cooperation with
34  * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
35  * other compatibility work.
36  */
37 
38 static const char copyright[] =
39     " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40 
41 #define PY_SSIZE_T_CLEAN
42 
43 #include "Python.h"
44 #include "structmember.h"         // PyMemberDef
45 
46 #include "sre.h"
47 
48 #define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49 
50 #include <ctype.h>
51 
52 /* name of this module, minus the leading underscore */
53 #if !defined(SRE_MODULE)
54 #define SRE_MODULE "sre"
55 #endif
56 
57 #define SRE_PY_MODULE "re"
58 
59 /* defining this one enables tracing */
60 #undef VERBOSE
61 
62 /* -------------------------------------------------------------------- */
63 
/* LOCAL(type) introduces a file-private inline helper returning `type`.
 * Under MSVC it additionally requests the __fastcall convention and tunes
 * optimization/warning pragmas; every other compiler gets plain
 * `static inline`. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif
72 
/* error codes */
/* All codes are negative.  pattern_error() maps RECURSION_LIMIT, MEMORY and
 * INTERRUPTED to specific handling and reports every other value as an
 * internal engine error. */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
79 
/* TRACE((fmt, ...)) forwards its single, already-parenthesized argument
 * list to printf when VERBOSE is defined and expands to nothing otherwise;
 * hence the double parentheses at every call site. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
85 
86 /* -------------------------------------------------------------------- */
87 /* search engine state */
88 
/* Locale-independent character predicates.  Apart from the line-break
 * test, each one first compares `ch` against an upper bound, so the Py_IS*
 * macro is only consulted for code points at or below that bound and any
 * larger value never matches.  Note that `ch` is evaluated more than once. */
#define SRE_IS_DIGIT(ch)\
    ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) <= ' ' && Py_ISSPACE(ch))
/* only '\n' counts as a line break here */
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
/* word characters: alphanumerics plus the underscore */
#define SRE_IS_WORD(ch)\
    ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
97 
/* Lowercase `ch` with Py_TOLOWER when it is below 128; larger values are
 * returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
102 
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
/* SRE_LOC_IS_ALNUM: isalnum() is consulted only when no bit above the low
 * eight is set in `ch`; every other value yields 0. */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
/* SRE_LOC_IS_WORD: a locale alphanumeric, or the underscore. */
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
108 
/* Lowercase `ch` through the C library's tolower(); code points of 256 and
 * above are outside its domain and are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)tolower(ch);
}
113 
/* Uppercase `ch` through the C library's toupper(); code points of 256 and
 * above are outside its domain and are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)toupper(ch);
}
118 
/* unicode-specific character predicates */

/* Each predicate forwards directly to the corresponding Py_UNICODE_*
 * macro; the "digit" test uses the DECIMAL classification. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
/* word characters: Unicode alphanumerics plus the underscore */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
126 
/* Return the Py_UNICODE_TOLOWER mapping of `ch` as an unsigned int. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
131 
/* Return the Py_UNICODE_TOUPPER mapping of `ch` as an unsigned int. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);

    return raised;
}
136 
137 LOCAL(int)
sre_category(SRE_CODE category,unsigned int ch)138 sre_category(SRE_CODE category, unsigned int ch)
139 {
140     switch (category) {
141 
142     case SRE_CATEGORY_DIGIT:
143         return SRE_IS_DIGIT(ch);
144     case SRE_CATEGORY_NOT_DIGIT:
145         return !SRE_IS_DIGIT(ch);
146     case SRE_CATEGORY_SPACE:
147         return SRE_IS_SPACE(ch);
148     case SRE_CATEGORY_NOT_SPACE:
149         return !SRE_IS_SPACE(ch);
150     case SRE_CATEGORY_WORD:
151         return SRE_IS_WORD(ch);
152     case SRE_CATEGORY_NOT_WORD:
153         return !SRE_IS_WORD(ch);
154     case SRE_CATEGORY_LINEBREAK:
155         return SRE_IS_LINEBREAK(ch);
156     case SRE_CATEGORY_NOT_LINEBREAK:
157         return !SRE_IS_LINEBREAK(ch);
158 
159     case SRE_CATEGORY_LOC_WORD:
160         return SRE_LOC_IS_WORD(ch);
161     case SRE_CATEGORY_LOC_NOT_WORD:
162         return !SRE_LOC_IS_WORD(ch);
163 
164     case SRE_CATEGORY_UNI_DIGIT:
165         return SRE_UNI_IS_DIGIT(ch);
166     case SRE_CATEGORY_UNI_NOT_DIGIT:
167         return !SRE_UNI_IS_DIGIT(ch);
168     case SRE_CATEGORY_UNI_SPACE:
169         return SRE_UNI_IS_SPACE(ch);
170     case SRE_CATEGORY_UNI_NOT_SPACE:
171         return !SRE_UNI_IS_SPACE(ch);
172     case SRE_CATEGORY_UNI_WORD:
173         return SRE_UNI_IS_WORD(ch);
174     case SRE_CATEGORY_UNI_NOT_WORD:
175         return !SRE_UNI_IS_WORD(ch);
176     case SRE_CATEGORY_UNI_LINEBREAK:
177         return SRE_UNI_IS_LINEBREAK(ch);
178     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
179         return !SRE_UNI_IS_LINEBREAK(ch);
180     }
181     return 0;
182 }
183 
184 LOCAL(int)
char_loc_ignore(SRE_CODE pattern,SRE_CODE ch)185 char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
186 {
187     return ch == pattern
188         || (SRE_CODE) sre_lower_locale(ch) == pattern
189         || (SRE_CODE) sre_upper_locale(ch) == pattern;
190 }
191 
192 
193 /* helpers */
194 
195 static void
data_stack_dealloc(SRE_STATE * state)196 data_stack_dealloc(SRE_STATE* state)
197 {
198     if (state->data_stack) {
199         PyMem_FREE(state->data_stack);
200         state->data_stack = NULL;
201     }
202     state->data_stack_size = state->data_stack_base = 0;
203 }
204 
205 static int
data_stack_grow(SRE_STATE * state,Py_ssize_t size)206 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
207 {
208     Py_ssize_t minsize, cursize;
209     minsize = state->data_stack_base+size;
210     cursize = state->data_stack_size;
211     if (cursize < minsize) {
212         void* stack;
213         cursize = minsize+minsize/4+1024;
214         TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
215         stack = PyMem_REALLOC(state->data_stack, cursize);
216         if (!stack) {
217             data_stack_dealloc(state);
218             return SRE_ERROR_MEMORY;
219         }
220         state->data_stack = (char *)stack;
221         state->data_stack_size = cursize;
222     }
223     return 0;
224 }
225 
/* sre_lib.h is used as a template: it is included three times, each time
 * with a different character type (SRE_CHAR), character width
 * (SIZEOF_SRE_CHAR) and name prefix (SRE(F)), yielding the sre_ucs1_*,
 * sre_ucs2_* and sre_ucs4_* function families that sre_match() and
 * sre_search() dispatch to.
 * NOTE(review): the three macros are redefined here without an #undef;
 * presumably sre_lib.h undefines them at its end -- confirm. */

/* generate 8-bit version */

#define SRE_CHAR Py_UCS1
#define SIZEOF_SRE_CHAR 1
#define SRE(F) sre_ucs1_##F
#include "sre_lib.h"

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UCS2
#define SIZEOF_SRE_CHAR 2
#define SRE(F) sre_ucs2_##F
#include "sre_lib.h"

/* generate 32-bit unicode version */

#define SRE_CHAR Py_UCS4
#define SIZEOF_SRE_CHAR 4
#define SRE(F) sre_ucs4_##F
#include "sre_lib.h"
246 
247 /* -------------------------------------------------------------------- */
248 /* factories and destructors */
249 
/* see sre.h for object declarations */
/* Forward declarations: both functions are called by the Pattern methods
 * below before any definition of them appears in the file. */
static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
253 
254 
255 /*[clinic input]
256 module _sre
257 class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
258 class _sre.SRE_Match "MatchObject *" "&Match_Type"
259 class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
260 [clinic start generated code]*/
261 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
262 
/* Type objects named by the clinic class declarations above.  Only declared
 * at this point -- NOTE(review): their initializers are expected further
 * down in the file; confirm. */
static PyTypeObject Pattern_Type;
static PyTypeObject Match_Type;
static PyTypeObject Scanner_Type;
266 
267 /*[clinic input]
268 _sre.getcodesize -> int
269 [clinic start generated code]*/
270 
271 static int
_sre_getcodesize_impl(PyObject * module)272 _sre_getcodesize_impl(PyObject *module)
273 /*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
274 {
275     return sizeof(SRE_CODE);
276 }
277 
278 /*[clinic input]
279 _sre.ascii_iscased -> bool
280 
281     character: int
282     /
283 
284 [clinic start generated code]*/
285 
286 static int
_sre_ascii_iscased_impl(PyObject * module,int character)287 _sre_ascii_iscased_impl(PyObject *module, int character)
288 /*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
289 {
290     unsigned int ch = (unsigned int)character;
291     return ch < 128 && Py_ISALPHA(ch);
292 }
293 
294 /*[clinic input]
295 _sre.unicode_iscased -> bool
296 
297     character: int
298     /
299 
300 [clinic start generated code]*/
301 
302 static int
_sre_unicode_iscased_impl(PyObject * module,int character)303 _sre_unicode_iscased_impl(PyObject *module, int character)
304 /*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
305 {
306     unsigned int ch = (unsigned int)character;
307     return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
308 }
309 
310 /*[clinic input]
311 _sre.ascii_tolower -> int
312 
313     character: int
314     /
315 
316 [clinic start generated code]*/
317 
318 static int
_sre_ascii_tolower_impl(PyObject * module,int character)319 _sre_ascii_tolower_impl(PyObject *module, int character)
320 /*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
321 {
322     return sre_lower_ascii(character);
323 }
324 
325 /*[clinic input]
326 _sre.unicode_tolower -> int
327 
328     character: int
329     /
330 
331 [clinic start generated code]*/
332 
333 static int
_sre_unicode_tolower_impl(PyObject * module,int character)334 _sre_unicode_tolower_impl(PyObject *module, int character)
335 /*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
336 {
337     return sre_lower_unicode(character);
338 }
339 
340 LOCAL(void)
state_reset(SRE_STATE * state)341 state_reset(SRE_STATE* state)
342 {
343     /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
344     /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
345 
346     state->lastmark = -1;
347     state->lastindex = -1;
348 
349     state->repeat = NULL;
350 
351     data_stack_dealloc(state);
352 }
353 
354 static const void*
getstring(PyObject * string,Py_ssize_t * p_length,int * p_isbytes,int * p_charsize,Py_buffer * view)355 getstring(PyObject* string, Py_ssize_t* p_length,
356           int* p_isbytes, int* p_charsize,
357           Py_buffer *view)
358 {
359     /* given a python object, return a data pointer, a length (in
360        characters), and a character size.  return NULL if the object
361        is not a string (or not compatible) */
362 
363     /* Unicode objects do not support the buffer API. So, get the data
364        directly instead. */
365     if (PyUnicode_Check(string)) {
366         if (PyUnicode_READY(string) == -1)
367             return NULL;
368         *p_length = PyUnicode_GET_LENGTH(string);
369         *p_charsize = PyUnicode_KIND(string);
370         *p_isbytes = 0;
371         return PyUnicode_DATA(string);
372     }
373 
374     /* get pointer to byte string buffer */
375     if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
376         PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
377         return NULL;
378     }
379 
380     *p_length = view->len;
381     *p_charsize = 1;
382     *p_isbytes = 1;
383 
384     if (view->buf == NULL) {
385         PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
386         PyBuffer_Release(view);
387         view->buf = NULL;
388         return NULL;
389     }
390     return view->buf;
391 }
392 
393 LOCAL(PyObject*)
state_init(SRE_STATE * state,PatternObject * pattern,PyObject * string,Py_ssize_t start,Py_ssize_t end)394 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
395            Py_ssize_t start, Py_ssize_t end)
396 {
397     /* prepare state object */
398 
399     Py_ssize_t length;
400     int isbytes, charsize;
401     const void* ptr;
402 
403     memset(state, 0, sizeof(SRE_STATE));
404 
405     state->mark = PyMem_New(const void *, pattern->groups * 2);
406     if (!state->mark) {
407         PyErr_NoMemory();
408         goto err;
409     }
410     state->lastmark = -1;
411     state->lastindex = -1;
412 
413     state->buffer.buf = NULL;
414     ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
415     if (!ptr)
416         goto err;
417 
418     if (isbytes && pattern->isbytes == 0) {
419         PyErr_SetString(PyExc_TypeError,
420                         "cannot use a string pattern on a bytes-like object");
421         goto err;
422     }
423     if (!isbytes && pattern->isbytes > 0) {
424         PyErr_SetString(PyExc_TypeError,
425                         "cannot use a bytes pattern on a string-like object");
426         goto err;
427     }
428 
429     /* adjust boundaries */
430     if (start < 0)
431         start = 0;
432     else if (start > length)
433         start = length;
434 
435     if (end < 0)
436         end = 0;
437     else if (end > length)
438         end = length;
439 
440     state->isbytes = isbytes;
441     state->charsize = charsize;
442     state->match_all = 0;
443     state->must_advance = 0;
444 
445     state->beginning = ptr;
446 
447     state->start = (void*) ((char*) ptr + start * state->charsize);
448     state->end = (void*) ((char*) ptr + end * state->charsize);
449 
450     Py_INCREF(string);
451     state->string = string;
452     state->pos = start;
453     state->endpos = end;
454 
455     return string;
456   err:
457     PyMem_Del(state->mark);
458     state->mark = NULL;
459     if (state->buffer.buf)
460         PyBuffer_Release(&state->buffer);
461     return NULL;
462 }
463 
464 LOCAL(void)
state_fini(SRE_STATE * state)465 state_fini(SRE_STATE* state)
466 {
467     if (state->buffer.buf)
468         PyBuffer_Release(&state->buffer);
469     Py_XDECREF(state->string);
470     data_stack_dealloc(state);
471     PyMem_Del(state->mark);
472     state->mark = NULL;
473 }
474 
/* calculate offset from start of string */
/* `member` is a pointer into the subject data; its byte distance from
 * (state)->beginning is divided by (state)->charsize, so the result is an
 * index counted in characters rather than bytes.  Both arguments are
 * evaluated more than once or cast, so pass simple expressions. */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
478 
479 LOCAL(PyObject*)
getslice(int isbytes,const void * ptr,PyObject * string,Py_ssize_t start,Py_ssize_t end)480 getslice(int isbytes, const void *ptr,
481          PyObject* string, Py_ssize_t start, Py_ssize_t end)
482 {
483     if (isbytes) {
484         if (PyBytes_CheckExact(string) &&
485             start == 0 && end == PyBytes_GET_SIZE(string)) {
486             Py_INCREF(string);
487             return string;
488         }
489         return PyBytes_FromStringAndSize(
490                 (const char *)ptr + start, end - start);
491     }
492     else {
493         return PyUnicode_Substring(string, start, end);
494     }
495 }
496 
497 LOCAL(PyObject*)
state_getslice(SRE_STATE * state,Py_ssize_t index,PyObject * string,int empty)498 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
499 {
500     Py_ssize_t i, j;
501 
502     index = (index - 1) * 2;
503 
504     if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
505         if (empty)
506             /* want empty string */
507             i = j = 0;
508         else {
509             Py_RETURN_NONE;
510         }
511     } else {
512         i = STATE_OFFSET(state, state->mark[index]);
513         j = STATE_OFFSET(state, state->mark[index+1]);
514     }
515 
516     return getslice(state->isbytes, state->beginning, string, i, j);
517 }
518 
519 static void
pattern_error(Py_ssize_t status)520 pattern_error(Py_ssize_t status)
521 {
522     switch (status) {
523     case SRE_ERROR_RECURSION_LIMIT:
524         /* This error code seems to be unused. */
525         PyErr_SetString(
526             PyExc_RecursionError,
527             "maximum recursion limit exceeded"
528             );
529         break;
530     case SRE_ERROR_MEMORY:
531         PyErr_NoMemory();
532         break;
533     case SRE_ERROR_INTERRUPTED:
534     /* An exception has already been raised, so let it fly */
535         break;
536     default:
537         /* other error codes indicate compiler/engine bugs */
538         PyErr_SetString(
539             PyExc_RuntimeError,
540             "internal error in regular expression engine"
541             );
542     }
543 }
544 
545 static void
pattern_dealloc(PatternObject * self)546 pattern_dealloc(PatternObject* self)
547 {
548     if (self->weakreflist != NULL)
549         PyObject_ClearWeakRefs((PyObject *) self);
550     Py_XDECREF(self->pattern);
551     Py_XDECREF(self->groupindex);
552     Py_XDECREF(self->indexgroup);
553     PyObject_DEL(self);
554 }
555 
556 LOCAL(Py_ssize_t)
sre_match(SRE_STATE * state,SRE_CODE * pattern)557 sre_match(SRE_STATE* state, SRE_CODE* pattern)
558 {
559     if (state->charsize == 1)
560         return sre_ucs1_match(state, pattern, 1);
561     if (state->charsize == 2)
562         return sre_ucs2_match(state, pattern, 1);
563     assert(state->charsize == 4);
564     return sre_ucs4_match(state, pattern, 1);
565 }
566 
567 LOCAL(Py_ssize_t)
sre_search(SRE_STATE * state,SRE_CODE * pattern)568 sre_search(SRE_STATE* state, SRE_CODE* pattern)
569 {
570     if (state->charsize == 1)
571         return sre_ucs1_search(state, pattern);
572     if (state->charsize == 2)
573         return sre_ucs2_search(state, pattern);
574     assert(state->charsize == 4);
575     return sre_ucs4_search(state, pattern);
576 }
577 
578 /*[clinic input]
579 _sre.SRE_Pattern.match
580 
581     string: object
582     pos: Py_ssize_t = 0
583     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
584 
585 Matches zero or more characters at the beginning of the string.
586 [clinic start generated code]*/
587 
588 static PyObject *
_sre_SRE_Pattern_match_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)589 _sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
590                             Py_ssize_t pos, Py_ssize_t endpos)
591 /*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
592 {
593     SRE_STATE state;
594     Py_ssize_t status;
595     PyObject *match;
596 
597     if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
598         return NULL;
599 
600     state.ptr = state.start;
601 
602     TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
603 
604     status = sre_match(&state, PatternObject_GetCode(self));
605 
606     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
607     if (PyErr_Occurred()) {
608         state_fini(&state);
609         return NULL;
610     }
611 
612     match = pattern_new_match(self, &state, status);
613     state_fini(&state);
614     return match;
615 }
616 
617 /*[clinic input]
618 _sre.SRE_Pattern.fullmatch
619 
620     string: object
621     pos: Py_ssize_t = 0
622     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
623 
624 Matches against all of the string.
625 [clinic start generated code]*/
626 
627 static PyObject *
_sre_SRE_Pattern_fullmatch_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)628 _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
629                                 Py_ssize_t pos, Py_ssize_t endpos)
630 /*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
631 {
632     SRE_STATE state;
633     Py_ssize_t status;
634     PyObject *match;
635 
636     if (!state_init(&state, self, string, pos, endpos))
637         return NULL;
638 
639     state.ptr = state.start;
640 
641     TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
642 
643     state.match_all = 1;
644     status = sre_match(&state, PatternObject_GetCode(self));
645 
646     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
647     if (PyErr_Occurred()) {
648         state_fini(&state);
649         return NULL;
650     }
651 
652     match = pattern_new_match(self, &state, status);
653     state_fini(&state);
654     return match;
655 }
656 
657 /*[clinic input]
658 _sre.SRE_Pattern.search
659 
660     string: object
661     pos: Py_ssize_t = 0
662     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
663 
664 Scan through string looking for a match, and return a corresponding match object instance.
665 
666 Return None if no position in the string matches.
667 [clinic start generated code]*/
668 
669 static PyObject *
_sre_SRE_Pattern_search_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)670 _sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
671                              Py_ssize_t pos, Py_ssize_t endpos)
672 /*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
673 {
674     SRE_STATE state;
675     Py_ssize_t status;
676     PyObject *match;
677 
678     if (!state_init(&state, self, string, pos, endpos))
679         return NULL;
680 
681     TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
682 
683     status = sre_search(&state, PatternObject_GetCode(self));
684 
685     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
686 
687     if (PyErr_Occurred()) {
688         state_fini(&state);
689         return NULL;
690     }
691 
692     match = pattern_new_match(self, &state, status);
693     state_fini(&state);
694     return match;
695 }
696 
697 static PyObject*
call(const char * module,const char * function,PyObject * args)698 call(const char* module, const char* function, PyObject* args)
699 {
700     PyObject* name;
701     PyObject* mod;
702     PyObject* func;
703     PyObject* result;
704 
705     if (!args)
706         return NULL;
707     name = PyUnicode_FromString(module);
708     if (!name)
709         return NULL;
710     mod = PyImport_Import(name);
711     Py_DECREF(name);
712     if (!mod)
713         return NULL;
714     func = PyObject_GetAttrString(mod, function);
715     Py_DECREF(mod);
716     if (!func)
717         return NULL;
718     result = PyObject_CallObject(func, args);
719     Py_DECREF(func);
720     Py_DECREF(args);
721     return result;
722 }
723 
724 /*[clinic input]
725 _sre.SRE_Pattern.findall
726 
727     string: object
728     pos: Py_ssize_t = 0
729     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
730 
731 Return a list of all non-overlapping matches of pattern in string.
732 [clinic start generated code]*/
733 
static PyObject *
_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
                              Py_ssize_t pos, Py_ssize_t endpos)
/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
{
    /* Collect every match of the pattern between pos and endpos into a new
       list.  The shape of each item depends on the number of groups:
         0 groups  -> the matched text itself
         1 group   -> the text of that group
         otherwise -> a tuple with the text of every group
       Groups that did not take part in a match contribute an empty string
       (state_getslice is called with empty=1).  Returns NULL with an
       exception set on failure. */
    SRE_STATE state;
    PyObject* list;
    Py_ssize_t status;
    Py_ssize_t i, b, e;

    if (!state_init(&state, self, string, pos, endpos))
        return NULL;

    list = PyList_New(0);
    if (!list) {
        state_fini(&state);
        return NULL;
    }

    /* keep searching until the start pointer moves past the end */
    while (state.start <= state.end) {

        PyObject* item;

        /* clear marks/repeat/data stack left over from the previous search */
        state_reset(&state);

        state.ptr = state.start;

        status = sre_search(&state, PatternObject_GetCode(self));
        if (PyErr_Occurred())
            goto error;

        /* 0 means "no further match"; negative values are engine errors */
        if (status <= 0) {
            if (status == 0)
                break;
            pattern_error(status);
            goto error;
        }

        /* don't bother to build a match object */
        switch (self->groups) {
        case 0:
            /* whole match: from state.start up to state.ptr */
            b = STATE_OFFSET(&state, state.start);
            e = STATE_OFFSET(&state, state.ptr);
            item = getslice(state.isbytes, state.beginning,
                            string, b, e);
            if (!item)
                goto error;
            break;
        case 1:
            item = state_getslice(&state, 1, string, 1);
            if (!item)
                goto error;
            break;
        default:
            item = PyTuple_New(self->groups);
            if (!item)
                goto error;
            for (i = 0; i < self->groups; i++) {
                PyObject* o = state_getslice(&state, i+1, string, 1);
                if (!o) {
                    Py_DECREF(item);
                    goto error;
                }
                /* the tuple takes over the reference to o */
                PyTuple_SET_ITEM(item, i, o);
            }
            break;
        }

        /* PyList_Append takes its own reference, so ours is dropped */
        status = PyList_Append(list, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;

        /* Flag an empty match (ptr did not move past start) before
           resuming from the end of this match.
           NOTE(review): must_advance is consumed by the engine in
           sre_lib.h, presumably to avoid reporting the same empty match
           again -- confirm there. */
        state.must_advance = (state.ptr == state.start);
        state.start = state.ptr;
    }

    state_fini(&state);
    return list;

error:
    Py_DECREF(list);
    state_fini(&state);
    return NULL;

}
820 
821 /*[clinic input]
822 _sre.SRE_Pattern.finditer
823 
824     string: object
825     pos: Py_ssize_t = 0
826     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
827 
828 Return an iterator over all non-overlapping matches for the RE pattern in string.
829 
830 For each match, the iterator returns a match object.
831 [clinic start generated code]*/
832 
833 static PyObject *
_sre_SRE_Pattern_finditer_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)834 _sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
835                                Py_ssize_t pos, Py_ssize_t endpos)
836 /*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
837 {
838     PyObject* scanner;
839     PyObject* search;
840     PyObject* iterator;
841 
842     scanner = pattern_scanner(self, string, pos, endpos);
843     if (!scanner)
844         return NULL;
845 
846     search = PyObject_GetAttrString(scanner, "search");
847     Py_DECREF(scanner);
848     if (!search)
849         return NULL;
850 
851     iterator = PyCallIter_New(search, Py_None);
852     Py_DECREF(search);
853 
854     return iterator;
855 }
856 
857 /*[clinic input]
858 _sre.SRE_Pattern.scanner
859 
860     string: object
861     pos: Py_ssize_t = 0
862     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
863 
864 [clinic start generated code]*/
865 
866 static PyObject *
_sre_SRE_Pattern_scanner_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)867 _sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
868                               Py_ssize_t pos, Py_ssize_t endpos)
869 /*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
870 {
871     return pattern_scanner(self, string, pos, endpos);
872 }
873 
874 /*[clinic input]
875 _sre.SRE_Pattern.split
876 
877     string: object
878     maxsplit: Py_ssize_t = 0
879 
880 Split string by the occurrences of pattern.
881 [clinic start generated code]*/
882 
static PyObject *
_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
                            Py_ssize_t maxsplit)
/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
{
    /* Split `string` at each match of the pattern and return the pieces as
       a new list.  After every separator the text of each group is
       appended as well; a group that did not take part in the match
       contributes None (state_getslice is called with empty=0).  A
       maxsplit of 0 means "no limit"; otherwise at most maxsplit splits
       are made.  Returns NULL with an exception set on failure. */
    SRE_STATE state;
    PyObject* list;
    PyObject* item;
    Py_ssize_t status;
    Py_ssize_t n;
    Py_ssize_t i;
    const void* last;

    assert(self->codesize != 0);

    /* always operate on the whole subject */
    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
        return NULL;

    list = PyList_New(0);
    if (!list) {
        state_fini(&state);
        return NULL;
    }

    /* n counts the splits made so far; last points just past the previous
       match, i.e. at the start of the next segment to emit */
    n = 0;
    last = state.start;

    while (!maxsplit || n < maxsplit) {

        /* clear marks/repeat/data stack left over from the previous search */
        state_reset(&state);

        state.ptr = state.start;

        status = sre_search(&state, PatternObject_GetCode(self));
        if (PyErr_Occurred())
            goto error;

        /* 0 means "no further match"; negative values are engine errors */
        if (status <= 0) {
            if (status == 0)
                break;
            pattern_error(status);
            goto error;
        }

        /* get segment before this match */
        item = getslice(state.isbytes, state.beginning,
            string, STATE_OFFSET(&state, last),
            STATE_OFFSET(&state, state.start)
            );
        if (!item)
            goto error;
        /* PyList_Append takes its own reference, so ours is dropped */
        status = PyList_Append(list, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;

        /* add groups (if any) */
        for (i = 0; i < self->groups; i++) {
            item = state_getslice(&state, i+1, string, 0);
            if (!item)
                goto error;
            status = PyList_Append(list, item);
            Py_DECREF(item);
            if (status < 0)
                goto error;
        }

        n = n + 1;
        /* Flag an empty match (ptr did not move past start) before
           resuming from the end of this match.
           NOTE(review): must_advance is consumed by the engine in
           sre_lib.h, presumably to avoid reporting the same empty match
           again -- confirm there. */
        state.must_advance = (state.ptr == state.start);
        last = state.start = state.ptr;

    }

    /* get segment following last match (even if empty) */
    /* state.endpos is the end boundary as clamped by state_init */
    item = getslice(state.isbytes, state.beginning,
        string, STATE_OFFSET(&state, last), state.endpos
        );
    if (!item)
        goto error;
    status = PyList_Append(list, item);
    Py_DECREF(item);
    if (status < 0)
        goto error;

    state_fini(&state);
    return list;

error:
    Py_DECREF(list);
    state_fini(&state);
    return NULL;

}
976 
977 static PyObject*
pattern_subx(PatternObject * self,PyObject * ptemplate,PyObject * string,Py_ssize_t count,Py_ssize_t subn)978 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
979              Py_ssize_t count, Py_ssize_t subn)
980 {
981     SRE_STATE state;
982     PyObject* list;
983     PyObject* joiner;
984     PyObject* item;
985     PyObject* filter;
986     PyObject* match;
987     const void* ptr;
988     Py_ssize_t status;
989     Py_ssize_t n;
990     Py_ssize_t i, b, e;
991     int isbytes, charsize;
992     int filter_is_callable;
993     Py_buffer view;
994 
995     if (PyCallable_Check(ptemplate)) {
996         /* sub/subn takes either a function or a template */
997         filter = ptemplate;
998         Py_INCREF(filter);
999         filter_is_callable = 1;
1000     } else {
1001         /* if not callable, check if it's a literal string */
1002         int literal;
1003         view.buf = NULL;
1004         ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1005         if (ptr) {
1006             if (charsize == 1)
1007                 literal = memchr(ptr, '\\', n) == NULL;
1008             else
1009                 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1010         } else {
1011             PyErr_Clear();
1012             literal = 0;
1013         }
1014         if (view.buf)
1015             PyBuffer_Release(&view);
1016         if (literal) {
1017             filter = ptemplate;
1018             Py_INCREF(filter);
1019             filter_is_callable = 0;
1020         } else {
1021             /* not a literal; hand it over to the template compiler */
1022             filter = call(
1023                 SRE_PY_MODULE, "_subx",
1024                 PyTuple_Pack(2, self, ptemplate)
1025                 );
1026             if (!filter)
1027                 return NULL;
1028             filter_is_callable = PyCallable_Check(filter);
1029         }
1030     }
1031 
1032     if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1033         Py_DECREF(filter);
1034         return NULL;
1035     }
1036 
1037     list = PyList_New(0);
1038     if (!list) {
1039         Py_DECREF(filter);
1040         state_fini(&state);
1041         return NULL;
1042     }
1043 
1044     n = i = 0;
1045 
1046     while (!count || n < count) {
1047 
1048         state_reset(&state);
1049 
1050         state.ptr = state.start;
1051 
1052         status = sre_search(&state, PatternObject_GetCode(self));
1053         if (PyErr_Occurred())
1054             goto error;
1055 
1056         if (status <= 0) {
1057             if (status == 0)
1058                 break;
1059             pattern_error(status);
1060             goto error;
1061         }
1062 
1063         b = STATE_OFFSET(&state, state.start);
1064         e = STATE_OFFSET(&state, state.ptr);
1065 
1066         if (i < b) {
1067             /* get segment before this match */
1068             item = getslice(state.isbytes, state.beginning,
1069                 string, i, b);
1070             if (!item)
1071                 goto error;
1072             status = PyList_Append(list, item);
1073             Py_DECREF(item);
1074             if (status < 0)
1075                 goto error;
1076 
1077         }
1078 
1079         if (filter_is_callable) {
1080             /* pass match object through filter */
1081             match = pattern_new_match(self, &state, 1);
1082             if (!match)
1083                 goto error;
1084             item = PyObject_CallOneArg(filter, match);
1085             Py_DECREF(match);
1086             if (!item)
1087                 goto error;
1088         } else {
1089             /* filter is literal string */
1090             item = filter;
1091             Py_INCREF(item);
1092         }
1093 
1094         /* add to list */
1095         if (item != Py_None) {
1096             status = PyList_Append(list, item);
1097             Py_DECREF(item);
1098             if (status < 0)
1099                 goto error;
1100         }
1101 
1102         i = e;
1103         n = n + 1;
1104         state.must_advance = (state.ptr == state.start);
1105         state.start = state.ptr;
1106     }
1107 
1108     /* get segment following last match */
1109     if (i < state.endpos) {
1110         item = getslice(state.isbytes, state.beginning,
1111                         string, i, state.endpos);
1112         if (!item)
1113             goto error;
1114         status = PyList_Append(list, item);
1115         Py_DECREF(item);
1116         if (status < 0)
1117             goto error;
1118     }
1119 
1120     state_fini(&state);
1121 
1122     Py_DECREF(filter);
1123 
1124     /* convert list to single string (also removes list) */
1125     joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1126     if (!joiner) {
1127         Py_DECREF(list);
1128         return NULL;
1129     }
1130     if (PyList_GET_SIZE(list) == 0) {
1131         Py_DECREF(list);
1132         item = joiner;
1133     }
1134     else {
1135         if (state.isbytes)
1136             item = _PyBytes_Join(joiner, list);
1137         else
1138             item = PyUnicode_Join(joiner, list);
1139         Py_DECREF(joiner);
1140         Py_DECREF(list);
1141         if (!item)
1142             return NULL;
1143     }
1144 
1145     if (subn)
1146         return Py_BuildValue("Nn", item, n);
1147 
1148     return item;
1149 
1150 error:
1151     Py_DECREF(list);
1152     state_fini(&state);
1153     Py_DECREF(filter);
1154     return NULL;
1155 
1156 }
1157 
1158 /*[clinic input]
1159 _sre.SRE_Pattern.sub
1160 
1161     repl: object
1162     string: object
1163     count: Py_ssize_t = 0
1164 
1165 Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1166 [clinic start generated code]*/
1167 
1168 static PyObject *
_sre_SRE_Pattern_sub_impl(PatternObject * self,PyObject * repl,PyObject * string,Py_ssize_t count)1169 _sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1170                           PyObject *string, Py_ssize_t count)
1171 /*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1172 {
1173     return pattern_subx(self, repl, string, count, 0);
1174 }
1175 
1176 /*[clinic input]
1177 _sre.SRE_Pattern.subn
1178 
1179     repl: object
1180     string: object
1181     count: Py_ssize_t = 0
1182 
1183 Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1184 [clinic start generated code]*/
1185 
1186 static PyObject *
_sre_SRE_Pattern_subn_impl(PatternObject * self,PyObject * repl,PyObject * string,Py_ssize_t count)1187 _sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1188                            PyObject *string, Py_ssize_t count)
1189 /*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1190 {
1191     return pattern_subx(self, repl, string, count, 1);
1192 }
1193 
1194 /*[clinic input]
1195 _sre.SRE_Pattern.__copy__
1196 
1197 [clinic start generated code]*/
1198 
1199 static PyObject *
_sre_SRE_Pattern___copy___impl(PatternObject * self)1200 _sre_SRE_Pattern___copy___impl(PatternObject *self)
1201 /*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1202 {
1203     Py_INCREF(self);
1204     return (PyObject *)self;
1205 }
1206 
1207 /*[clinic input]
1208 _sre.SRE_Pattern.__deepcopy__
1209 
1210     memo: object
1211     /
1212 
1213 [clinic start generated code]*/
1214 
1215 static PyObject *
_sre_SRE_Pattern___deepcopy__(PatternObject * self,PyObject * memo)1216 _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1217 /*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
1218 {
1219     Py_INCREF(self);
1220     return (PyObject *)self;
1221 }
1222 
1223 static PyObject *
pattern_repr(PatternObject * obj)1224 pattern_repr(PatternObject *obj)
1225 {
1226     static const struct {
1227         const char *name;
1228         int value;
1229     } flag_names[] = {
1230         {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1231         {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1232         {"re.LOCALE", SRE_FLAG_LOCALE},
1233         {"re.MULTILINE", SRE_FLAG_MULTILINE},
1234         {"re.DOTALL", SRE_FLAG_DOTALL},
1235         {"re.UNICODE", SRE_FLAG_UNICODE},
1236         {"re.VERBOSE", SRE_FLAG_VERBOSE},
1237         {"re.DEBUG", SRE_FLAG_DEBUG},
1238         {"re.ASCII", SRE_FLAG_ASCII},
1239     };
1240     PyObject *result = NULL;
1241     PyObject *flag_items;
1242     size_t i;
1243     int flags = obj->flags;
1244 
1245     /* Omit re.UNICODE for valid string patterns. */
1246     if (obj->isbytes == 0 &&
1247         (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1248          SRE_FLAG_UNICODE)
1249         flags &= ~SRE_FLAG_UNICODE;
1250 
1251     flag_items = PyList_New(0);
1252     if (!flag_items)
1253         return NULL;
1254 
1255     for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1256         if (flags & flag_names[i].value) {
1257             PyObject *item = PyUnicode_FromString(flag_names[i].name);
1258             if (!item)
1259                 goto done;
1260 
1261             if (PyList_Append(flag_items, item) < 0) {
1262                 Py_DECREF(item);
1263                 goto done;
1264             }
1265             Py_DECREF(item);
1266             flags &= ~flag_names[i].value;
1267         }
1268     }
1269     if (flags) {
1270         PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1271         if (!item)
1272             goto done;
1273 
1274         if (PyList_Append(flag_items, item) < 0) {
1275             Py_DECREF(item);
1276             goto done;
1277         }
1278         Py_DECREF(item);
1279     }
1280 
1281     if (PyList_Size(flag_items) > 0) {
1282         PyObject *flags_result;
1283         PyObject *sep = PyUnicode_FromString("|");
1284         if (!sep)
1285             goto done;
1286         flags_result = PyUnicode_Join(sep, flag_items);
1287         Py_DECREF(sep);
1288         if (!flags_result)
1289             goto done;
1290         result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1291                                       obj->pattern, flags_result);
1292         Py_DECREF(flags_result);
1293     }
1294     else {
1295         result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1296     }
1297 
1298 done:
1299     Py_DECREF(flag_items);
1300     return result;
1301 }
1302 
1303 PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1304 
1305 /* PatternObject's 'groupindex' method. */
1306 static PyObject *
pattern_groupindex(PatternObject * self,void * Py_UNUSED (ignored))1307 pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
1308 {
1309     if (self->groupindex == NULL)
1310         return PyDict_New();
1311     return PyDictProxy_New(self->groupindex);
1312 }
1313 
1314 static int _validate(PatternObject *self); /* Forward */
1315 
1316 /*[clinic input]
1317 _sre.compile
1318 
1319     pattern: object
1320     flags: int
1321     code: object(subclass_of='&PyList_Type')
1322     groups: Py_ssize_t
1323     groupindex: object(subclass_of='&PyDict_Type')
1324     indexgroup: object(subclass_of='&PyTuple_Type')
1325 
1326 [clinic start generated code]*/
1327 
1328 static PyObject *
_sre_compile_impl(PyObject * module,PyObject * pattern,int flags,PyObject * code,Py_ssize_t groups,PyObject * groupindex,PyObject * indexgroup)1329 _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1330                   PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1331                   PyObject *indexgroup)
1332 /*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1333 {
1334     /* "compile" pattern descriptor to pattern object */
1335 
1336     PatternObject* self;
1337     Py_ssize_t i, n;
1338 
1339     n = PyList_GET_SIZE(code);
1340     /* coverity[ampersand_in_size] */
1341     self = PyObject_NewVar(PatternObject, &Pattern_Type, n);
1342     if (!self)
1343         return NULL;
1344     self->weakreflist = NULL;
1345     self->pattern = NULL;
1346     self->groupindex = NULL;
1347     self->indexgroup = NULL;
1348 
1349     self->codesize = n;
1350 
1351     for (i = 0; i < n; i++) {
1352         PyObject *o = PyList_GET_ITEM(code, i);
1353         unsigned long value = PyLong_AsUnsignedLong(o);
1354         self->code[i] = (SRE_CODE) value;
1355         if ((unsigned long) self->code[i] != value) {
1356             PyErr_SetString(PyExc_OverflowError,
1357                             "regular expression code size limit exceeded");
1358             break;
1359         }
1360     }
1361 
1362     if (PyErr_Occurred()) {
1363         Py_DECREF(self);
1364         return NULL;
1365     }
1366 
1367     if (pattern == Py_None) {
1368         self->isbytes = -1;
1369     }
1370     else {
1371         Py_ssize_t p_length;
1372         int charsize;
1373         Py_buffer view;
1374         view.buf = NULL;
1375         if (!getstring(pattern, &p_length, &self->isbytes,
1376                        &charsize, &view)) {
1377             Py_DECREF(self);
1378             return NULL;
1379         }
1380         if (view.buf)
1381             PyBuffer_Release(&view);
1382     }
1383 
1384     Py_INCREF(pattern);
1385     self->pattern = pattern;
1386 
1387     self->flags = flags;
1388 
1389     self->groups = groups;
1390 
1391     if (PyDict_GET_SIZE(groupindex) > 0) {
1392         Py_INCREF(groupindex);
1393         self->groupindex = groupindex;
1394         if (PyTuple_GET_SIZE(indexgroup) > 0) {
1395             Py_INCREF(indexgroup);
1396             self->indexgroup = indexgroup;
1397         }
1398     }
1399 
1400     if (!_validate(self)) {
1401         Py_DECREF(self);
1402         return NULL;
1403     }
1404 
1405     return (PyObject*) self;
1406 }
1407 
1408 /* -------------------------------------------------------------------- */
1409 /* Code validation */
1410 
1411 /* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
1413    for conformance with the code patterns generated there.
1414 
1415    The nice thing about the generated code is that it is position-independent:
1416    all jumps are relative jumps forward.  Also, jumps don't cross each other:
1417    the target of a later jump is always earlier than the target of an earlier
1418    jump.  IOW, this is okay:
1419 
1420    J---------J-------T--------T
1421     \         \_____/        /
1422      \______________________/
1423 
1424    but this is not:
1425 
1426    J---------J-------T--------T
1427     \_________\_____/        /
1428                \____________/
1429 
1430    It also helps that SRE_CODE is always an unsigned type.
1431 */
1432 
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete,
   parenthesized printf argument list, e.g. VTRACE(("x=%d\n", x)).
   It expands to a printf call when VVERBOSE is defined and to an empty
   statement otherwise. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: trace the source line, then return 0 from the
   enclosing function */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros are not self-contained: they use the locals 'code' and
   'end' of the enclosing function and store into its 'op', 'arg' or
   'skip' variable.  Each one FAILs (returns 0 from the enclosing
   function) when 'code' has already reached 'end'.

   GET_OP and GET_ARG read one word and advance 'code' past it.

   GET_SKIP_ADJ(adj) reads the skip word and FAILs if skip-adj is larger
   than the distance from the skip word itself to 'end'; only after that
   check is 'code' advanced past the skip word.  GET_SKIP is the adj == 0
   case. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-adj > (uintptr_t)(end - code))      \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
1473 
1474 static int
_validate_charset(SRE_CODE * code,SRE_CODE * end)1475 _validate_charset(SRE_CODE *code, SRE_CODE *end)
1476 {
1477     /* Some variables are manipulated by the macros above */
1478     SRE_CODE op;
1479     SRE_CODE arg;
1480     SRE_CODE offset;
1481     int i;
1482 
1483     while (code < end) {
1484         GET_OP;
1485         switch (op) {
1486 
1487         case SRE_OP_NEGATE:
1488             break;
1489 
1490         case SRE_OP_LITERAL:
1491             GET_ARG;
1492             break;
1493 
1494         case SRE_OP_RANGE:
1495         case SRE_OP_RANGE_UNI_IGNORE:
1496             GET_ARG;
1497             GET_ARG;
1498             break;
1499 
1500         case SRE_OP_CHARSET:
1501             offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1502             if (offset > (uintptr_t)(end - code))
1503                 FAIL;
1504             code += offset;
1505             break;
1506 
1507         case SRE_OP_BIGCHARSET:
1508             GET_ARG; /* Number of blocks */
1509             offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1510             if (offset > (uintptr_t)(end - code))
1511                 FAIL;
1512             /* Make sure that each byte points to a valid block */
1513             for (i = 0; i < 256; i++) {
1514                 if (((unsigned char *)code)[i] >= arg)
1515                     FAIL;
1516             }
1517             code += offset;
1518             offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1519             if (offset > (uintptr_t)(end - code))
1520                 FAIL;
1521             code += offset;
1522             break;
1523 
1524         case SRE_OP_CATEGORY:
1525             GET_ARG;
1526             switch (arg) {
1527             case SRE_CATEGORY_DIGIT:
1528             case SRE_CATEGORY_NOT_DIGIT:
1529             case SRE_CATEGORY_SPACE:
1530             case SRE_CATEGORY_NOT_SPACE:
1531             case SRE_CATEGORY_WORD:
1532             case SRE_CATEGORY_NOT_WORD:
1533             case SRE_CATEGORY_LINEBREAK:
1534             case SRE_CATEGORY_NOT_LINEBREAK:
1535             case SRE_CATEGORY_LOC_WORD:
1536             case SRE_CATEGORY_LOC_NOT_WORD:
1537             case SRE_CATEGORY_UNI_DIGIT:
1538             case SRE_CATEGORY_UNI_NOT_DIGIT:
1539             case SRE_CATEGORY_UNI_SPACE:
1540             case SRE_CATEGORY_UNI_NOT_SPACE:
1541             case SRE_CATEGORY_UNI_WORD:
1542             case SRE_CATEGORY_UNI_NOT_WORD:
1543             case SRE_CATEGORY_UNI_LINEBREAK:
1544             case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1545                 break;
1546             default:
1547                 FAIL;
1548             }
1549             break;
1550 
1551         default:
1552             FAIL;
1553 
1554         }
1555     }
1556 
1557     return 1;
1558 }
1559 
1560 static int
_validate_inner(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)1561 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1562 {
1563     /* Some variables are manipulated by the macros above */
1564     SRE_CODE op;
1565     SRE_CODE arg;
1566     SRE_CODE skip;
1567 
1568     VTRACE(("code=%p, end=%p\n", code, end));
1569 
1570     if (code > end)
1571         FAIL;
1572 
1573     while (code < end) {
1574         GET_OP;
1575         switch (op) {
1576 
1577         case SRE_OP_MARK:
1578             /* We don't check whether marks are properly nested; the
1579                sre_match() code is robust even if they don't, and the worst
1580                you can get is nonsensical match results. */
1581             GET_ARG;
1582             if (arg > 2 * (size_t)groups + 1) {
1583                 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1584                 FAIL;
1585             }
1586             break;
1587 
1588         case SRE_OP_LITERAL:
1589         case SRE_OP_NOT_LITERAL:
1590         case SRE_OP_LITERAL_IGNORE:
1591         case SRE_OP_NOT_LITERAL_IGNORE:
1592         case SRE_OP_LITERAL_UNI_IGNORE:
1593         case SRE_OP_NOT_LITERAL_UNI_IGNORE:
1594         case SRE_OP_LITERAL_LOC_IGNORE:
1595         case SRE_OP_NOT_LITERAL_LOC_IGNORE:
1596             GET_ARG;
1597             /* The arg is just a character, nothing to check */
1598             break;
1599 
1600         case SRE_OP_SUCCESS:
1601         case SRE_OP_FAILURE:
1602             /* Nothing to check; these normally end the matching process */
1603             break;
1604 
1605         case SRE_OP_AT:
1606             GET_ARG;
1607             switch (arg) {
1608             case SRE_AT_BEGINNING:
1609             case SRE_AT_BEGINNING_STRING:
1610             case SRE_AT_BEGINNING_LINE:
1611             case SRE_AT_END:
1612             case SRE_AT_END_LINE:
1613             case SRE_AT_END_STRING:
1614             case SRE_AT_BOUNDARY:
1615             case SRE_AT_NON_BOUNDARY:
1616             case SRE_AT_LOC_BOUNDARY:
1617             case SRE_AT_LOC_NON_BOUNDARY:
1618             case SRE_AT_UNI_BOUNDARY:
1619             case SRE_AT_UNI_NON_BOUNDARY:
1620                 break;
1621             default:
1622                 FAIL;
1623             }
1624             break;
1625 
1626         case SRE_OP_ANY:
1627         case SRE_OP_ANY_ALL:
1628             /* These have no operands */
1629             break;
1630 
1631         case SRE_OP_IN:
1632         case SRE_OP_IN_IGNORE:
1633         case SRE_OP_IN_UNI_IGNORE:
1634         case SRE_OP_IN_LOC_IGNORE:
1635             GET_SKIP;
1636             /* Stop 1 before the end; we check the FAILURE below */
1637             if (!_validate_charset(code, code+skip-2))
1638                 FAIL;
1639             if (code[skip-2] != SRE_OP_FAILURE)
1640                 FAIL;
1641             code += skip-1;
1642             break;
1643 
1644         case SRE_OP_INFO:
1645             {
1646                 /* A minimal info field is
1647                    <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1648                    If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1649                    more follows. */
1650                 SRE_CODE flags, i;
1651                 SRE_CODE *newcode;
1652                 GET_SKIP;
1653                 newcode = code+skip-1;
1654                 GET_ARG; flags = arg;
1655                 GET_ARG;
1656                 GET_ARG;
1657                 /* Check that only valid flags are present */
1658                 if ((flags & ~(SRE_INFO_PREFIX |
1659                                SRE_INFO_LITERAL |
1660                                SRE_INFO_CHARSET)) != 0)
1661                     FAIL;
1662                 /* PREFIX and CHARSET are mutually exclusive */
1663                 if ((flags & SRE_INFO_PREFIX) &&
1664                     (flags & SRE_INFO_CHARSET))
1665                     FAIL;
1666                 /* LITERAL implies PREFIX */
1667                 if ((flags & SRE_INFO_LITERAL) &&
1668                     !(flags & SRE_INFO_PREFIX))
1669                     FAIL;
1670                 /* Validate the prefix */
1671                 if (flags & SRE_INFO_PREFIX) {
1672                     SRE_CODE prefix_len;
1673                     GET_ARG; prefix_len = arg;
1674                     GET_ARG;
1675                     /* Here comes the prefix string */
1676                     if (prefix_len > (uintptr_t)(newcode - code))
1677                         FAIL;
1678                     code += prefix_len;
1679                     /* And here comes the overlap table */
1680                     if (prefix_len > (uintptr_t)(newcode - code))
1681                         FAIL;
1682                     /* Each overlap value should be < prefix_len */
1683                     for (i = 0; i < prefix_len; i++) {
1684                         if (code[i] >= prefix_len)
1685                             FAIL;
1686                     }
1687                     code += prefix_len;
1688                 }
1689                 /* Validate the charset */
1690                 if (flags & SRE_INFO_CHARSET) {
1691                     if (!_validate_charset(code, newcode-1))
1692                         FAIL;
1693                     if (newcode[-1] != SRE_OP_FAILURE)
1694                         FAIL;
1695                     code = newcode;
1696                 }
1697                 else if (code != newcode) {
1698                   VTRACE(("code=%p, newcode=%p\n", code, newcode));
1699                     FAIL;
1700                 }
1701             }
1702             break;
1703 
1704         case SRE_OP_BRANCH:
1705             {
1706                 SRE_CODE *target = NULL;
1707                 for (;;) {
1708                     GET_SKIP;
1709                     if (skip == 0)
1710                         break;
1711                     /* Stop 2 before the end; we check the JUMP below */
1712                     if (!_validate_inner(code, code+skip-3, groups))
1713                         FAIL;
1714                     code += skip-3;
1715                     /* Check that it ends with a JUMP, and that each JUMP
1716                        has the same target */
1717                     GET_OP;
1718                     if (op != SRE_OP_JUMP)
1719                         FAIL;
1720                     GET_SKIP;
1721                     if (target == NULL)
1722                         target = code+skip-1;
1723                     else if (code+skip-1 != target)
1724                         FAIL;
1725                 }
1726             }
1727             break;
1728 
1729         case SRE_OP_REPEAT_ONE:
1730         case SRE_OP_MIN_REPEAT_ONE:
1731             {
1732                 SRE_CODE min, max;
1733                 GET_SKIP;
1734                 GET_ARG; min = arg;
1735                 GET_ARG; max = arg;
1736                 if (min > max)
1737                     FAIL;
1738                 if (max > SRE_MAXREPEAT)
1739                     FAIL;
1740                 if (!_validate_inner(code, code+skip-4, groups))
1741                     FAIL;
1742                 code += skip-4;
1743                 GET_OP;
1744                 if (op != SRE_OP_SUCCESS)
1745                     FAIL;
1746             }
1747             break;
1748 
1749         case SRE_OP_REPEAT:
1750             {
1751                 SRE_CODE min, max;
1752                 GET_SKIP;
1753                 GET_ARG; min = arg;
1754                 GET_ARG; max = arg;
1755                 if (min > max)
1756                     FAIL;
1757                 if (max > SRE_MAXREPEAT)
1758                     FAIL;
1759                 if (!_validate_inner(code, code+skip-3, groups))
1760                     FAIL;
1761                 code += skip-3;
1762                 GET_OP;
1763                 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1764                     FAIL;
1765             }
1766             break;
1767 
1768         case SRE_OP_GROUPREF:
1769         case SRE_OP_GROUPREF_IGNORE:
1770         case SRE_OP_GROUPREF_UNI_IGNORE:
1771         case SRE_OP_GROUPREF_LOC_IGNORE:
1772             GET_ARG;
1773             if (arg >= (size_t)groups)
1774                 FAIL;
1775             break;
1776 
1777         case SRE_OP_GROUPREF_EXISTS:
1778             /* The regex syntax for this is: '(?(group)then|else)', where
1779                'group' is either an integer group number or a group name,
1780                'then' and 'else' are sub-regexes, and 'else' is optional. */
1781             GET_ARG;
1782             if (arg >= (size_t)groups)
1783                 FAIL;
1784             GET_SKIP_ADJ(1);
1785             code--; /* The skip is relative to the first arg! */
1786             /* There are two possibilities here: if there is both a 'then'
1787                part and an 'else' part, the generated code looks like:
1788 
1789                GROUPREF_EXISTS
1790                <group>
1791                <skipyes>
1792                ...then part...
1793                JUMP
1794                <skipno>
1795                (<skipyes> jumps here)
1796                ...else part...
1797                (<skipno> jumps here)
1798 
1799                If there is only a 'then' part, it looks like:
1800 
1801                GROUPREF_EXISTS
1802                <group>
1803                <skip>
1804                ...then part...
1805                (<skip> jumps here)
1806 
1807                There is no direct way to decide which it is, and we don't want
1808                to allow arbitrary jumps anywhere in the code; so we just look
1809                for a JUMP opcode preceding our skip target.
1810             */
1811             if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
1812                 code[skip-3] == SRE_OP_JUMP)
1813             {
1814                 VTRACE(("both then and else parts present\n"));
1815                 if (!_validate_inner(code+1, code+skip-3, groups))
1816                     FAIL;
1817                 code += skip-2; /* Position after JUMP, at <skipno> */
1818                 GET_SKIP;
1819                 if (!_validate_inner(code, code+skip-1, groups))
1820                     FAIL;
1821                 code += skip-1;
1822             }
1823             else {
1824                 VTRACE(("only a then part present\n"));
1825                 if (!_validate_inner(code+1, code+skip-1, groups))
1826                     FAIL;
1827                 code += skip-1;
1828             }
1829             break;
1830 
1831         case SRE_OP_ASSERT:
1832         case SRE_OP_ASSERT_NOT:
1833             GET_SKIP;
1834             GET_ARG; /* 0 for lookahead, width for lookbehind */
1835             code--; /* Back up over arg to simplify math below */
1836             if (arg & 0x80000000)
1837                 FAIL; /* Width too large */
1838             /* Stop 1 before the end; we check the SUCCESS below */
1839             if (!_validate_inner(code+1, code+skip-2, groups))
1840                 FAIL;
1841             code += skip-2;
1842             GET_OP;
1843             if (op != SRE_OP_SUCCESS)
1844                 FAIL;
1845             break;
1846 
1847         default:
1848             FAIL;
1849 
1850         }
1851     }
1852 
1853     VTRACE(("okay\n"));
1854     return 1;
1855 }
1856 
1857 static int
_validate_outer(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)1858 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1859 {
1860     if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1861         code >= end || end[-1] != SRE_OP_SUCCESS)
1862         FAIL;
1863     return _validate_inner(code, end-1, groups);
1864 }
1865 
1866 static int
_validate(PatternObject * self)1867 _validate(PatternObject *self)
1868 {
1869     if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1870     {
1871         PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1872         return 0;
1873     }
1874     else
1875         VTRACE(("Success!\n"));
1876     return 1;
1877 }
1878 
1879 /* -------------------------------------------------------------------- */
1880 /* match methods */
1881 
1882 static void
match_dealloc(MatchObject * self)1883 match_dealloc(MatchObject* self)
1884 {
1885     Py_XDECREF(self->regs);
1886     Py_XDECREF(self->string);
1887     Py_DECREF(self->pattern);
1888     PyObject_DEL(self);
1889 }
1890 
1891 static PyObject*
match_getslice_by_index(MatchObject * self,Py_ssize_t index,PyObject * def)1892 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
1893 {
1894     Py_ssize_t length;
1895     int isbytes, charsize;
1896     Py_buffer view;
1897     PyObject *result;
1898     const void* ptr;
1899     Py_ssize_t i, j;
1900 
1901     assert(0 <= index && index < self->groups);
1902     index *= 2;
1903 
1904     if (self->string == Py_None || self->mark[index] < 0) {
1905         /* return default value if the string or group is undefined */
1906         Py_INCREF(def);
1907         return def;
1908     }
1909 
1910     ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
1911     if (ptr == NULL)
1912         return NULL;
1913 
1914     i = self->mark[index];
1915     j = self->mark[index+1];
1916     i = Py_MIN(i, length);
1917     j = Py_MIN(j, length);
1918     result = getslice(isbytes, ptr, self->string, i, j);
1919     if (isbytes && view.buf != NULL)
1920         PyBuffer_Release(&view);
1921     return result;
1922 }
1923 
1924 static Py_ssize_t
match_getindex(MatchObject * self,PyObject * index)1925 match_getindex(MatchObject* self, PyObject* index)
1926 {
1927     Py_ssize_t i;
1928 
1929     if (index == NULL)
1930         /* Default value */
1931         return 0;
1932 
1933     if (PyIndex_Check(index)) {
1934         i = PyNumber_AsSsize_t(index, NULL);
1935     }
1936     else {
1937         i = -1;
1938 
1939         if (self->pattern->groupindex) {
1940             index = PyDict_GetItemWithError(self->pattern->groupindex, index);
1941             if (index && PyLong_Check(index)) {
1942                 i = PyLong_AsSsize_t(index);
1943             }
1944         }
1945     }
1946     if (i < 0 || i >= self->groups) {
1947         /* raise IndexError if we were given a bad group number */
1948         if (!PyErr_Occurred()) {
1949             PyErr_SetString(PyExc_IndexError, "no such group");
1950         }
1951         return -1;
1952     }
1953 
1954     return i;
1955 }
1956 
1957 static PyObject*
match_getslice(MatchObject * self,PyObject * index,PyObject * def)1958 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
1959 {
1960     Py_ssize_t i = match_getindex(self, index);
1961 
1962     if (i < 0) {
1963         return NULL;
1964     }
1965 
1966     return match_getslice_by_index(self, i, def);
1967 }
1968 
1969 /*[clinic input]
1970 _sre.SRE_Match.expand
1971 
1972     template: object
1973 
1974 Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
1975 [clinic start generated code]*/
1976 
1977 static PyObject *
_sre_SRE_Match_expand_impl(MatchObject * self,PyObject * template)1978 _sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
1979 /*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
1980 {
1981     /* delegate to Python code */
1982     return call(
1983         SRE_PY_MODULE, "_expand",
1984         PyTuple_Pack(3, self->pattern, self, template)
1985         );
1986 }
1987 
1988 static PyObject*
match_group(MatchObject * self,PyObject * args)1989 match_group(MatchObject* self, PyObject* args)
1990 {
1991     PyObject* result;
1992     Py_ssize_t i, size;
1993 
1994     size = PyTuple_GET_SIZE(args);
1995 
1996     switch (size) {
1997     case 0:
1998         result = match_getslice(self, _PyLong_Zero, Py_None);
1999         break;
2000     case 1:
2001         result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2002         break;
2003     default:
2004         /* fetch multiple items */
2005         result = PyTuple_New(size);
2006         if (!result)
2007             return NULL;
2008         for (i = 0; i < size; i++) {
2009             PyObject* item = match_getslice(
2010                 self, PyTuple_GET_ITEM(args, i), Py_None
2011                 );
2012             if (!item) {
2013                 Py_DECREF(result);
2014                 return NULL;
2015             }
2016             PyTuple_SET_ITEM(result, i, item);
2017         }
2018         break;
2019     }
2020     return result;
2021 }
2022 
2023 static PyObject*
match_getitem(MatchObject * self,PyObject * name)2024 match_getitem(MatchObject* self, PyObject* name)
2025 {
2026     return match_getslice(self, name, Py_None);
2027 }
2028 
2029 /*[clinic input]
2030 _sre.SRE_Match.groups
2031 
2032     default: object = None
2033         Is used for groups that did not participate in the match.
2034 
2035 Return a tuple containing all the subgroups of the match, from 1.
2036 [clinic start generated code]*/
2037 
2038 static PyObject *
_sre_SRE_Match_groups_impl(MatchObject * self,PyObject * default_value)2039 _sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2040 /*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2041 {
2042     PyObject* result;
2043     Py_ssize_t index;
2044 
2045     result = PyTuple_New(self->groups-1);
2046     if (!result)
2047         return NULL;
2048 
2049     for (index = 1; index < self->groups; index++) {
2050         PyObject* item;
2051         item = match_getslice_by_index(self, index, default_value);
2052         if (!item) {
2053             Py_DECREF(result);
2054             return NULL;
2055         }
2056         PyTuple_SET_ITEM(result, index-1, item);
2057     }
2058 
2059     return result;
2060 }
2061 
2062 /*[clinic input]
2063 _sre.SRE_Match.groupdict
2064 
2065     default: object = None
2066         Is used for groups that did not participate in the match.
2067 
2068 Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2069 [clinic start generated code]*/
2070 
2071 static PyObject *
_sre_SRE_Match_groupdict_impl(MatchObject * self,PyObject * default_value)2072 _sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2073 /*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
2074 {
2075     PyObject *result;
2076     PyObject *key;
2077     PyObject *value;
2078     Py_ssize_t pos = 0;
2079     Py_hash_t hash;
2080 
2081     result = PyDict_New();
2082     if (!result || !self->pattern->groupindex)
2083         return result;
2084 
2085     while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2086         int status;
2087         Py_INCREF(key);
2088         value = match_getslice(self, key, default_value);
2089         if (!value) {
2090             Py_DECREF(key);
2091             goto failed;
2092         }
2093         status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2094         Py_DECREF(value);
2095         Py_DECREF(key);
2096         if (status < 0)
2097             goto failed;
2098     }
2099 
2100     return result;
2101 
2102 failed:
2103     Py_DECREF(result);
2104     return NULL;
2105 }
2106 
2107 /*[clinic input]
2108 _sre.SRE_Match.start -> Py_ssize_t
2109 
2110     group: object(c_default="NULL") = 0
2111     /
2112 
2113 Return index of the start of the substring matched by group.
2114 [clinic start generated code]*/
2115 
2116 static Py_ssize_t
_sre_SRE_Match_start_impl(MatchObject * self,PyObject * group)2117 _sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2118 /*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2119 {
2120     Py_ssize_t index = match_getindex(self, group);
2121 
2122     if (index < 0) {
2123         return -1;
2124     }
2125 
2126     /* mark is -1 if group is undefined */
2127     return self->mark[index*2];
2128 }
2129 
2130 /*[clinic input]
2131 _sre.SRE_Match.end -> Py_ssize_t
2132 
2133     group: object(c_default="NULL") = 0
2134     /
2135 
2136 Return index of the end of the substring matched by group.
2137 [clinic start generated code]*/
2138 
2139 static Py_ssize_t
_sre_SRE_Match_end_impl(MatchObject * self,PyObject * group)2140 _sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2141 /*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2142 {
2143     Py_ssize_t index = match_getindex(self, group);
2144 
2145     if (index < 0) {
2146         return -1;
2147     }
2148 
2149     /* mark is -1 if group is undefined */
2150     return self->mark[index*2+1];
2151 }
2152 
2153 LOCAL(PyObject*)
_pair(Py_ssize_t i1,Py_ssize_t i2)2154 _pair(Py_ssize_t i1, Py_ssize_t i2)
2155 {
2156     PyObject* pair;
2157     PyObject* item;
2158 
2159     pair = PyTuple_New(2);
2160     if (!pair)
2161         return NULL;
2162 
2163     item = PyLong_FromSsize_t(i1);
2164     if (!item)
2165         goto error;
2166     PyTuple_SET_ITEM(pair, 0, item);
2167 
2168     item = PyLong_FromSsize_t(i2);
2169     if (!item)
2170         goto error;
2171     PyTuple_SET_ITEM(pair, 1, item);
2172 
2173     return pair;
2174 
2175   error:
2176     Py_DECREF(pair);
2177     return NULL;
2178 }
2179 
2180 /*[clinic input]
2181 _sre.SRE_Match.span
2182 
2183     group: object(c_default="NULL") = 0
2184     /
2185 
2186 For match object m, return the 2-tuple (m.start(group), m.end(group)).
2187 [clinic start generated code]*/
2188 
2189 static PyObject *
_sre_SRE_Match_span_impl(MatchObject * self,PyObject * group)2190 _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2191 /*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2192 {
2193     Py_ssize_t index = match_getindex(self, group);
2194 
2195     if (index < 0) {
2196         return NULL;
2197     }
2198 
2199     /* marks are -1 if group is undefined */
2200     return _pair(self->mark[index*2], self->mark[index*2+1]);
2201 }
2202 
2203 static PyObject*
match_regs(MatchObject * self)2204 match_regs(MatchObject* self)
2205 {
2206     PyObject* regs;
2207     PyObject* item;
2208     Py_ssize_t index;
2209 
2210     regs = PyTuple_New(self->groups);
2211     if (!regs)
2212         return NULL;
2213 
2214     for (index = 0; index < self->groups; index++) {
2215         item = _pair(self->mark[index*2], self->mark[index*2+1]);
2216         if (!item) {
2217             Py_DECREF(regs);
2218             return NULL;
2219         }
2220         PyTuple_SET_ITEM(regs, index, item);
2221     }
2222 
2223     Py_INCREF(regs);
2224     self->regs = regs;
2225 
2226     return regs;
2227 }
2228 
2229 /*[clinic input]
2230 _sre.SRE_Match.__copy__
2231 
2232 [clinic start generated code]*/
2233 
2234 static PyObject *
_sre_SRE_Match___copy___impl(MatchObject * self)2235 _sre_SRE_Match___copy___impl(MatchObject *self)
2236 /*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2237 {
2238     Py_INCREF(self);
2239     return (PyObject *)self;
2240 }
2241 
2242 /*[clinic input]
2243 _sre.SRE_Match.__deepcopy__
2244 
2245     memo: object
2246     /
2247 
2248 [clinic start generated code]*/
2249 
2250 static PyObject *
_sre_SRE_Match___deepcopy__(MatchObject * self,PyObject * memo)2251 _sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2252 /*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
2253 {
2254     Py_INCREF(self);
2255     return (PyObject *)self;
2256 }
2257 
2258 PyDoc_STRVAR(match_doc,
2259 "The result of re.match() and re.search().\n\
2260 Match objects always have a boolean value of True.");
2261 
2262 PyDoc_STRVAR(match_group_doc,
2263 "group([group1, ...]) -> str or tuple.\n\
2264     Return subgroup(s) of the match by indices or names.\n\
2265     For 0 returns the entire match.");
2266 
2267 static PyObject *
match_lastindex_get(MatchObject * self,void * Py_UNUSED (ignored))2268 match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
2269 {
2270     if (self->lastindex >= 0)
2271         return PyLong_FromSsize_t(self->lastindex);
2272     Py_RETURN_NONE;
2273 }
2274 
2275 static PyObject *
match_lastgroup_get(MatchObject * self,void * Py_UNUSED (ignored))2276 match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
2277 {
2278     if (self->pattern->indexgroup &&
2279         self->lastindex >= 0 &&
2280         self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2281     {
2282         PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2283                                             self->lastindex);
2284         Py_INCREF(result);
2285         return result;
2286     }
2287     Py_RETURN_NONE;
2288 }
2289 
2290 static PyObject *
match_regs_get(MatchObject * self,void * Py_UNUSED (ignored))2291 match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
2292 {
2293     if (self->regs) {
2294         Py_INCREF(self->regs);
2295         return self->regs;
2296     } else
2297         return match_regs(self);
2298 }
2299 
2300 static PyObject *
match_repr(MatchObject * self)2301 match_repr(MatchObject *self)
2302 {
2303     PyObject *result;
2304     PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2305     if (group0 == NULL)
2306         return NULL;
2307     result = PyUnicode_FromFormat(
2308             "<%s object; span=(%zd, %zd), match=%.50R>",
2309             Py_TYPE(self)->tp_name,
2310             self->mark[0], self->mark[1], group0);
2311     Py_DECREF(group0);
2312     return result;
2313 }
2314 
2315 
2316 static PyObject*
pattern_new_match(PatternObject * pattern,SRE_STATE * state,Py_ssize_t status)2317 pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
2318 {
2319     /* create match object (from state object) */
2320 
2321     MatchObject* match;
2322     Py_ssize_t i, j;
2323     char* base;
2324     int n;
2325 
2326     if (status > 0) {
2327 
2328         /* create match object (with room for extra group marks) */
2329         /* coverity[ampersand_in_size] */
2330         match = PyObject_NewVar(MatchObject, &Match_Type,
2331                                 2*(pattern->groups+1));
2332         if (!match)
2333             return NULL;
2334 
2335         Py_INCREF(pattern);
2336         match->pattern = pattern;
2337 
2338         Py_INCREF(state->string);
2339         match->string = state->string;
2340 
2341         match->regs = NULL;
2342         match->groups = pattern->groups+1;
2343 
2344         /* fill in group slices */
2345 
2346         base = (char*) state->beginning;
2347         n = state->charsize;
2348 
2349         match->mark[0] = ((char*) state->start - base) / n;
2350         match->mark[1] = ((char*) state->ptr - base) / n;
2351 
2352         for (i = j = 0; i < pattern->groups; i++, j+=2)
2353             if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2354                 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2355                 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2356             } else
2357                 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2358 
2359         match->pos = state->pos;
2360         match->endpos = state->endpos;
2361 
2362         match->lastindex = state->lastindex;
2363 
2364         return (PyObject*) match;
2365 
2366     } else if (status == 0) {
2367 
2368         /* no match */
2369         Py_RETURN_NONE;
2370 
2371     }
2372 
2373     /* internal error */
2374     pattern_error(status);
2375     return NULL;
2376 }
2377 
2378 
2379 /* -------------------------------------------------------------------- */
2380 /* scanner methods (experimental) */
2381 
2382 static void
scanner_dealloc(ScannerObject * self)2383 scanner_dealloc(ScannerObject* self)
2384 {
2385     state_fini(&self->state);
2386     Py_XDECREF(self->pattern);
2387     PyObject_DEL(self);
2388 }
2389 
2390 /*[clinic input]
2391 _sre.SRE_Scanner.match
2392 
2393 [clinic start generated code]*/
2394 
2395 static PyObject *
_sre_SRE_Scanner_match_impl(ScannerObject * self)2396 _sre_SRE_Scanner_match_impl(ScannerObject *self)
2397 /*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
2398 {
2399     SRE_STATE* state = &self->state;
2400     PyObject* match;
2401     Py_ssize_t status;
2402 
2403     if (state->start == NULL)
2404         Py_RETURN_NONE;
2405 
2406     state_reset(state);
2407 
2408     state->ptr = state->start;
2409 
2410     status = sre_match(state, PatternObject_GetCode(self->pattern));
2411     if (PyErr_Occurred())
2412         return NULL;
2413 
2414     match = pattern_new_match((PatternObject*) self->pattern,
2415                                state, status);
2416 
2417     if (status == 0)
2418         state->start = NULL;
2419     else {
2420         state->must_advance = (state->ptr == state->start);
2421         state->start = state->ptr;
2422     }
2423 
2424     return match;
2425 }
2426 
2427 
2428 /*[clinic input]
2429 _sre.SRE_Scanner.search
2430 
2431 [clinic start generated code]*/
2432 
2433 static PyObject *
_sre_SRE_Scanner_search_impl(ScannerObject * self)2434 _sre_SRE_Scanner_search_impl(ScannerObject *self)
2435 /*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
2436 {
2437     SRE_STATE* state = &self->state;
2438     PyObject* match;
2439     Py_ssize_t status;
2440 
2441     if (state->start == NULL)
2442         Py_RETURN_NONE;
2443 
2444     state_reset(state);
2445 
2446     state->ptr = state->start;
2447 
2448     status = sre_search(state, PatternObject_GetCode(self->pattern));
2449     if (PyErr_Occurred())
2450         return NULL;
2451 
2452     match = pattern_new_match((PatternObject*) self->pattern,
2453                                state, status);
2454 
2455     if (status == 0)
2456         state->start = NULL;
2457     else {
2458         state->must_advance = (state->ptr == state->start);
2459         state->start = state->ptr;
2460     }
2461 
2462     return match;
2463 }
2464 
2465 static PyObject *
pattern_scanner(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)2466 pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
2467 {
2468     ScannerObject* scanner;
2469 
2470     /* create scanner object */
2471     scanner = PyObject_New(ScannerObject, &Scanner_Type);
2472     if (!scanner)
2473         return NULL;
2474     scanner->pattern = NULL;
2475 
2476     /* create search state object */
2477     if (!state_init(&scanner->state, self, string, pos, endpos)) {
2478         Py_DECREF(scanner);
2479         return NULL;
2480     }
2481 
2482     Py_INCREF(self);
2483     scanner->pattern = (PyObject*) self;
2484 
2485     return (PyObject*) scanner;
2486 }
2487 
2488 static Py_hash_t
pattern_hash(PatternObject * self)2489 pattern_hash(PatternObject *self)
2490 {
2491     Py_hash_t hash, hash2;
2492 
2493     hash = PyObject_Hash(self->pattern);
2494     if (hash == -1) {
2495         return -1;
2496     }
2497 
2498     hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2499     hash ^= hash2;
2500 
2501     hash ^= self->flags;
2502     hash ^= self->isbytes;
2503     hash ^= self->codesize;
2504 
2505     if (hash == -1) {
2506         hash = -2;
2507     }
2508     return hash;
2509 }
2510 
2511 static PyObject*
pattern_richcompare(PyObject * lefto,PyObject * righto,int op)2512 pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2513 {
2514     PatternObject *left, *right;
2515     int cmp;
2516 
2517     if (op != Py_EQ && op != Py_NE) {
2518         Py_RETURN_NOTIMPLEMENTED;
2519     }
2520 
2521     if (!Py_IS_TYPE(lefto, &Pattern_Type) || !Py_IS_TYPE(righto, &Pattern_Type)) {
2522         Py_RETURN_NOTIMPLEMENTED;
2523     }
2524 
2525     if (lefto == righto) {
2526         /* a pattern is equal to itself */
2527         return PyBool_FromLong(op == Py_EQ);
2528     }
2529 
2530     left = (PatternObject *)lefto;
2531     right = (PatternObject *)righto;
2532 
2533     cmp = (left->flags == right->flags
2534            && left->isbytes == right->isbytes
2535            && left->codesize == right->codesize);
2536     if (cmp) {
2537         /* Compare the code and the pattern because the same pattern can
2538            produce different codes depending on the locale used to compile the
2539            pattern when the re.LOCALE flag is used. Don't compare groups,
2540            indexgroup nor groupindex: they are derivated from the pattern. */
2541         cmp = (memcmp(left->code, right->code,
2542                       sizeof(left->code[0]) * left->codesize) == 0);
2543     }
2544     if (cmp) {
2545         cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2546                                        Py_EQ);
2547         if (cmp < 0) {
2548             return NULL;
2549         }
2550     }
2551     if (op == Py_NE) {
2552         cmp = !cmp;
2553     }
2554     return PyBool_FromLong(cmp);
2555 }
2556 
2557 #include "clinic/_sre.c.h"
2558 
2559 static PyMethodDef pattern_methods[] = {
2560     _SRE_SRE_PATTERN_MATCH_METHODDEF
2561     _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2562     _SRE_SRE_PATTERN_SEARCH_METHODDEF
2563     _SRE_SRE_PATTERN_SUB_METHODDEF
2564     _SRE_SRE_PATTERN_SUBN_METHODDEF
2565     _SRE_SRE_PATTERN_FINDALL_METHODDEF
2566     _SRE_SRE_PATTERN_SPLIT_METHODDEF
2567     _SRE_SRE_PATTERN_FINDITER_METHODDEF
2568     _SRE_SRE_PATTERN_SCANNER_METHODDEF
2569     _SRE_SRE_PATTERN___COPY___METHODDEF
2570     _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2571     {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS,
2572      PyDoc_STR("See PEP 585")},
2573     {NULL, NULL}
2574 };
2575 
2576 static PyGetSetDef pattern_getset[] = {
2577     {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2578       "A dictionary mapping group names to group numbers."},
2579     {NULL}  /* Sentinel */
2580 };
2581 
2582 #define PAT_OFF(x) offsetof(PatternObject, x)
2583 static PyMemberDef pattern_members[] = {
2584     {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY,
2585      "The pattern string from which the RE object was compiled."},
2586     {"flags",      T_INT,       PAT_OFF(flags),         READONLY,
2587      "The regex matching flags."},
2588     {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY,
2589      "The number of capturing groups in the pattern."},
2590     {NULL}  /* Sentinel */
2591 };
2592 
2593 static PyTypeObject Pattern_Type = {
2594     PyVarObject_HEAD_INIT(NULL, 0)
2595     "re.Pattern",
2596     sizeof(PatternObject), sizeof(SRE_CODE),
2597     (destructor)pattern_dealloc,        /* tp_dealloc */
2598     0,                                  /* tp_vectorcall_offset */
2599     0,                                  /* tp_getattr */
2600     0,                                  /* tp_setattr */
2601     0,                                  /* tp_as_async */
2602     (reprfunc)pattern_repr,             /* tp_repr */
2603     0,                                  /* tp_as_number */
2604     0,                                  /* tp_as_sequence */
2605     0,                                  /* tp_as_mapping */
2606     (hashfunc)pattern_hash,             /* tp_hash */
2607     0,                                  /* tp_call */
2608     0,                                  /* tp_str */
2609     0,                                  /* tp_getattro */
2610     0,                                  /* tp_setattro */
2611     0,                                  /* tp_as_buffer */
2612     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
2613     pattern_doc,                        /* tp_doc */
2614     0,                                  /* tp_traverse */
2615     0,                                  /* tp_clear */
2616     pattern_richcompare,                /* tp_richcompare */
2617     offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */
2618     0,                                  /* tp_iter */
2619     0,                                  /* tp_iternext */
2620     pattern_methods,                    /* tp_methods */
2621     pattern_members,                    /* tp_members */
2622     pattern_getset,                     /* tp_getset */
2623 };
2624 
2625 /* Match objects do not support length or assignment, but do support
2626    __getitem__. */
2627 static PyMappingMethods match_as_mapping = {
2628     NULL,
2629     (binaryfunc)match_getitem,
2630     NULL
2631 };
2632 
2633 static PyMethodDef match_methods[] = {
2634     {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2635     _SRE_SRE_MATCH_START_METHODDEF
2636     _SRE_SRE_MATCH_END_METHODDEF
2637     _SRE_SRE_MATCH_SPAN_METHODDEF
2638     _SRE_SRE_MATCH_GROUPS_METHODDEF
2639     _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2640     _SRE_SRE_MATCH_EXPAND_METHODDEF
2641     _SRE_SRE_MATCH___COPY___METHODDEF
2642     _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2643     {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS,
2644      PyDoc_STR("See PEP 585")},
2645     {NULL, NULL}
2646 };
2647 
2648 static PyGetSetDef match_getset[] = {
2649     {"lastindex", (getter)match_lastindex_get, (setter)NULL,
2650      "The integer index of the last matched capturing group."},
2651     {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
2652      "The name of the last matched capturing group."},
2653     {"regs",      (getter)match_regs_get,      (setter)NULL},
2654     {NULL}
2655 };
2656 
2657 #define MATCH_OFF(x) offsetof(MatchObject, x)
2658 static PyMemberDef match_members[] = {
2659     {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY,
2660      "The string passed to match() or search()."},
2661     {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY,
2662      "The regular expression object."},
2663     {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY,
2664      "The index into the string at which the RE engine started looking for a match."},
2665     {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY,
2666      "The index into the string beyond which the RE engine will not go."},
2667     {NULL}
2668 };
2669 
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2672 
2673 static PyTypeObject Match_Type = {
2674     PyVarObject_HEAD_INIT(NULL,0)
2675     "re.Match",
2676     sizeof(MatchObject), sizeof(Py_ssize_t),
2677     (destructor)match_dealloc,  /* tp_dealloc */
2678     0,                          /* tp_vectorcall_offset */
2679     0,                          /* tp_getattr */
2680     0,                          /* tp_setattr */
2681     0,                          /* tp_as_async */
2682     (reprfunc)match_repr,       /* tp_repr */
2683     0,                          /* tp_as_number */
2684     0,                          /* tp_as_sequence */
2685     &match_as_mapping,          /* tp_as_mapping */
2686     0,                          /* tp_hash */
2687     0,                          /* tp_call */
2688     0,                          /* tp_str */
2689     0,                          /* tp_getattro */
2690     0,                          /* tp_setattro */
2691     0,                          /* tp_as_buffer */
2692     Py_TPFLAGS_DEFAULT,         /* tp_flags */
2693     match_doc,                  /* tp_doc */
2694     0,                          /* tp_traverse */
2695     0,                          /* tp_clear */
2696     0,                          /* tp_richcompare */
2697     0,                          /* tp_weaklistoffset */
2698     0,                          /* tp_iter */
2699     0,                          /* tp_iternext */
2700     match_methods,              /* tp_methods */
2701     match_members,              /* tp_members */
2702     match_getset,               /* tp_getset */
2703 };
2704 
2705 static PyMethodDef scanner_methods[] = {
2706     _SRE_SRE_SCANNER_MATCH_METHODDEF
2707     _SRE_SRE_SCANNER_SEARCH_METHODDEF
2708     {NULL, NULL}
2709 };
2710 
2711 #define SCAN_OFF(x) offsetof(ScannerObject, x)
2712 static PyMemberDef scanner_members[] = {
2713     {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2714     {NULL}  /* Sentinel */
2715 };
2716 
2717 static PyTypeObject Scanner_Type = {
2718     PyVarObject_HEAD_INIT(NULL, 0)
2719     "_" SRE_MODULE ".SRE_Scanner",
2720     sizeof(ScannerObject), 0,
2721     (destructor)scanner_dealloc,/* tp_dealloc */
2722     0,                          /* tp_vectorcall_offset */
2723     0,                          /* tp_getattr */
2724     0,                          /* tp_setattr */
2725     0,                          /* tp_as_async */
2726     0,                          /* tp_repr */
2727     0,                          /* tp_as_number */
2728     0,                          /* tp_as_sequence */
2729     0,                          /* tp_as_mapping */
2730     0,                          /* tp_hash */
2731     0,                          /* tp_call */
2732     0,                          /* tp_str */
2733     0,                          /* tp_getattro */
2734     0,                          /* tp_setattro */
2735     0,                          /* tp_as_buffer */
2736     Py_TPFLAGS_DEFAULT,         /* tp_flags */
2737     0,                          /* tp_doc */
2738     0,                          /* tp_traverse */
2739     0,                          /* tp_clear */
2740     0,                          /* tp_richcompare */
2741     0,                          /* tp_weaklistoffset */
2742     0,                          /* tp_iter */
2743     0,                          /* tp_iternext */
2744     scanner_methods,            /* tp_methods */
2745     scanner_members,            /* tp_members */
2746     0,                          /* tp_getset */
2747 };
2748 
2749 static PyMethodDef _functions[] = {
2750     _SRE_COMPILE_METHODDEF
2751     _SRE_GETCODESIZE_METHODDEF
2752     _SRE_ASCII_ISCASED_METHODDEF
2753     _SRE_UNICODE_ISCASED_METHODDEF
2754     _SRE_ASCII_TOLOWER_METHODDEF
2755     _SRE_UNICODE_TOLOWER_METHODDEF
2756     {NULL, NULL}
2757 };
2758 
2759 static struct PyModuleDef sremodule = {
2760         PyModuleDef_HEAD_INIT,
2761         "_" SRE_MODULE,
2762         NULL,
2763         -1,
2764         _functions,
2765         NULL,
2766         NULL,
2767         NULL,
2768         NULL
2769 };
2770 
PyInit__sre(void)2771 PyMODINIT_FUNC PyInit__sre(void)
2772 {
2773     PyObject* m;
2774     PyObject* d;
2775     PyObject* x;
2776 
2777     /* Patch object types */
2778     if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2779         PyType_Ready(&Scanner_Type))
2780         return NULL;
2781 
2782     m = PyModule_Create(&sremodule);
2783     if (m == NULL)
2784         return NULL;
2785     d = PyModule_GetDict(m);
2786 
2787     x = PyLong_FromLong(SRE_MAGIC);
2788     if (x) {
2789         PyDict_SetItemString(d, "MAGIC", x);
2790         Py_DECREF(x);
2791     }
2792 
2793     x = PyLong_FromLong(sizeof(SRE_CODE));
2794     if (x) {
2795         PyDict_SetItemString(d, "CODESIZE", x);
2796         Py_DECREF(x);
2797     }
2798 
2799     x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2800     if (x) {
2801         PyDict_SetItemString(d, "MAXREPEAT", x);
2802         Py_DECREF(x);
2803     }
2804 
2805     x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2806     if (x) {
2807         PyDict_SetItemString(d, "MAXGROUPS", x);
2808         Py_DECREF(x);
2809     }
2810 
2811     x = PyUnicode_FromString(copyright);
2812     if (x) {
2813         PyDict_SetItemString(d, "copyright", x);
2814         Py_DECREF(x);
2815     }
2816     return m;
2817 }
2818 
2819 /* vim:ts=4:sw=4:et
2820 */
2821