1 /*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5 *
6 * partial history:
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
25 * 2013-02-04 mrab added fullmatch primitive
26 *
27 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
28 *
29 * This version of the SRE library can be redistributed under CNRI's
30 * Python 1.6 license. For any other use, please contact Secret Labs
31 * AB (info@pythonware.com).
32 *
33 * Portions of this engine have been developed in cooperation with
34 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
35 * other compatibility work.
36 */
37
/* Version and copyright banner string for this build of the engine. */
static const char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40
41 #define PY_SSIZE_T_CLEAN
42
43 #include "Python.h"
44 #include "structmember.h" // PyMemberDef
45
46 #include "sre.h"
47
/* Number of bits in one SRE_CODE unit (8 bits for every byte of its size). */
#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
50 #include <ctype.h>
51
/* name of this module, minus the leading underscore */
#if !defined(SRE_MODULE)
#define SRE_MODULE "sre"
#endif

/* Name of the Python-level module that call() imports when pattern_subx()
   hands a non-literal template over to its "_subx" helper. */
#define SRE_PY_MODULE "re"
58
/* defining this one enables tracing (TRACE() then expands to printf);
   it is force-disabled here */
#undef VERBOSE
61
62 /* -------------------------------------------------------------------- */
63
/* LOCAL(type) introduces a file-local inline function returning `type`.
   Under MSVC it additionally selects the __fastcall calling convention. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif
72
/* error codes */
/* Negative status values returned by the matching engine; pattern_error()
   turns them into Python exceptions. */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
79
/* TRACE((fmt, ...)) prints a debug line when VERBOSE is defined and expands
   to nothing otherwise.  Note the double parentheses at call sites: the
   single macro argument is a complete printf argument list. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
85
86 /* -------------------------------------------------------------------- */
87 /* search engine state */
88
/* ASCII-only character class predicates.  Each one first bounds `ch` by the
   largest ASCII member of the class before consulting the Py_IS* macro.
   NOTE(review): presumably this keeps large code points away from the
   byte-indexed Py_IS* lookups -- confirm against pyctype.h. */
#define SRE_IS_DIGIT(ch)\
    ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) <= ' ' && Py_ISSPACE(ch))
/* only '\n' counts as a line break in ASCII mode */
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
/* word characters are alphanumerics plus underscore */
#define SRE_IS_WORD(ch)\
    ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
97
/* Lower-case `ch` using ASCII rules only.  Values of 128 and above are
   returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
102
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
/* Only values 0..255 are handed to isalnum(); anything larger is "not
   alphanumeric". */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
/* locale word character: locale alphanumeric or underscore */
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
108
/* Lower-case `ch` with the C library's tolower() (current locale).
   Only values below 256 are converted; larger values pass through. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)tolower((int)ch);
}
113
/* Upper-case `ch` with the C library's toupper() (current locale).
   Only values below 256 are converted; larger values pass through. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)toupper((int)ch);
}
118
/* unicode-specific character predicates */
/* Thin aliases for the Py_UNICODE_* classification macros. */

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
/* unicode word character: unicode alphanumeric or underscore */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
126
/* Lower-case `ch` via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
131
/* Upper-case `ch` via Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);

    return raised;
}
136
137 LOCAL(int)
sre_category(SRE_CODE category,unsigned int ch)138 sre_category(SRE_CODE category, unsigned int ch)
139 {
140 switch (category) {
141
142 case SRE_CATEGORY_DIGIT:
143 return SRE_IS_DIGIT(ch);
144 case SRE_CATEGORY_NOT_DIGIT:
145 return !SRE_IS_DIGIT(ch);
146 case SRE_CATEGORY_SPACE:
147 return SRE_IS_SPACE(ch);
148 case SRE_CATEGORY_NOT_SPACE:
149 return !SRE_IS_SPACE(ch);
150 case SRE_CATEGORY_WORD:
151 return SRE_IS_WORD(ch);
152 case SRE_CATEGORY_NOT_WORD:
153 return !SRE_IS_WORD(ch);
154 case SRE_CATEGORY_LINEBREAK:
155 return SRE_IS_LINEBREAK(ch);
156 case SRE_CATEGORY_NOT_LINEBREAK:
157 return !SRE_IS_LINEBREAK(ch);
158
159 case SRE_CATEGORY_LOC_WORD:
160 return SRE_LOC_IS_WORD(ch);
161 case SRE_CATEGORY_LOC_NOT_WORD:
162 return !SRE_LOC_IS_WORD(ch);
163
164 case SRE_CATEGORY_UNI_DIGIT:
165 return SRE_UNI_IS_DIGIT(ch);
166 case SRE_CATEGORY_UNI_NOT_DIGIT:
167 return !SRE_UNI_IS_DIGIT(ch);
168 case SRE_CATEGORY_UNI_SPACE:
169 return SRE_UNI_IS_SPACE(ch);
170 case SRE_CATEGORY_UNI_NOT_SPACE:
171 return !SRE_UNI_IS_SPACE(ch);
172 case SRE_CATEGORY_UNI_WORD:
173 return SRE_UNI_IS_WORD(ch);
174 case SRE_CATEGORY_UNI_NOT_WORD:
175 return !SRE_UNI_IS_WORD(ch);
176 case SRE_CATEGORY_UNI_LINEBREAK:
177 return SRE_UNI_IS_LINEBREAK(ch);
178 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
179 return !SRE_UNI_IS_LINEBREAK(ch);
180 }
181 return 0;
182 }
183
184 LOCAL(int)
char_loc_ignore(SRE_CODE pattern,SRE_CODE ch)185 char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
186 {
187 return ch == pattern
188 || (SRE_CODE) sre_lower_locale(ch) == pattern
189 || (SRE_CODE) sre_upper_locale(ch) == pattern;
190 }
191
192
193 /* helpers */
194
195 static void
data_stack_dealloc(SRE_STATE * state)196 data_stack_dealloc(SRE_STATE* state)
197 {
198 if (state->data_stack) {
199 PyMem_FREE(state->data_stack);
200 state->data_stack = NULL;
201 }
202 state->data_stack_size = state->data_stack_base = 0;
203 }
204
205 static int
data_stack_grow(SRE_STATE * state,Py_ssize_t size)206 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
207 {
208 Py_ssize_t minsize, cursize;
209 minsize = state->data_stack_base+size;
210 cursize = state->data_stack_size;
211 if (cursize < minsize) {
212 void* stack;
213 cursize = minsize+minsize/4+1024;
214 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
215 stack = PyMem_REALLOC(state->data_stack, cursize);
216 if (!stack) {
217 data_stack_dealloc(state);
218 return SRE_ERROR_MEMORY;
219 }
220 state->data_stack = (char *)stack;
221 state->data_stack_size = cursize;
222 }
223 return 0;
224 }
225
/* The matching engine lives in sre_lib.h and is instantiated three times,
   once per character width.  SRE(F) prefixes every generated function, so
   this file ends up with sre_ucs1_match, sre_ucs2_search, and so on.
   NOTE(review): the macros are redefined without an #undef here, so
   sre_lib.h presumably undefines them at its end -- confirm. */

/* generate 8-bit version */

#define SRE_CHAR Py_UCS1
#define SIZEOF_SRE_CHAR 1
#define SRE(F) sre_ucs1_##F
#include "sre_lib.h"

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UCS2
#define SIZEOF_SRE_CHAR 2
#define SRE(F) sre_ucs2_##F
#include "sre_lib.h"

/* generate 32-bit unicode version */

#define SRE_CHAR Py_UCS4
#define SIZEOF_SRE_CHAR 4
#define SRE(F) sre_ucs4_##F
#include "sre_lib.h"
246
247 /* -------------------------------------------------------------------- */
248 /* factories and destructors */
249
/* see sre.h for object declarations */
/* Forward declarations: the pattern methods below call these before their
   definitions appear. */
static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
253
254
255 /*[clinic input]
256 module _sre
257 class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
258 class _sre.SRE_Match "MatchObject *" "&Match_Type"
259 class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
260 [clinic start generated code]*/
261 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
262
/* Forward declarations of the type objects named in the clinic class
   declarations above. */
static PyTypeObject Pattern_Type;
static PyTypeObject Match_Type;
static PyTypeObject Scanner_Type;
266
267 /*[clinic input]
268 _sre.getcodesize -> int
269 [clinic start generated code]*/
270
271 static int
_sre_getcodesize_impl(PyObject * module)272 _sre_getcodesize_impl(PyObject *module)
273 /*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
274 {
275 return sizeof(SRE_CODE);
276 }
277
278 /*[clinic input]
279 _sre.ascii_iscased -> bool
280
281 character: int
282 /
283
284 [clinic start generated code]*/
285
286 static int
_sre_ascii_iscased_impl(PyObject * module,int character)287 _sre_ascii_iscased_impl(PyObject *module, int character)
288 /*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
289 {
290 unsigned int ch = (unsigned int)character;
291 return ch < 128 && Py_ISALPHA(ch);
292 }
293
294 /*[clinic input]
295 _sre.unicode_iscased -> bool
296
297 character: int
298 /
299
300 [clinic start generated code]*/
301
302 static int
_sre_unicode_iscased_impl(PyObject * module,int character)303 _sre_unicode_iscased_impl(PyObject *module, int character)
304 /*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
305 {
306 unsigned int ch = (unsigned int)character;
307 return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
308 }
309
310 /*[clinic input]
311 _sre.ascii_tolower -> int
312
313 character: int
314 /
315
316 [clinic start generated code]*/
317
318 static int
_sre_ascii_tolower_impl(PyObject * module,int character)319 _sre_ascii_tolower_impl(PyObject *module, int character)
320 /*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
321 {
322 return sre_lower_ascii(character);
323 }
324
325 /*[clinic input]
326 _sre.unicode_tolower -> int
327
328 character: int
329 /
330
331 [clinic start generated code]*/
332
333 static int
_sre_unicode_tolower_impl(PyObject * module,int character)334 _sre_unicode_tolower_impl(PyObject *module, int character)
335 /*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
336 {
337 return sre_lower_unicode(character);
338 }
339
340 LOCAL(void)
state_reset(SRE_STATE * state)341 state_reset(SRE_STATE* state)
342 {
343 /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
344 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
345
346 state->lastmark = -1;
347 state->lastindex = -1;
348
349 state->repeat = NULL;
350
351 data_stack_dealloc(state);
352 }
353
354 static const void*
getstring(PyObject * string,Py_ssize_t * p_length,int * p_isbytes,int * p_charsize,Py_buffer * view)355 getstring(PyObject* string, Py_ssize_t* p_length,
356 int* p_isbytes, int* p_charsize,
357 Py_buffer *view)
358 {
359 /* given a python object, return a data pointer, a length (in
360 characters), and a character size. return NULL if the object
361 is not a string (or not compatible) */
362
363 /* Unicode objects do not support the buffer API. So, get the data
364 directly instead. */
365 if (PyUnicode_Check(string)) {
366 if (PyUnicode_READY(string) == -1)
367 return NULL;
368 *p_length = PyUnicode_GET_LENGTH(string);
369 *p_charsize = PyUnicode_KIND(string);
370 *p_isbytes = 0;
371 return PyUnicode_DATA(string);
372 }
373
374 /* get pointer to byte string buffer */
375 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
376 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
377 return NULL;
378 }
379
380 *p_length = view->len;
381 *p_charsize = 1;
382 *p_isbytes = 1;
383
384 if (view->buf == NULL) {
385 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
386 PyBuffer_Release(view);
387 view->buf = NULL;
388 return NULL;
389 }
390 return view->buf;
391 }
392
393 LOCAL(PyObject*)
state_init(SRE_STATE * state,PatternObject * pattern,PyObject * string,Py_ssize_t start,Py_ssize_t end)394 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
395 Py_ssize_t start, Py_ssize_t end)
396 {
397 /* prepare state object */
398
399 Py_ssize_t length;
400 int isbytes, charsize;
401 const void* ptr;
402
403 memset(state, 0, sizeof(SRE_STATE));
404
405 state->mark = PyMem_New(const void *, pattern->groups * 2);
406 if (!state->mark) {
407 PyErr_NoMemory();
408 goto err;
409 }
410 state->lastmark = -1;
411 state->lastindex = -1;
412
413 state->buffer.buf = NULL;
414 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
415 if (!ptr)
416 goto err;
417
418 if (isbytes && pattern->isbytes == 0) {
419 PyErr_SetString(PyExc_TypeError,
420 "cannot use a string pattern on a bytes-like object");
421 goto err;
422 }
423 if (!isbytes && pattern->isbytes > 0) {
424 PyErr_SetString(PyExc_TypeError,
425 "cannot use a bytes pattern on a string-like object");
426 goto err;
427 }
428
429 /* adjust boundaries */
430 if (start < 0)
431 start = 0;
432 else if (start > length)
433 start = length;
434
435 if (end < 0)
436 end = 0;
437 else if (end > length)
438 end = length;
439
440 state->isbytes = isbytes;
441 state->charsize = charsize;
442 state->match_all = 0;
443 state->must_advance = 0;
444
445 state->beginning = ptr;
446
447 state->start = (void*) ((char*) ptr + start * state->charsize);
448 state->end = (void*) ((char*) ptr + end * state->charsize);
449
450 Py_INCREF(string);
451 state->string = string;
452 state->pos = start;
453 state->endpos = end;
454
455 return string;
456 err:
457 PyMem_Del(state->mark);
458 state->mark = NULL;
459 if (state->buffer.buf)
460 PyBuffer_Release(&state->buffer);
461 return NULL;
462 }
463
464 LOCAL(void)
state_fini(SRE_STATE * state)465 state_fini(SRE_STATE* state)
466 {
467 if (state->buffer.buf)
468 PyBuffer_Release(&state->buffer);
469 Py_XDECREF(state->string);
470 data_stack_dealloc(state);
471 PyMem_Del(state->mark);
472 state->mark = NULL;
473 }
474
/* calculate offset from start of string */
/* `member` is a pointer into the subject data; the byte distance from
   state->beginning is divided by the character size, giving a character
   index. */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
478
479 LOCAL(PyObject*)
getslice(int isbytes,const void * ptr,PyObject * string,Py_ssize_t start,Py_ssize_t end)480 getslice(int isbytes, const void *ptr,
481 PyObject* string, Py_ssize_t start, Py_ssize_t end)
482 {
483 if (isbytes) {
484 if (PyBytes_CheckExact(string) &&
485 start == 0 && end == PyBytes_GET_SIZE(string)) {
486 Py_INCREF(string);
487 return string;
488 }
489 return PyBytes_FromStringAndSize(
490 (const char *)ptr + start, end - start);
491 }
492 else {
493 return PyUnicode_Substring(string, start, end);
494 }
495 }
496
497 LOCAL(PyObject*)
state_getslice(SRE_STATE * state,Py_ssize_t index,PyObject * string,int empty)498 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
499 {
500 Py_ssize_t i, j;
501
502 index = (index - 1) * 2;
503
504 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
505 if (empty)
506 /* want empty string */
507 i = j = 0;
508 else {
509 Py_RETURN_NONE;
510 }
511 } else {
512 i = STATE_OFFSET(state, state->mark[index]);
513 j = STATE_OFFSET(state, state->mark[index+1]);
514 }
515
516 return getslice(state->isbytes, state->beginning, string, i, j);
517 }
518
519 static void
pattern_error(Py_ssize_t status)520 pattern_error(Py_ssize_t status)
521 {
522 switch (status) {
523 case SRE_ERROR_RECURSION_LIMIT:
524 /* This error code seems to be unused. */
525 PyErr_SetString(
526 PyExc_RecursionError,
527 "maximum recursion limit exceeded"
528 );
529 break;
530 case SRE_ERROR_MEMORY:
531 PyErr_NoMemory();
532 break;
533 case SRE_ERROR_INTERRUPTED:
534 /* An exception has already been raised, so let it fly */
535 break;
536 default:
537 /* other error codes indicate compiler/engine bugs */
538 PyErr_SetString(
539 PyExc_RuntimeError,
540 "internal error in regular expression engine"
541 );
542 }
543 }
544
545 static void
pattern_dealloc(PatternObject * self)546 pattern_dealloc(PatternObject* self)
547 {
548 if (self->weakreflist != NULL)
549 PyObject_ClearWeakRefs((PyObject *) self);
550 Py_XDECREF(self->pattern);
551 Py_XDECREF(self->groupindex);
552 Py_XDECREF(self->indexgroup);
553 PyObject_DEL(self);
554 }
555
556 LOCAL(Py_ssize_t)
sre_match(SRE_STATE * state,SRE_CODE * pattern)557 sre_match(SRE_STATE* state, SRE_CODE* pattern)
558 {
559 if (state->charsize == 1)
560 return sre_ucs1_match(state, pattern, 1);
561 if (state->charsize == 2)
562 return sre_ucs2_match(state, pattern, 1);
563 assert(state->charsize == 4);
564 return sre_ucs4_match(state, pattern, 1);
565 }
566
567 LOCAL(Py_ssize_t)
sre_search(SRE_STATE * state,SRE_CODE * pattern)568 sre_search(SRE_STATE* state, SRE_CODE* pattern)
569 {
570 if (state->charsize == 1)
571 return sre_ucs1_search(state, pattern);
572 if (state->charsize == 2)
573 return sre_ucs2_search(state, pattern);
574 assert(state->charsize == 4);
575 return sre_ucs4_search(state, pattern);
576 }
577
578 /*[clinic input]
579 _sre.SRE_Pattern.match
580
581 string: object
582 pos: Py_ssize_t = 0
583 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
584
585 Matches zero or more characters at the beginning of the string.
586 [clinic start generated code]*/
587
588 static PyObject *
_sre_SRE_Pattern_match_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)589 _sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
590 Py_ssize_t pos, Py_ssize_t endpos)
591 /*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
592 {
593 SRE_STATE state;
594 Py_ssize_t status;
595 PyObject *match;
596
597 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
598 return NULL;
599
600 state.ptr = state.start;
601
602 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
603
604 status = sre_match(&state, PatternObject_GetCode(self));
605
606 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
607 if (PyErr_Occurred()) {
608 state_fini(&state);
609 return NULL;
610 }
611
612 match = pattern_new_match(self, &state, status);
613 state_fini(&state);
614 return match;
615 }
616
617 /*[clinic input]
618 _sre.SRE_Pattern.fullmatch
619
620 string: object
621 pos: Py_ssize_t = 0
622 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
623
624 Matches against all of the string.
625 [clinic start generated code]*/
626
627 static PyObject *
_sre_SRE_Pattern_fullmatch_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)628 _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
629 Py_ssize_t pos, Py_ssize_t endpos)
630 /*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
631 {
632 SRE_STATE state;
633 Py_ssize_t status;
634 PyObject *match;
635
636 if (!state_init(&state, self, string, pos, endpos))
637 return NULL;
638
639 state.ptr = state.start;
640
641 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
642
643 state.match_all = 1;
644 status = sre_match(&state, PatternObject_GetCode(self));
645
646 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
647 if (PyErr_Occurred()) {
648 state_fini(&state);
649 return NULL;
650 }
651
652 match = pattern_new_match(self, &state, status);
653 state_fini(&state);
654 return match;
655 }
656
657 /*[clinic input]
658 _sre.SRE_Pattern.search
659
660 string: object
661 pos: Py_ssize_t = 0
662 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
663
664 Scan through string looking for a match, and return a corresponding match object instance.
665
666 Return None if no position in the string matches.
667 [clinic start generated code]*/
668
669 static PyObject *
_sre_SRE_Pattern_search_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)670 _sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
671 Py_ssize_t pos, Py_ssize_t endpos)
672 /*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
673 {
674 SRE_STATE state;
675 Py_ssize_t status;
676 PyObject *match;
677
678 if (!state_init(&state, self, string, pos, endpos))
679 return NULL;
680
681 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
682
683 status = sre_search(&state, PatternObject_GetCode(self));
684
685 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
686
687 if (PyErr_Occurred()) {
688 state_fini(&state);
689 return NULL;
690 }
691
692 match = pattern_new_match(self, &state, status);
693 state_fini(&state);
694 return match;
695 }
696
697 static PyObject*
call(const char * module,const char * function,PyObject * args)698 call(const char* module, const char* function, PyObject* args)
699 {
700 PyObject* name;
701 PyObject* mod;
702 PyObject* func;
703 PyObject* result;
704
705 if (!args)
706 return NULL;
707 name = PyUnicode_FromString(module);
708 if (!name)
709 return NULL;
710 mod = PyImport_Import(name);
711 Py_DECREF(name);
712 if (!mod)
713 return NULL;
714 func = PyObject_GetAttrString(mod, function);
715 Py_DECREF(mod);
716 if (!func)
717 return NULL;
718 result = PyObject_CallObject(func, args);
719 Py_DECREF(func);
720 Py_DECREF(args);
721 return result;
722 }
723
724 /*[clinic input]
725 _sre.SRE_Pattern.findall
726
727 string: object
728 pos: Py_ssize_t = 0
729 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
730
731 Return a list of all non-overlapping matches of pattern in string.
732 [clinic start generated code]*/
733
734 static PyObject *
_sre_SRE_Pattern_findall_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)735 _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
736 Py_ssize_t pos, Py_ssize_t endpos)
737 /*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
738 {
739 SRE_STATE state;
740 PyObject* list;
741 Py_ssize_t status;
742 Py_ssize_t i, b, e;
743
744 if (!state_init(&state, self, string, pos, endpos))
745 return NULL;
746
747 list = PyList_New(0);
748 if (!list) {
749 state_fini(&state);
750 return NULL;
751 }
752
753 while (state.start <= state.end) {
754
755 PyObject* item;
756
757 state_reset(&state);
758
759 state.ptr = state.start;
760
761 status = sre_search(&state, PatternObject_GetCode(self));
762 if (PyErr_Occurred())
763 goto error;
764
765 if (status <= 0) {
766 if (status == 0)
767 break;
768 pattern_error(status);
769 goto error;
770 }
771
772 /* don't bother to build a match object */
773 switch (self->groups) {
774 case 0:
775 b = STATE_OFFSET(&state, state.start);
776 e = STATE_OFFSET(&state, state.ptr);
777 item = getslice(state.isbytes, state.beginning,
778 string, b, e);
779 if (!item)
780 goto error;
781 break;
782 case 1:
783 item = state_getslice(&state, 1, string, 1);
784 if (!item)
785 goto error;
786 break;
787 default:
788 item = PyTuple_New(self->groups);
789 if (!item)
790 goto error;
791 for (i = 0; i < self->groups; i++) {
792 PyObject* o = state_getslice(&state, i+1, string, 1);
793 if (!o) {
794 Py_DECREF(item);
795 goto error;
796 }
797 PyTuple_SET_ITEM(item, i, o);
798 }
799 break;
800 }
801
802 status = PyList_Append(list, item);
803 Py_DECREF(item);
804 if (status < 0)
805 goto error;
806
807 state.must_advance = (state.ptr == state.start);
808 state.start = state.ptr;
809 }
810
811 state_fini(&state);
812 return list;
813
814 error:
815 Py_DECREF(list);
816 state_fini(&state);
817 return NULL;
818
819 }
820
821 /*[clinic input]
822 _sre.SRE_Pattern.finditer
823
824 string: object
825 pos: Py_ssize_t = 0
826 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
827
828 Return an iterator over all non-overlapping matches for the RE pattern in string.
829
830 For each match, the iterator returns a match object.
831 [clinic start generated code]*/
832
833 static PyObject *
_sre_SRE_Pattern_finditer_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)834 _sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
835 Py_ssize_t pos, Py_ssize_t endpos)
836 /*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
837 {
838 PyObject* scanner;
839 PyObject* search;
840 PyObject* iterator;
841
842 scanner = pattern_scanner(self, string, pos, endpos);
843 if (!scanner)
844 return NULL;
845
846 search = PyObject_GetAttrString(scanner, "search");
847 Py_DECREF(scanner);
848 if (!search)
849 return NULL;
850
851 iterator = PyCallIter_New(search, Py_None);
852 Py_DECREF(search);
853
854 return iterator;
855 }
856
857 /*[clinic input]
858 _sre.SRE_Pattern.scanner
859
860 string: object
861 pos: Py_ssize_t = 0
862 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
863
864 [clinic start generated code]*/
865
866 static PyObject *
_sre_SRE_Pattern_scanner_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)867 _sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
868 Py_ssize_t pos, Py_ssize_t endpos)
869 /*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
870 {
871 return pattern_scanner(self, string, pos, endpos);
872 }
873
874 /*[clinic input]
875 _sre.SRE_Pattern.split
876
877 string: object
878 maxsplit: Py_ssize_t = 0
879
880 Split string by the occurrences of pattern.
881 [clinic start generated code]*/
882
883 static PyObject *
_sre_SRE_Pattern_split_impl(PatternObject * self,PyObject * string,Py_ssize_t maxsplit)884 _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
885 Py_ssize_t maxsplit)
886 /*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
887 {
888 SRE_STATE state;
889 PyObject* list;
890 PyObject* item;
891 Py_ssize_t status;
892 Py_ssize_t n;
893 Py_ssize_t i;
894 const void* last;
895
896 assert(self->codesize != 0);
897
898 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
899 return NULL;
900
901 list = PyList_New(0);
902 if (!list) {
903 state_fini(&state);
904 return NULL;
905 }
906
907 n = 0;
908 last = state.start;
909
910 while (!maxsplit || n < maxsplit) {
911
912 state_reset(&state);
913
914 state.ptr = state.start;
915
916 status = sre_search(&state, PatternObject_GetCode(self));
917 if (PyErr_Occurred())
918 goto error;
919
920 if (status <= 0) {
921 if (status == 0)
922 break;
923 pattern_error(status);
924 goto error;
925 }
926
927 /* get segment before this match */
928 item = getslice(state.isbytes, state.beginning,
929 string, STATE_OFFSET(&state, last),
930 STATE_OFFSET(&state, state.start)
931 );
932 if (!item)
933 goto error;
934 status = PyList_Append(list, item);
935 Py_DECREF(item);
936 if (status < 0)
937 goto error;
938
939 /* add groups (if any) */
940 for (i = 0; i < self->groups; i++) {
941 item = state_getslice(&state, i+1, string, 0);
942 if (!item)
943 goto error;
944 status = PyList_Append(list, item);
945 Py_DECREF(item);
946 if (status < 0)
947 goto error;
948 }
949
950 n = n + 1;
951 state.must_advance = (state.ptr == state.start);
952 last = state.start = state.ptr;
953
954 }
955
956 /* get segment following last match (even if empty) */
957 item = getslice(state.isbytes, state.beginning,
958 string, STATE_OFFSET(&state, last), state.endpos
959 );
960 if (!item)
961 goto error;
962 status = PyList_Append(list, item);
963 Py_DECREF(item);
964 if (status < 0)
965 goto error;
966
967 state_fini(&state);
968 return list;
969
970 error:
971 Py_DECREF(list);
972 state_fini(&state);
973 return NULL;
974
975 }
976
977 static PyObject*
pattern_subx(PatternObject * self,PyObject * ptemplate,PyObject * string,Py_ssize_t count,Py_ssize_t subn)978 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
979 Py_ssize_t count, Py_ssize_t subn)
980 {
981 SRE_STATE state;
982 PyObject* list;
983 PyObject* joiner;
984 PyObject* item;
985 PyObject* filter;
986 PyObject* match;
987 const void* ptr;
988 Py_ssize_t status;
989 Py_ssize_t n;
990 Py_ssize_t i, b, e;
991 int isbytes, charsize;
992 int filter_is_callable;
993 Py_buffer view;
994
995 if (PyCallable_Check(ptemplate)) {
996 /* sub/subn takes either a function or a template */
997 filter = ptemplate;
998 Py_INCREF(filter);
999 filter_is_callable = 1;
1000 } else {
1001 /* if not callable, check if it's a literal string */
1002 int literal;
1003 view.buf = NULL;
1004 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1005 if (ptr) {
1006 if (charsize == 1)
1007 literal = memchr(ptr, '\\', n) == NULL;
1008 else
1009 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1010 } else {
1011 PyErr_Clear();
1012 literal = 0;
1013 }
1014 if (view.buf)
1015 PyBuffer_Release(&view);
1016 if (literal) {
1017 filter = ptemplate;
1018 Py_INCREF(filter);
1019 filter_is_callable = 0;
1020 } else {
1021 /* not a literal; hand it over to the template compiler */
1022 filter = call(
1023 SRE_PY_MODULE, "_subx",
1024 PyTuple_Pack(2, self, ptemplate)
1025 );
1026 if (!filter)
1027 return NULL;
1028 filter_is_callable = PyCallable_Check(filter);
1029 }
1030 }
1031
1032 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1033 Py_DECREF(filter);
1034 return NULL;
1035 }
1036
1037 list = PyList_New(0);
1038 if (!list) {
1039 Py_DECREF(filter);
1040 state_fini(&state);
1041 return NULL;
1042 }
1043
1044 n = i = 0;
1045
1046 while (!count || n < count) {
1047
1048 state_reset(&state);
1049
1050 state.ptr = state.start;
1051
1052 status = sre_search(&state, PatternObject_GetCode(self));
1053 if (PyErr_Occurred())
1054 goto error;
1055
1056 if (status <= 0) {
1057 if (status == 0)
1058 break;
1059 pattern_error(status);
1060 goto error;
1061 }
1062
1063 b = STATE_OFFSET(&state, state.start);
1064 e = STATE_OFFSET(&state, state.ptr);
1065
1066 if (i < b) {
1067 /* get segment before this match */
1068 item = getslice(state.isbytes, state.beginning,
1069 string, i, b);
1070 if (!item)
1071 goto error;
1072 status = PyList_Append(list, item);
1073 Py_DECREF(item);
1074 if (status < 0)
1075 goto error;
1076
1077 }
1078
1079 if (filter_is_callable) {
1080 /* pass match object through filter */
1081 match = pattern_new_match(self, &state, 1);
1082 if (!match)
1083 goto error;
1084 item = PyObject_CallOneArg(filter, match);
1085 Py_DECREF(match);
1086 if (!item)
1087 goto error;
1088 } else {
1089 /* filter is literal string */
1090 item = filter;
1091 Py_INCREF(item);
1092 }
1093
1094 /* add to list */
1095 if (item != Py_None) {
1096 status = PyList_Append(list, item);
1097 Py_DECREF(item);
1098 if (status < 0)
1099 goto error;
1100 }
1101
1102 i = e;
1103 n = n + 1;
1104 state.must_advance = (state.ptr == state.start);
1105 state.start = state.ptr;
1106 }
1107
1108 /* get segment following last match */
1109 if (i < state.endpos) {
1110 item = getslice(state.isbytes, state.beginning,
1111 string, i, state.endpos);
1112 if (!item)
1113 goto error;
1114 status = PyList_Append(list, item);
1115 Py_DECREF(item);
1116 if (status < 0)
1117 goto error;
1118 }
1119
1120 state_fini(&state);
1121
1122 Py_DECREF(filter);
1123
1124 /* convert list to single string (also removes list) */
1125 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1126 if (!joiner) {
1127 Py_DECREF(list);
1128 return NULL;
1129 }
1130 if (PyList_GET_SIZE(list) == 0) {
1131 Py_DECREF(list);
1132 item = joiner;
1133 }
1134 else {
1135 if (state.isbytes)
1136 item = _PyBytes_Join(joiner, list);
1137 else
1138 item = PyUnicode_Join(joiner, list);
1139 Py_DECREF(joiner);
1140 Py_DECREF(list);
1141 if (!item)
1142 return NULL;
1143 }
1144
1145 if (subn)
1146 return Py_BuildValue("Nn", item, n);
1147
1148 return item;
1149
1150 error:
1151 Py_DECREF(list);
1152 state_fini(&state);
1153 Py_DECREF(filter);
1154 return NULL;
1155
1156 }
1157
1158 /*[clinic input]
1159 _sre.SRE_Pattern.sub
1160
1161 repl: object
1162 string: object
1163 count: Py_ssize_t = 0
1164
1165 Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1166 [clinic start generated code]*/
1167
1168 static PyObject *
_sre_SRE_Pattern_sub_impl(PatternObject * self,PyObject * repl,PyObject * string,Py_ssize_t count)1169 _sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1170 PyObject *string, Py_ssize_t count)
1171 /*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1172 {
1173 return pattern_subx(self, repl, string, count, 0);
1174 }
1175
1176 /*[clinic input]
1177 _sre.SRE_Pattern.subn
1178
1179 repl: object
1180 string: object
1181 count: Py_ssize_t = 0
1182
1183 Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1184 [clinic start generated code]*/
1185
1186 static PyObject *
_sre_SRE_Pattern_subn_impl(PatternObject * self,PyObject * repl,PyObject * string,Py_ssize_t count)1187 _sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1188 PyObject *string, Py_ssize_t count)
1189 /*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1190 {
1191 return pattern_subx(self, repl, string, count, 1);
1192 }
1193
1194 /*[clinic input]
1195 _sre.SRE_Pattern.__copy__
1196
1197 [clinic start generated code]*/
1198
1199 static PyObject *
_sre_SRE_Pattern___copy___impl(PatternObject * self)1200 _sre_SRE_Pattern___copy___impl(PatternObject *self)
1201 /*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1202 {
1203 Py_INCREF(self);
1204 return (PyObject *)self;
1205 }
1206
1207 /*[clinic input]
1208 _sre.SRE_Pattern.__deepcopy__
1209
1210 memo: object
1211 /
1212
1213 [clinic start generated code]*/
1214
1215 static PyObject *
_sre_SRE_Pattern___deepcopy__(PatternObject * self,PyObject * memo)1216 _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1217 /*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
1218 {
1219 Py_INCREF(self);
1220 return (PyObject *)self;
1221 }
1222
1223 static PyObject *
pattern_repr(PatternObject * obj)1224 pattern_repr(PatternObject *obj)
1225 {
1226 static const struct {
1227 const char *name;
1228 int value;
1229 } flag_names[] = {
1230 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1231 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1232 {"re.LOCALE", SRE_FLAG_LOCALE},
1233 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1234 {"re.DOTALL", SRE_FLAG_DOTALL},
1235 {"re.UNICODE", SRE_FLAG_UNICODE},
1236 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1237 {"re.DEBUG", SRE_FLAG_DEBUG},
1238 {"re.ASCII", SRE_FLAG_ASCII},
1239 };
1240 PyObject *result = NULL;
1241 PyObject *flag_items;
1242 size_t i;
1243 int flags = obj->flags;
1244
1245 /* Omit re.UNICODE for valid string patterns. */
1246 if (obj->isbytes == 0 &&
1247 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1248 SRE_FLAG_UNICODE)
1249 flags &= ~SRE_FLAG_UNICODE;
1250
1251 flag_items = PyList_New(0);
1252 if (!flag_items)
1253 return NULL;
1254
1255 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1256 if (flags & flag_names[i].value) {
1257 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1258 if (!item)
1259 goto done;
1260
1261 if (PyList_Append(flag_items, item) < 0) {
1262 Py_DECREF(item);
1263 goto done;
1264 }
1265 Py_DECREF(item);
1266 flags &= ~flag_names[i].value;
1267 }
1268 }
1269 if (flags) {
1270 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1271 if (!item)
1272 goto done;
1273
1274 if (PyList_Append(flag_items, item) < 0) {
1275 Py_DECREF(item);
1276 goto done;
1277 }
1278 Py_DECREF(item);
1279 }
1280
1281 if (PyList_Size(flag_items) > 0) {
1282 PyObject *flags_result;
1283 PyObject *sep = PyUnicode_FromString("|");
1284 if (!sep)
1285 goto done;
1286 flags_result = PyUnicode_Join(sep, flag_items);
1287 Py_DECREF(sep);
1288 if (!flags_result)
1289 goto done;
1290 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1291 obj->pattern, flags_result);
1292 Py_DECREF(flags_result);
1293 }
1294 else {
1295 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1296 }
1297
1298 done:
1299 Py_DECREF(flag_items);
1300 return result;
1301 }
1302
/* Docstring text for compiled pattern objects.
   NOTE(review): presumably referenced by the pattern type definition
   elsewhere in this file -- confirm. */
PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1304
1305 /* PatternObject's 'groupindex' method. */
1306 static PyObject *
pattern_groupindex(PatternObject * self,void * Py_UNUSED (ignored))1307 pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
1308 {
1309 if (self->groupindex == NULL)
1310 return PyDict_New();
1311 return PyDictProxy_New(self->groupindex);
1312 }
1313
1314 static int _validate(PatternObject *self); /* Forward */
1315
1316 /*[clinic input]
1317 _sre.compile
1318
1319 pattern: object
1320 flags: int
1321 code: object(subclass_of='&PyList_Type')
1322 groups: Py_ssize_t
1323 groupindex: object(subclass_of='&PyDict_Type')
1324 indexgroup: object(subclass_of='&PyTuple_Type')
1325
1326 [clinic start generated code]*/
1327
1328 static PyObject *
_sre_compile_impl(PyObject * module,PyObject * pattern,int flags,PyObject * code,Py_ssize_t groups,PyObject * groupindex,PyObject * indexgroup)1329 _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1330 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1331 PyObject *indexgroup)
1332 /*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1333 {
1334 /* "compile" pattern descriptor to pattern object */
1335
1336 PatternObject* self;
1337 Py_ssize_t i, n;
1338
1339 n = PyList_GET_SIZE(code);
1340 /* coverity[ampersand_in_size] */
1341 self = PyObject_NewVar(PatternObject, &Pattern_Type, n);
1342 if (!self)
1343 return NULL;
1344 self->weakreflist = NULL;
1345 self->pattern = NULL;
1346 self->groupindex = NULL;
1347 self->indexgroup = NULL;
1348
1349 self->codesize = n;
1350
1351 for (i = 0; i < n; i++) {
1352 PyObject *o = PyList_GET_ITEM(code, i);
1353 unsigned long value = PyLong_AsUnsignedLong(o);
1354 self->code[i] = (SRE_CODE) value;
1355 if ((unsigned long) self->code[i] != value) {
1356 PyErr_SetString(PyExc_OverflowError,
1357 "regular expression code size limit exceeded");
1358 break;
1359 }
1360 }
1361
1362 if (PyErr_Occurred()) {
1363 Py_DECREF(self);
1364 return NULL;
1365 }
1366
1367 if (pattern == Py_None) {
1368 self->isbytes = -1;
1369 }
1370 else {
1371 Py_ssize_t p_length;
1372 int charsize;
1373 Py_buffer view;
1374 view.buf = NULL;
1375 if (!getstring(pattern, &p_length, &self->isbytes,
1376 &charsize, &view)) {
1377 Py_DECREF(self);
1378 return NULL;
1379 }
1380 if (view.buf)
1381 PyBuffer_Release(&view);
1382 }
1383
1384 Py_INCREF(pattern);
1385 self->pattern = pattern;
1386
1387 self->flags = flags;
1388
1389 self->groups = groups;
1390
1391 if (PyDict_GET_SIZE(groupindex) > 0) {
1392 Py_INCREF(groupindex);
1393 self->groupindex = groupindex;
1394 if (PyTuple_GET_SIZE(indexgroup) > 0) {
1395 Py_INCREF(indexgroup);
1396 self->indexgroup = indexgroup;
1397 }
1398 }
1399
1400 if (!_validate(self)) {
1401 Py_DECREF(self);
1402 return NULL;
1403 }
1404
1405 return (PyObject*) self;
1406 }
1407
1408 /* -------------------------------------------------------------------- */
1409 /* Code validation */
1410
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.
1414
1415 The nice thing about the generated code is that it is position-independent:
1416 all jumps are relative jumps forward. Also, jumps don't cross each other:
1417 the target of a later jump is always earlier than the target of an earlier
1418 jump. IOW, this is okay:
1419
1420 J---------J-------T--------T
1421 \ \_____/ /
1422 \______________________/
1423
1424 but this is not:
1425
1426 J---------J-------T--------T
1427 \_________\_____/ /
1428 \____________/
1429
1430 It also helps that SRE_CODE is always an unsigned type.
1431 */
1432
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete,
   parenthesized printf() argument list, e.g. VTRACE(("x=%d\n", x)).
   Expands to nothing useful unless VVERBOSE is defined. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: trace the source line and return 0 from the function
   in which the macro is expanded. */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros rely on locals of the enclosing function: the cursor
   `code`, the limit `end`, and the destination `op`, `arg` or `skip`.
   Each one FAILs if the cursor has already reached `end`.

   GET_OP and GET_ARG read one word and advance the cursor.

   GET_SKIP_ADJ(adj) reads the skip word, FAILs if (skip - adj) exceeds
   the distance from the skip word to `end`, and only then advances the
   cursor past the skip word.  The subtraction is done in the unsigned
   SRE_CODE type, so a skip smaller than `adj` wraps around to a huge
   value and is rejected as well.  GET_SKIP is the adj == 0 form. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-adj > (uintptr_t)(end - code))         \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
1473
1474 static int
_validate_charset(SRE_CODE * code,SRE_CODE * end)1475 _validate_charset(SRE_CODE *code, SRE_CODE *end)
1476 {
1477 /* Some variables are manipulated by the macros above */
1478 SRE_CODE op;
1479 SRE_CODE arg;
1480 SRE_CODE offset;
1481 int i;
1482
1483 while (code < end) {
1484 GET_OP;
1485 switch (op) {
1486
1487 case SRE_OP_NEGATE:
1488 break;
1489
1490 case SRE_OP_LITERAL:
1491 GET_ARG;
1492 break;
1493
1494 case SRE_OP_RANGE:
1495 case SRE_OP_RANGE_UNI_IGNORE:
1496 GET_ARG;
1497 GET_ARG;
1498 break;
1499
1500 case SRE_OP_CHARSET:
1501 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1502 if (offset > (uintptr_t)(end - code))
1503 FAIL;
1504 code += offset;
1505 break;
1506
1507 case SRE_OP_BIGCHARSET:
1508 GET_ARG; /* Number of blocks */
1509 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1510 if (offset > (uintptr_t)(end - code))
1511 FAIL;
1512 /* Make sure that each byte points to a valid block */
1513 for (i = 0; i < 256; i++) {
1514 if (((unsigned char *)code)[i] >= arg)
1515 FAIL;
1516 }
1517 code += offset;
1518 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1519 if (offset > (uintptr_t)(end - code))
1520 FAIL;
1521 code += offset;
1522 break;
1523
1524 case SRE_OP_CATEGORY:
1525 GET_ARG;
1526 switch (arg) {
1527 case SRE_CATEGORY_DIGIT:
1528 case SRE_CATEGORY_NOT_DIGIT:
1529 case SRE_CATEGORY_SPACE:
1530 case SRE_CATEGORY_NOT_SPACE:
1531 case SRE_CATEGORY_WORD:
1532 case SRE_CATEGORY_NOT_WORD:
1533 case SRE_CATEGORY_LINEBREAK:
1534 case SRE_CATEGORY_NOT_LINEBREAK:
1535 case SRE_CATEGORY_LOC_WORD:
1536 case SRE_CATEGORY_LOC_NOT_WORD:
1537 case SRE_CATEGORY_UNI_DIGIT:
1538 case SRE_CATEGORY_UNI_NOT_DIGIT:
1539 case SRE_CATEGORY_UNI_SPACE:
1540 case SRE_CATEGORY_UNI_NOT_SPACE:
1541 case SRE_CATEGORY_UNI_WORD:
1542 case SRE_CATEGORY_UNI_NOT_WORD:
1543 case SRE_CATEGORY_UNI_LINEBREAK:
1544 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1545 break;
1546 default:
1547 FAIL;
1548 }
1549 break;
1550
1551 default:
1552 FAIL;
1553
1554 }
1555 }
1556
1557 return 1;
1558 }
1559
/* Validate the opcode sequence occupying [code, end).

   Each opcode's operands are bounds-checked with the GET_* macros;
   opcodes that enclose a sub-pattern validate it by recursing on the
   enclosed range and then check the opcode expected to terminate it.
   `groups` is the pattern's group count and bounds the arguments of
   MARK and the GROUPREF family.

   Returns 1 if the code is acceptable, 0 (via FAIL) otherwise.  No
   Python exception is set here. */
static int
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
    /* Some variables are manipulated by the macros above */
    SRE_CODE op;
    SRE_CODE arg;
    SRE_CODE skip;

    VTRACE(("code=%p, end=%p\n", code, end));

    /* An inverted range means a caller computed a bogus sub-range */
    if (code > end)
        FAIL;

    while (code < end) {
        GET_OP;
        switch (op) {

        case SRE_OP_MARK:
            /* We don't check whether marks are properly nested; the
               sre_match() code is robust even if they don't, and the worst
               you can get is nonsensical match results. */
            GET_ARG;
            if (arg > 2 * (size_t)groups + 1) {
                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
                FAIL;
            }
            break;

        case SRE_OP_LITERAL:
        case SRE_OP_NOT_LITERAL:
        case SRE_OP_LITERAL_IGNORE:
        case SRE_OP_NOT_LITERAL_IGNORE:
        case SRE_OP_LITERAL_UNI_IGNORE:
        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
        case SRE_OP_LITERAL_LOC_IGNORE:
        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
            GET_ARG;
            /* The arg is just a character, nothing to check */
            break;

        case SRE_OP_SUCCESS:
        case SRE_OP_FAILURE:
            /* Nothing to check; these normally end the matching process */
            break;

        case SRE_OP_AT:
            GET_ARG;
            /* Only the known position codes are accepted */
            switch (arg) {
            case SRE_AT_BEGINNING:
            case SRE_AT_BEGINNING_STRING:
            case SRE_AT_BEGINNING_LINE:
            case SRE_AT_END:
            case SRE_AT_END_LINE:
            case SRE_AT_END_STRING:
            case SRE_AT_BOUNDARY:
            case SRE_AT_NON_BOUNDARY:
            case SRE_AT_LOC_BOUNDARY:
            case SRE_AT_LOC_NON_BOUNDARY:
            case SRE_AT_UNI_BOUNDARY:
            case SRE_AT_UNI_NON_BOUNDARY:
                break;
            default:
                FAIL;
            }
            break;

        case SRE_OP_ANY:
        case SRE_OP_ANY_ALL:
            /* These have no operands */
            break;

        case SRE_OP_IN:
        case SRE_OP_IN_IGNORE:
        case SRE_OP_IN_UNI_IGNORE:
        case SRE_OP_IN_LOC_IGNORE:
            GET_SKIP;
            /* Stop 1 before the end; we check the FAILURE below */
            if (!_validate_charset(code, code+skip-2))
                FAIL;
            if (code[skip-2] != SRE_OP_FAILURE)
                FAIL;
            code += skip-1;
            break;

        case SRE_OP_INFO:
            {
                /* A minimal info field is
                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
                   more follows. */
                SRE_CODE flags, i;
                SRE_CODE *newcode;
                GET_SKIP;
                /* newcode: where the cursor must be once the whole
                   info block has been consumed */
                newcode = code+skip-1;
                GET_ARG; flags = arg;
                GET_ARG;
                GET_ARG;
                /* Check that only valid flags are present */
                if ((flags & ~(SRE_INFO_PREFIX |
                               SRE_INFO_LITERAL |
                               SRE_INFO_CHARSET)) != 0)
                    FAIL;
                /* PREFIX and CHARSET are mutually exclusive */
                if ((flags & SRE_INFO_PREFIX) &&
                    (flags & SRE_INFO_CHARSET))
                    FAIL;
                /* LITERAL implies PREFIX */
                if ((flags & SRE_INFO_LITERAL) &&
                    !(flags & SRE_INFO_PREFIX))
                    FAIL;
                /* Validate the prefix */
                if (flags & SRE_INFO_PREFIX) {
                    SRE_CODE prefix_len;
                    GET_ARG; prefix_len = arg;
                    GET_ARG;
                    /* Here comes the prefix string */
                    if (prefix_len > (uintptr_t)(newcode - code))
                        FAIL;
                    code += prefix_len;
                    /* And here comes the overlap table */
                    if (prefix_len > (uintptr_t)(newcode - code))
                        FAIL;
                    /* Each overlap value should be < prefix_len */
                    for (i = 0; i < prefix_len; i++) {
                        if (code[i] >= prefix_len)
                            FAIL;
                    }
                    code += prefix_len;
                }
                /* Validate the charset */
                if (flags & SRE_INFO_CHARSET) {
                    if (!_validate_charset(code, newcode-1))
                        FAIL;
                    if (newcode[-1] != SRE_OP_FAILURE)
                        FAIL;
                    code = newcode;
                }
                else if (code != newcode) {
                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
                    FAIL;
                }
            }
            break;

        case SRE_OP_BRANCH:
            {
                SRE_CODE *target = NULL;
                /* One iteration per alternative; a zero skip word
                   ends the list of alternatives */
                for (;;) {
                    GET_SKIP;
                    if (skip == 0)
                        break;
                    /* Stop 2 before the end; we check the JUMP below */
                    if (!_validate_inner(code, code+skip-3, groups))
                        FAIL;
                    code += skip-3;
                    /* Check that it ends with a JUMP, and that each JUMP
                       has the same target */
                    GET_OP;
                    if (op != SRE_OP_JUMP)
                        FAIL;
                    GET_SKIP;
                    if (target == NULL)
                        target = code+skip-1;
                    else if (code+skip-1 != target)
                        FAIL;
                }
            }
            break;

        case SRE_OP_REPEAT_ONE:
        case SRE_OP_MIN_REPEAT_ONE:
            {
                SRE_CODE min, max;
                GET_SKIP;
                GET_ARG; min = arg;
                GET_ARG; max = arg;
                if (min > max)
                    FAIL;
                if (max > SRE_MAXREPEAT)
                    FAIL;
                /* The repeated item must be followed by SUCCESS */
                if (!_validate_inner(code, code+skip-4, groups))
                    FAIL;
                code += skip-4;
                GET_OP;
                if (op != SRE_OP_SUCCESS)
                    FAIL;
            }
            break;

        case SRE_OP_REPEAT:
            {
                SRE_CODE min, max;
                GET_SKIP;
                GET_ARG; min = arg;
                GET_ARG; max = arg;
                if (min > max)
                    FAIL;
                if (max > SRE_MAXREPEAT)
                    FAIL;
                /* The repeated body must be followed by MAX_UNTIL or
                   MIN_UNTIL */
                if (!_validate_inner(code, code+skip-3, groups))
                    FAIL;
                code += skip-3;
                GET_OP;
                if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
                    FAIL;
            }
            break;

        case SRE_OP_GROUPREF:
        case SRE_OP_GROUPREF_IGNORE:
        case SRE_OP_GROUPREF_UNI_IGNORE:
        case SRE_OP_GROUPREF_LOC_IGNORE:
            GET_ARG;
            /* The referenced group must exist */
            if (arg >= (size_t)groups)
                FAIL;
            break;

        case SRE_OP_GROUPREF_EXISTS:
            /* The regex syntax for this is: '(?(group)then|else)', where
               'group' is either an integer group number or a group name,
               'then' and 'else' are sub-regexes, and 'else' is optional. */
            GET_ARG;
            if (arg >= (size_t)groups)
                FAIL;
            GET_SKIP_ADJ(1);
            code--; /* The skip is relative to the first arg! */
            /* There are two possibilities here: if there is both a 'then'
               part and an 'else' part, the generated code looks like:

               GROUPREF_EXISTS
               <group>
               <skipyes>
               ...then part...
               JUMP
               <skipno>
               (<skipyes> jumps here)
               ...else part...
               (<skipno> jumps here)

               If there is only a 'then' part, it looks like:

               GROUPREF_EXISTS
               <group>
               <skip>
               ...then part...
               (<skip> jumps here)

               There is no direct way to decide which it is, and we don't want
               to allow arbitrary jumps anywhere in the code; so we just look
               for a JUMP opcode preceding our skip target.
            */
            if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
                code[skip-3] == SRE_OP_JUMP)
            {
                VTRACE(("both then and else parts present\n"));
                if (!_validate_inner(code+1, code+skip-3, groups))
                    FAIL;
                code += skip-2; /* Position after JUMP, at <skipno> */
                GET_SKIP;
                if (!_validate_inner(code, code+skip-1, groups))
                    FAIL;
                code += skip-1;
            }
            else {
                VTRACE(("only a then part present\n"));
                if (!_validate_inner(code+1, code+skip-1, groups))
                    FAIL;
                code += skip-1;
            }
            break;

        case SRE_OP_ASSERT:
        case SRE_OP_ASSERT_NOT:
            GET_SKIP;
            GET_ARG; /* 0 for lookahead, width for lookbehind */
            code--; /* Back up over arg to simplify math below */
            if (arg & 0x80000000)
                FAIL; /* Width too large */
            /* Stop 1 before the end; we check the SUCCESS below */
            if (!_validate_inner(code+1, code+skip-2, groups))
                FAIL;
            code += skip-2;
            GET_OP;
            if (op != SRE_OP_SUCCESS)
                FAIL;
            break;

        default:
            /* Unknown opcode */
            FAIL;

        }
    }

    VTRACE(("okay\n"));
    return 1;
}
1856
1857 static int
_validate_outer(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)1858 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1859 {
1860 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1861 code >= end || end[-1] != SRE_OP_SUCCESS)
1862 FAIL;
1863 return _validate_inner(code, end-1, groups);
1864 }
1865
1866 static int
_validate(PatternObject * self)1867 _validate(PatternObject *self)
1868 {
1869 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1870 {
1871 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1872 return 0;
1873 }
1874 else
1875 VTRACE(("Success!\n"));
1876 return 1;
1877 }
1878
1879 /* -------------------------------------------------------------------- */
1880 /* match methods */
1881
1882 static void
match_dealloc(MatchObject * self)1883 match_dealloc(MatchObject* self)
1884 {
1885 Py_XDECREF(self->regs);
1886 Py_XDECREF(self->string);
1887 Py_DECREF(self->pattern);
1888 PyObject_DEL(self);
1889 }
1890
1891 static PyObject*
match_getslice_by_index(MatchObject * self,Py_ssize_t index,PyObject * def)1892 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
1893 {
1894 Py_ssize_t length;
1895 int isbytes, charsize;
1896 Py_buffer view;
1897 PyObject *result;
1898 const void* ptr;
1899 Py_ssize_t i, j;
1900
1901 assert(0 <= index && index < self->groups);
1902 index *= 2;
1903
1904 if (self->string == Py_None || self->mark[index] < 0) {
1905 /* return default value if the string or group is undefined */
1906 Py_INCREF(def);
1907 return def;
1908 }
1909
1910 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
1911 if (ptr == NULL)
1912 return NULL;
1913
1914 i = self->mark[index];
1915 j = self->mark[index+1];
1916 i = Py_MIN(i, length);
1917 j = Py_MIN(j, length);
1918 result = getslice(isbytes, ptr, self->string, i, j);
1919 if (isbytes && view.buf != NULL)
1920 PyBuffer_Release(&view);
1921 return result;
1922 }
1923
1924 static Py_ssize_t
match_getindex(MatchObject * self,PyObject * index)1925 match_getindex(MatchObject* self, PyObject* index)
1926 {
1927 Py_ssize_t i;
1928
1929 if (index == NULL)
1930 /* Default value */
1931 return 0;
1932
1933 if (PyIndex_Check(index)) {
1934 i = PyNumber_AsSsize_t(index, NULL);
1935 }
1936 else {
1937 i = -1;
1938
1939 if (self->pattern->groupindex) {
1940 index = PyDict_GetItemWithError(self->pattern->groupindex, index);
1941 if (index && PyLong_Check(index)) {
1942 i = PyLong_AsSsize_t(index);
1943 }
1944 }
1945 }
1946 if (i < 0 || i >= self->groups) {
1947 /* raise IndexError if we were given a bad group number */
1948 if (!PyErr_Occurred()) {
1949 PyErr_SetString(PyExc_IndexError, "no such group");
1950 }
1951 return -1;
1952 }
1953
1954 return i;
1955 }
1956
1957 static PyObject*
match_getslice(MatchObject * self,PyObject * index,PyObject * def)1958 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
1959 {
1960 Py_ssize_t i = match_getindex(self, index);
1961
1962 if (i < 0) {
1963 return NULL;
1964 }
1965
1966 return match_getslice_by_index(self, i, def);
1967 }
1968
1969 /*[clinic input]
1970 _sre.SRE_Match.expand
1971
1972 template: object
1973
1974 Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
1975 [clinic start generated code]*/
1976
1977 static PyObject *
_sre_SRE_Match_expand_impl(MatchObject * self,PyObject * template)1978 _sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
1979 /*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
1980 {
1981 /* delegate to Python code */
1982 return call(
1983 SRE_PY_MODULE, "_expand",
1984 PyTuple_Pack(3, self->pattern, self, template)
1985 );
1986 }
1987
1988 static PyObject*
match_group(MatchObject * self,PyObject * args)1989 match_group(MatchObject* self, PyObject* args)
1990 {
1991 PyObject* result;
1992 Py_ssize_t i, size;
1993
1994 size = PyTuple_GET_SIZE(args);
1995
1996 switch (size) {
1997 case 0:
1998 result = match_getslice(self, _PyLong_Zero, Py_None);
1999 break;
2000 case 1:
2001 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2002 break;
2003 default:
2004 /* fetch multiple items */
2005 result = PyTuple_New(size);
2006 if (!result)
2007 return NULL;
2008 for (i = 0; i < size; i++) {
2009 PyObject* item = match_getslice(
2010 self, PyTuple_GET_ITEM(args, i), Py_None
2011 );
2012 if (!item) {
2013 Py_DECREF(result);
2014 return NULL;
2015 }
2016 PyTuple_SET_ITEM(result, i, item);
2017 }
2018 break;
2019 }
2020 return result;
2021 }
2022
2023 static PyObject*
match_getitem(MatchObject * self,PyObject * name)2024 match_getitem(MatchObject* self, PyObject* name)
2025 {
2026 return match_getslice(self, name, Py_None);
2027 }
2028
2029 /*[clinic input]
2030 _sre.SRE_Match.groups
2031
2032 default: object = None
2033 Is used for groups that did not participate in the match.
2034
2035 Return a tuple containing all the subgroups of the match, from 1.
2036 [clinic start generated code]*/
2037
2038 static PyObject *
_sre_SRE_Match_groups_impl(MatchObject * self,PyObject * default_value)2039 _sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2040 /*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2041 {
2042 PyObject* result;
2043 Py_ssize_t index;
2044
2045 result = PyTuple_New(self->groups-1);
2046 if (!result)
2047 return NULL;
2048
2049 for (index = 1; index < self->groups; index++) {
2050 PyObject* item;
2051 item = match_getslice_by_index(self, index, default_value);
2052 if (!item) {
2053 Py_DECREF(result);
2054 return NULL;
2055 }
2056 PyTuple_SET_ITEM(result, index-1, item);
2057 }
2058
2059 return result;
2060 }
2061
2062 /*[clinic input]
2063 _sre.SRE_Match.groupdict
2064
2065 default: object = None
2066 Is used for groups that did not participate in the match.
2067
2068 Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2069 [clinic start generated code]*/
2070
2071 static PyObject *
_sre_SRE_Match_groupdict_impl(MatchObject * self,PyObject * default_value)2072 _sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2073 /*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
2074 {
2075 PyObject *result;
2076 PyObject *key;
2077 PyObject *value;
2078 Py_ssize_t pos = 0;
2079 Py_hash_t hash;
2080
2081 result = PyDict_New();
2082 if (!result || !self->pattern->groupindex)
2083 return result;
2084
2085 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2086 int status;
2087 Py_INCREF(key);
2088 value = match_getslice(self, key, default_value);
2089 if (!value) {
2090 Py_DECREF(key);
2091 goto failed;
2092 }
2093 status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2094 Py_DECREF(value);
2095 Py_DECREF(key);
2096 if (status < 0)
2097 goto failed;
2098 }
2099
2100 return result;
2101
2102 failed:
2103 Py_DECREF(result);
2104 return NULL;
2105 }
2106
2107 /*[clinic input]
2108 _sre.SRE_Match.start -> Py_ssize_t
2109
2110 group: object(c_default="NULL") = 0
2111 /
2112
2113 Return index of the start of the substring matched by group.
2114 [clinic start generated code]*/
2115
2116 static Py_ssize_t
_sre_SRE_Match_start_impl(MatchObject * self,PyObject * group)2117 _sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2118 /*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2119 {
2120 Py_ssize_t index = match_getindex(self, group);
2121
2122 if (index < 0) {
2123 return -1;
2124 }
2125
2126 /* mark is -1 if group is undefined */
2127 return self->mark[index*2];
2128 }
2129
2130 /*[clinic input]
2131 _sre.SRE_Match.end -> Py_ssize_t
2132
2133 group: object(c_default="NULL") = 0
2134 /
2135
2136 Return index of the end of the substring matched by group.
2137 [clinic start generated code]*/
2138
2139 static Py_ssize_t
_sre_SRE_Match_end_impl(MatchObject * self,PyObject * group)2140 _sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2141 /*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2142 {
2143 Py_ssize_t index = match_getindex(self, group);
2144
2145 if (index < 0) {
2146 return -1;
2147 }
2148
2149 /* mark is -1 if group is undefined */
2150 return self->mark[index*2+1];
2151 }
2152
2153 LOCAL(PyObject*)
_pair(Py_ssize_t i1,Py_ssize_t i2)2154 _pair(Py_ssize_t i1, Py_ssize_t i2)
2155 {
2156 PyObject* pair;
2157 PyObject* item;
2158
2159 pair = PyTuple_New(2);
2160 if (!pair)
2161 return NULL;
2162
2163 item = PyLong_FromSsize_t(i1);
2164 if (!item)
2165 goto error;
2166 PyTuple_SET_ITEM(pair, 0, item);
2167
2168 item = PyLong_FromSsize_t(i2);
2169 if (!item)
2170 goto error;
2171 PyTuple_SET_ITEM(pair, 1, item);
2172
2173 return pair;
2174
2175 error:
2176 Py_DECREF(pair);
2177 return NULL;
2178 }
2179
2180 /*[clinic input]
2181 _sre.SRE_Match.span
2182
2183 group: object(c_default="NULL") = 0
2184 /
2185
2186 For match object m, return the 2-tuple (m.start(group), m.end(group)).
2187 [clinic start generated code]*/
2188
2189 static PyObject *
_sre_SRE_Match_span_impl(MatchObject * self,PyObject * group)2190 _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2191 /*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2192 {
2193 Py_ssize_t index = match_getindex(self, group);
2194
2195 if (index < 0) {
2196 return NULL;
2197 }
2198
2199 /* marks are -1 if group is undefined */
2200 return _pair(self->mark[index*2], self->mark[index*2+1]);
2201 }
2202
2203 static PyObject*
match_regs(MatchObject * self)2204 match_regs(MatchObject* self)
2205 {
2206 PyObject* regs;
2207 PyObject* item;
2208 Py_ssize_t index;
2209
2210 regs = PyTuple_New(self->groups);
2211 if (!regs)
2212 return NULL;
2213
2214 for (index = 0; index < self->groups; index++) {
2215 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2216 if (!item) {
2217 Py_DECREF(regs);
2218 return NULL;
2219 }
2220 PyTuple_SET_ITEM(regs, index, item);
2221 }
2222
2223 Py_INCREF(regs);
2224 self->regs = regs;
2225
2226 return regs;
2227 }
2228
2229 /*[clinic input]
2230 _sre.SRE_Match.__copy__
2231
2232 [clinic start generated code]*/
2233
2234 static PyObject *
_sre_SRE_Match___copy___impl(MatchObject * self)2235 _sre_SRE_Match___copy___impl(MatchObject *self)
2236 /*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2237 {
2238 Py_INCREF(self);
2239 return (PyObject *)self;
2240 }
2241
2242 /*[clinic input]
2243 _sre.SRE_Match.__deepcopy__
2244
2245 memo: object
2246 /
2247
2248 [clinic start generated code]*/
2249
2250 static PyObject *
_sre_SRE_Match___deepcopy__(MatchObject * self,PyObject * memo)2251 _sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2252 /*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
2253 {
2254 Py_INCREF(self);
2255 return (PyObject *)self;
2256 }
2257
/* Docstring text for match objects.
   NOTE(review): presumably referenced by the match type definition
   elsewhere in this file -- confirm. */
PyDoc_STRVAR(match_doc,
"The result of re.match() and re.search().\n\
Match objects always have a boolean value of True.");

/* Docstring text for the group() method.  It is written by hand; no
   Argument Clinic block precedes match_group() in this file.
   NOTE(review): presumably referenced by the method table elsewhere in
   this file -- confirm. */
PyDoc_STRVAR(match_group_doc,
"group([group1, ...]) -> str or tuple.\n\
    Return subgroup(s) of the match by indices or names.\n\
    For 0 returns the entire match.");
2266
2267 static PyObject *
match_lastindex_get(MatchObject * self,void * Py_UNUSED (ignored))2268 match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
2269 {
2270 if (self->lastindex >= 0)
2271 return PyLong_FromSsize_t(self->lastindex);
2272 Py_RETURN_NONE;
2273 }
2274
2275 static PyObject *
match_lastgroup_get(MatchObject * self,void * Py_UNUSED (ignored))2276 match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
2277 {
2278 if (self->pattern->indexgroup &&
2279 self->lastindex >= 0 &&
2280 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2281 {
2282 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2283 self->lastindex);
2284 Py_INCREF(result);
2285 return result;
2286 }
2287 Py_RETURN_NONE;
2288 }
2289
2290 static PyObject *
match_regs_get(MatchObject * self,void * Py_UNUSED (ignored))2291 match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
2292 {
2293 if (self->regs) {
2294 Py_INCREF(self->regs);
2295 return self->regs;
2296 } else
2297 return match_regs(self);
2298 }
2299
2300 static PyObject *
match_repr(MatchObject * self)2301 match_repr(MatchObject *self)
2302 {
2303 PyObject *result;
2304 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2305 if (group0 == NULL)
2306 return NULL;
2307 result = PyUnicode_FromFormat(
2308 "<%s object; span=(%zd, %zd), match=%.50R>",
2309 Py_TYPE(self)->tp_name,
2310 self->mark[0], self->mark[1], group0);
2311 Py_DECREF(group0);
2312 return result;
2313 }
2314
2315
2316 static PyObject*
pattern_new_match(PatternObject * pattern,SRE_STATE * state,Py_ssize_t status)2317 pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
2318 {
2319 /* create match object (from state object) */
2320
2321 MatchObject* match;
2322 Py_ssize_t i, j;
2323 char* base;
2324 int n;
2325
2326 if (status > 0) {
2327
2328 /* create match object (with room for extra group marks) */
2329 /* coverity[ampersand_in_size] */
2330 match = PyObject_NewVar(MatchObject, &Match_Type,
2331 2*(pattern->groups+1));
2332 if (!match)
2333 return NULL;
2334
2335 Py_INCREF(pattern);
2336 match->pattern = pattern;
2337
2338 Py_INCREF(state->string);
2339 match->string = state->string;
2340
2341 match->regs = NULL;
2342 match->groups = pattern->groups+1;
2343
2344 /* fill in group slices */
2345
2346 base = (char*) state->beginning;
2347 n = state->charsize;
2348
2349 match->mark[0] = ((char*) state->start - base) / n;
2350 match->mark[1] = ((char*) state->ptr - base) / n;
2351
2352 for (i = j = 0; i < pattern->groups; i++, j+=2)
2353 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2354 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2355 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2356 } else
2357 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2358
2359 match->pos = state->pos;
2360 match->endpos = state->endpos;
2361
2362 match->lastindex = state->lastindex;
2363
2364 return (PyObject*) match;
2365
2366 } else if (status == 0) {
2367
2368 /* no match */
2369 Py_RETURN_NONE;
2370
2371 }
2372
2373 /* internal error */
2374 pattern_error(status);
2375 return NULL;
2376 }
2377
2378
2379 /* -------------------------------------------------------------------- */
2380 /* scanner methods (experimental) */
2381
2382 static void
scanner_dealloc(ScannerObject * self)2383 scanner_dealloc(ScannerObject* self)
2384 {
2385 state_fini(&self->state);
2386 Py_XDECREF(self->pattern);
2387 PyObject_DEL(self);
2388 }
2389
2390 /*[clinic input]
2391 _sre.SRE_Scanner.match
2392
2393 [clinic start generated code]*/
2394
2395 static PyObject *
_sre_SRE_Scanner_match_impl(ScannerObject * self)2396 _sre_SRE_Scanner_match_impl(ScannerObject *self)
2397 /*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
2398 {
2399 SRE_STATE* state = &self->state;
2400 PyObject* match;
2401 Py_ssize_t status;
2402
2403 if (state->start == NULL)
2404 Py_RETURN_NONE;
2405
2406 state_reset(state);
2407
2408 state->ptr = state->start;
2409
2410 status = sre_match(state, PatternObject_GetCode(self->pattern));
2411 if (PyErr_Occurred())
2412 return NULL;
2413
2414 match = pattern_new_match((PatternObject*) self->pattern,
2415 state, status);
2416
2417 if (status == 0)
2418 state->start = NULL;
2419 else {
2420 state->must_advance = (state->ptr == state->start);
2421 state->start = state->ptr;
2422 }
2423
2424 return match;
2425 }
2426
2427
2428 /*[clinic input]
2429 _sre.SRE_Scanner.search
2430
2431 [clinic start generated code]*/
2432
2433 static PyObject *
_sre_SRE_Scanner_search_impl(ScannerObject * self)2434 _sre_SRE_Scanner_search_impl(ScannerObject *self)
2435 /*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
2436 {
2437 SRE_STATE* state = &self->state;
2438 PyObject* match;
2439 Py_ssize_t status;
2440
2441 if (state->start == NULL)
2442 Py_RETURN_NONE;
2443
2444 state_reset(state);
2445
2446 state->ptr = state->start;
2447
2448 status = sre_search(state, PatternObject_GetCode(self->pattern));
2449 if (PyErr_Occurred())
2450 return NULL;
2451
2452 match = pattern_new_match((PatternObject*) self->pattern,
2453 state, status);
2454
2455 if (status == 0)
2456 state->start = NULL;
2457 else {
2458 state->must_advance = (state->ptr == state->start);
2459 state->start = state->ptr;
2460 }
2461
2462 return match;
2463 }
2464
2465 static PyObject *
pattern_scanner(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)2466 pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
2467 {
2468 ScannerObject* scanner;
2469
2470 /* create scanner object */
2471 scanner = PyObject_New(ScannerObject, &Scanner_Type);
2472 if (!scanner)
2473 return NULL;
2474 scanner->pattern = NULL;
2475
2476 /* create search state object */
2477 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2478 Py_DECREF(scanner);
2479 return NULL;
2480 }
2481
2482 Py_INCREF(self);
2483 scanner->pattern = (PyObject*) self;
2484
2485 return (PyObject*) scanner;
2486 }
2487
2488 static Py_hash_t
pattern_hash(PatternObject * self)2489 pattern_hash(PatternObject *self)
2490 {
2491 Py_hash_t hash, hash2;
2492
2493 hash = PyObject_Hash(self->pattern);
2494 if (hash == -1) {
2495 return -1;
2496 }
2497
2498 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2499 hash ^= hash2;
2500
2501 hash ^= self->flags;
2502 hash ^= self->isbytes;
2503 hash ^= self->codesize;
2504
2505 if (hash == -1) {
2506 hash = -2;
2507 }
2508 return hash;
2509 }
2510
2511 static PyObject*
pattern_richcompare(PyObject * lefto,PyObject * righto,int op)2512 pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2513 {
2514 PatternObject *left, *right;
2515 int cmp;
2516
2517 if (op != Py_EQ && op != Py_NE) {
2518 Py_RETURN_NOTIMPLEMENTED;
2519 }
2520
2521 if (!Py_IS_TYPE(lefto, &Pattern_Type) || !Py_IS_TYPE(righto, &Pattern_Type)) {
2522 Py_RETURN_NOTIMPLEMENTED;
2523 }
2524
2525 if (lefto == righto) {
2526 /* a pattern is equal to itself */
2527 return PyBool_FromLong(op == Py_EQ);
2528 }
2529
2530 left = (PatternObject *)lefto;
2531 right = (PatternObject *)righto;
2532
2533 cmp = (left->flags == right->flags
2534 && left->isbytes == right->isbytes
2535 && left->codesize == right->codesize);
2536 if (cmp) {
2537 /* Compare the code and the pattern because the same pattern can
2538 produce different codes depending on the locale used to compile the
2539 pattern when the re.LOCALE flag is used. Don't compare groups,
2540 indexgroup nor groupindex: they are derivated from the pattern. */
2541 cmp = (memcmp(left->code, right->code,
2542 sizeof(left->code[0]) * left->codesize) == 0);
2543 }
2544 if (cmp) {
2545 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2546 Py_EQ);
2547 if (cmp < 0) {
2548 return NULL;
2549 }
2550 }
2551 if (op == Py_NE) {
2552 cmp = !cmp;
2553 }
2554 return PyBool_FromLong(cmp);
2555 }
2556
2557 #include "clinic/_sre.c.h"
2558
/* Method table for pattern objects (installed as Pattern_Type.tp_methods).
   The _SRE_SRE_PATTERN_*_METHODDEF entries are macros, presumably
   supplied by the clinic/_sre.c.h include; each is expected to expand
   to one table entry with its trailing comma (TODO confirm). */
static PyMethodDef pattern_methods[] = {
    _SRE_SRE_PATTERN_MATCH_METHODDEF
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
    _SRE_SRE_PATTERN_SUB_METHODDEF
    _SRE_SRE_PATTERN_SUBN_METHODDEF
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
    _SRE_SRE_PATTERN___COPY___METHODDEF
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
    /* class-level subscription support, implemented by Py_GenericAlias */
    {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS,
     PyDoc_STR("See PEP 585")},
    {NULL, NULL}  /* Sentinel */
};
2575
2576 static PyGetSetDef pattern_getset[] = {
2577 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2578 "A dictionary mapping group names to group numbers."},
2579 {NULL} /* Sentinel */
2580 };
2581
2582 #define PAT_OFF(x) offsetof(PatternObject, x)
2583 static PyMemberDef pattern_members[] = {
2584 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY,
2585 "The pattern string from which the RE object was compiled."},
2586 {"flags", T_INT, PAT_OFF(flags), READONLY,
2587 "The regex matching flags."},
2588 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY,
2589 "The number of capturing groups in the pattern."},
2590 {NULL} /* Sentinel */
2591 };
2592
2593 static PyTypeObject Pattern_Type = {
2594 PyVarObject_HEAD_INIT(NULL, 0)
2595 "re.Pattern",
2596 sizeof(PatternObject), sizeof(SRE_CODE),
2597 (destructor)pattern_dealloc, /* tp_dealloc */
2598 0, /* tp_vectorcall_offset */
2599 0, /* tp_getattr */
2600 0, /* tp_setattr */
2601 0, /* tp_as_async */
2602 (reprfunc)pattern_repr, /* tp_repr */
2603 0, /* tp_as_number */
2604 0, /* tp_as_sequence */
2605 0, /* tp_as_mapping */
2606 (hashfunc)pattern_hash, /* tp_hash */
2607 0, /* tp_call */
2608 0, /* tp_str */
2609 0, /* tp_getattro */
2610 0, /* tp_setattro */
2611 0, /* tp_as_buffer */
2612 Py_TPFLAGS_DEFAULT, /* tp_flags */
2613 pattern_doc, /* tp_doc */
2614 0, /* tp_traverse */
2615 0, /* tp_clear */
2616 pattern_richcompare, /* tp_richcompare */
2617 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2618 0, /* tp_iter */
2619 0, /* tp_iternext */
2620 pattern_methods, /* tp_methods */
2621 pattern_members, /* tp_members */
2622 pattern_getset, /* tp_getset */
2623 };
2624
2625 /* Match objects do not support length or assignment, but do support
2626 __getitem__. */
2627 static PyMappingMethods match_as_mapping = {
2628 NULL,
2629 (binaryfunc)match_getitem,
2630 NULL
2631 };
2632
/* Method table for match objects (installed as Match_Type.tp_methods).
   "group" is declared by hand with METH_VARARGS; the
   _SRE_SRE_MATCH_*_METHODDEF entries are macros, presumably supplied by
   the clinic/_sre.c.h include (TODO confirm). */
static PyMethodDef match_methods[] = {
    {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
    _SRE_SRE_MATCH_START_METHODDEF
    _SRE_SRE_MATCH_END_METHODDEF
    _SRE_SRE_MATCH_SPAN_METHODDEF
    _SRE_SRE_MATCH_GROUPS_METHODDEF
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
    _SRE_SRE_MATCH_EXPAND_METHODDEF
    _SRE_SRE_MATCH___COPY___METHODDEF
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
    /* class-level subscription support, implemented by Py_GenericAlias */
    {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS,
     PyDoc_STR("See PEP 585")},
    {NULL, NULL}  /* Sentinel */
};
2647
2648 static PyGetSetDef match_getset[] = {
2649 {"lastindex", (getter)match_lastindex_get, (setter)NULL,
2650 "The integer index of the last matched capturing group."},
2651 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
2652 "The name of the last matched capturing group."},
2653 {"regs", (getter)match_regs_get, (setter)NULL},
2654 {NULL}
2655 };
2656
2657 #define MATCH_OFF(x) offsetof(MatchObject, x)
2658 static PyMemberDef match_members[] = {
2659 {"string", T_OBJECT, MATCH_OFF(string), READONLY,
2660 "The string passed to match() or search()."},
2661 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY,
2662 "The regular expression object."},
2663 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY,
2664 "The index into the string at which the RE engine started looking for a match."},
2665 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY,
2666 "The index into the string beyond which the RE engine will not go."},
2667 {NULL}
2668 };
2669
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2672
2673 static PyTypeObject Match_Type = {
2674 PyVarObject_HEAD_INIT(NULL,0)
2675 "re.Match",
2676 sizeof(MatchObject), sizeof(Py_ssize_t),
2677 (destructor)match_dealloc, /* tp_dealloc */
2678 0, /* tp_vectorcall_offset */
2679 0, /* tp_getattr */
2680 0, /* tp_setattr */
2681 0, /* tp_as_async */
2682 (reprfunc)match_repr, /* tp_repr */
2683 0, /* tp_as_number */
2684 0, /* tp_as_sequence */
2685 &match_as_mapping, /* tp_as_mapping */
2686 0, /* tp_hash */
2687 0, /* tp_call */
2688 0, /* tp_str */
2689 0, /* tp_getattro */
2690 0, /* tp_setattro */
2691 0, /* tp_as_buffer */
2692 Py_TPFLAGS_DEFAULT, /* tp_flags */
2693 match_doc, /* tp_doc */
2694 0, /* tp_traverse */
2695 0, /* tp_clear */
2696 0, /* tp_richcompare */
2697 0, /* tp_weaklistoffset */
2698 0, /* tp_iter */
2699 0, /* tp_iternext */
2700 match_methods, /* tp_methods */
2701 match_members, /* tp_members */
2702 match_getset, /* tp_getset */
2703 };
2704
/* Method table for scanner objects (installed as Scanner_Type.tp_methods).
   Both entries are macros, presumably supplied by the clinic/_sre.c.h
   include and bound to _sre_SRE_Scanner_match_impl() and
   _sre_SRE_Scanner_search_impl() (TODO confirm). */
static PyMethodDef scanner_methods[] = {
    _SRE_SRE_SCANNER_MATCH_METHODDEF
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2710
2711 #define SCAN_OFF(x) offsetof(ScannerObject, x)
2712 static PyMemberDef scanner_members[] = {
2713 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2714 {NULL} /* Sentinel */
2715 };
2716
2717 static PyTypeObject Scanner_Type = {
2718 PyVarObject_HEAD_INIT(NULL, 0)
2719 "_" SRE_MODULE ".SRE_Scanner",
2720 sizeof(ScannerObject), 0,
2721 (destructor)scanner_dealloc,/* tp_dealloc */
2722 0, /* tp_vectorcall_offset */
2723 0, /* tp_getattr */
2724 0, /* tp_setattr */
2725 0, /* tp_as_async */
2726 0, /* tp_repr */
2727 0, /* tp_as_number */
2728 0, /* tp_as_sequence */
2729 0, /* tp_as_mapping */
2730 0, /* tp_hash */
2731 0, /* tp_call */
2732 0, /* tp_str */
2733 0, /* tp_getattro */
2734 0, /* tp_setattro */
2735 0, /* tp_as_buffer */
2736 Py_TPFLAGS_DEFAULT, /* tp_flags */
2737 0, /* tp_doc */
2738 0, /* tp_traverse */
2739 0, /* tp_clear */
2740 0, /* tp_richcompare */
2741 0, /* tp_weaklistoffset */
2742 0, /* tp_iter */
2743 0, /* tp_iternext */
2744 scanner_methods, /* tp_methods */
2745 scanner_members, /* tp_members */
2746 0, /* tp_getset */
2747 };
2748
/* Module-level function table, referenced from the sremodule definition.
   Each entry is a *_METHODDEF macro, presumably supplied by the
   clinic/_sre.c.h include (TODO confirm). */
static PyMethodDef _functions[] = {
    _SRE_COMPILE_METHODDEF
    _SRE_GETCODESIZE_METHODDEF
    _SRE_ASCII_ISCASED_METHODDEF
    _SRE_UNICODE_ISCASED_METHODDEF
    _SRE_ASCII_TOLOWER_METHODDEF
    _SRE_UNICODE_TOLOWER_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2758
2759 static struct PyModuleDef sremodule = {
2760 PyModuleDef_HEAD_INIT,
2761 "_" SRE_MODULE,
2762 NULL,
2763 -1,
2764 _functions,
2765 NULL,
2766 NULL,
2767 NULL,
2768 NULL
2769 };
2770
PyInit__sre(void)2771 PyMODINIT_FUNC PyInit__sre(void)
2772 {
2773 PyObject* m;
2774 PyObject* d;
2775 PyObject* x;
2776
2777 /* Patch object types */
2778 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2779 PyType_Ready(&Scanner_Type))
2780 return NULL;
2781
2782 m = PyModule_Create(&sremodule);
2783 if (m == NULL)
2784 return NULL;
2785 d = PyModule_GetDict(m);
2786
2787 x = PyLong_FromLong(SRE_MAGIC);
2788 if (x) {
2789 PyDict_SetItemString(d, "MAGIC", x);
2790 Py_DECREF(x);
2791 }
2792
2793 x = PyLong_FromLong(sizeof(SRE_CODE));
2794 if (x) {
2795 PyDict_SetItemString(d, "CODESIZE", x);
2796 Py_DECREF(x);
2797 }
2798
2799 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2800 if (x) {
2801 PyDict_SetItemString(d, "MAXREPEAT", x);
2802 Py_DECREF(x);
2803 }
2804
2805 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2806 if (x) {
2807 PyDict_SetItemString(d, "MAXGROUPS", x);
2808 Py_DECREF(x);
2809 }
2810
2811 x = PyUnicode_FromString(copyright);
2812 if (x) {
2813 PyDict_SetItemString(d, "copyright", x);
2814 Py_DECREF(x);
2815 }
2816 return m;
2817 }
2818
2819 /* vim:ts=4:sw=4:et
2820 */
2821