1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9            Copyright (c) 1997-2014 University of Cambridge
10 
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14 
15     * Redistributions of source code must retain the above copyright notice,
16       this list of conditions and the following disclaimer.
17 
18     * Redistributions in binary form must reproduce the above copyright
19       notice, this list of conditions and the following disclaimer in the
20       documentation and/or other materials provided with the distribution.
21 
22     * Neither the name of the University of Cambridge nor the names of its
23       contributors may be used to endorse or promote products derived from
24       this software without specific prior written permission.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39 
40 /* This module contains pcre_exec(), the externally visible function that does
41 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42 possible. There are also some static supporting functions. */
43 
44 #ifdef HAVE_CONFIG_H
45 #include "config.h"
46 #endif
47 
/* Short aliases for match-data field names. They are defined ahead of the
pcre_internal.h include; presumably macros in that header expand to them, so
the ordering should be kept (TODO confirm against pcre_internal.h). */

#define NLBLOCK md             /* Block containing newline information */
#define PSSTART start_subject  /* Field containing processed string start */
#define PSEND   end_subject    /* Field containing processed string end */
51 
52 #include "pcre_internal.h"
53 
/* Undefine some potentially clashing cpp symbols. The names min and max are
used as ordinary identifiers later in this file, so any function-like macros
of the same name must be removed first. */

#undef min
#undef max
58 
/* The md->capture_last field uses the lower 16 bits for the last captured
substring (which can never be greater than 65535) and a bit in the top half
to mean "capture vector overflowed". This odd way of doing things was
implemented when it was realized that preserving and restoring the overflow bit
whenever the last capture number was saved/restored made for a neater
interface, and doing it this way saved on (a) another variable, which would
have increased the stack frame size (a big NO-NO in PCRE) and (b) another
separate set of save/restore instructions. The following defines are used in
implementing this. The two masks are disjoint and together cover all 32 bits;
OVFLBIT is the lowest bit of OVFLMASK. */

#define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
#define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
#define OVFLBIT     0x00010000    /* The bit that is set for overflow */
72 
/* Values for setting in md->match_function_type to indicate two special types
of call to match(). We do it this way to save on using another stack variable,
as stack usage is to be discouraged. A value of zero means an ordinary call. */

#define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
#define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
79 
/* Non-error returns from the match() function. Error returns are externally
defined PCRE_ERROR_xxx codes, which are all negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

/* Special internal returns from the match() function. Make them sufficiently
negative to avoid the external error codes. */

#define MATCH_ACCEPT       (-999)
#define MATCH_KETRPOS      (-998)
#define MATCH_ONCE         (-997)

/* The next 5 must be kept together and in sequence so that a test that checks
for any one of them can use a range. MATCH_BACKTRACK_MIN and
MATCH_BACKTRACK_MAX name the two ends of that range. */

#define MATCH_COMMIT       (-996)
#define MATCH_PRUNE        (-995)
#define MATCH_SKIP         (-994)
#define MATCH_SKIP_ARG     (-993)
#define MATCH_THEN         (-992)
#define MATCH_BACKTRACK_MAX MATCH_THEN
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
101 
/* Maximum number of ints of offset to save on the stack for recursive calls.
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
because the offset vector is always a multiple of 3 long. */

#define REC_STACK_SAVE_MAX 30
107 
/* Min and max values for the common repeats; for the maxima, 0 => infinity.
The two tables are parallel (11 entries each): entry i of rep_min and entry i
of rep_max describe the same repeat. NOTE(review): the index is presumably an
opcode offset from the first repeat opcode - confirm at the point of use. */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
112 
#ifdef PCRE_DEBUG
/*************************************************
*        Debugging function to print chars       *
*************************************************/

/* Print a sequence of characters in printable format, stopping at the end of
the subject if requested. Printable characters are written as themselves; all
others are written as \x{hh} hexadecimal escapes.

Arguments:
  p           points to characters
  length      number to print
  is_subject  TRUE if printing from within md->start_subject; the count is
                then clamped so that printing stops at md->end_subject
  md          pointer to matching data block; it is always dereferenced (for
                the utf flag), so it must be valid even when is_subject is
                FALSE

Returns:     nothing
*/

static void
pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
{
pcre_uint32 c;
BOOL utf = md->utf;   /* NOTE(review): presumably referenced by the
                         UCHAR21INCTEST macro in some builds - confirm */
if (is_subject && length > md->end_subject - p)
  length = (int)(md->end_subject - p);
while (length-- > 0)
  {
  c = UCHAR21INCTEST(p);

  /* isprint() is only defined for arguments representable as unsigned char
  (or EOF), so wider code units must be range-checked before the call. */

  if (c <= 0xff && isprint((int)c)) printf("%c", (char)c);
    else printf("\\x{%02x}", c);
  }
}
#endif
140 
141 
142 
143 /*************************************************
144 *          Match a back-reference                *
145 *************************************************/
146 
147 /* Normally, if a back reference hasn't been set, the length that is passed is
148 negative, so the match always fails. However, in JavaScript compatibility mode,
149 the length passed is zero. Note that in caseless UTF-8 mode, the number of
150 subject bytes matched may be different to the number of reference bytes.
151 
152 Arguments:
153   offset      index into the offset vector
154   eptr        pointer into the subject
155   length      length of reference to be matched (number of bytes)
156   md          points to match data block
157   caseless    TRUE if caseless
158 
159 Returns:      >= 0 the number of subject bytes matched
160               -1 no match
161               -2 partial match; always given if at end subject
162 */
163 
164 static int
match_ref(int offset,register PCRE_PUCHAR eptr,int length,match_data * md,BOOL caseless)165 match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
166   BOOL caseless)
167 {
168 PCRE_PUCHAR eptr_start = eptr;
169 register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
170 #if defined SUPPORT_UTF && defined SUPPORT_UCP
171 BOOL utf = md->utf;
172 #endif
173 
174 #ifdef PCRE_DEBUG
175 if (eptr >= md->end_subject)
176   printf("matching subject <null>");
177 else
178   {
179   printf("matching subject ");
180   pchars(eptr, length, TRUE, md);
181   }
182 printf(" against backref ");
183 pchars(p, length, FALSE, md);
184 printf("\n");
185 #endif
186 
187 /* Always fail if reference not set (and not JavaScript compatible - in that
188 case the length is passed as zero). */
189 
190 if (length < 0) return -1;
191 
192 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
193 properly if Unicode properties are supported. Otherwise, we can check only
194 ASCII characters. */
195 
196 if (caseless)
197   {
198 #if defined SUPPORT_UTF && defined SUPPORT_UCP
199   if (utf)
200     {
201     /* Match characters up to the end of the reference. NOTE: the number of
202     data units matched may differ, because in UTF-8 there are some characters
203     whose upper and lower case versions code have different numbers of bytes.
204     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
205     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
206     sequence of two of the latter. It is important, therefore, to check the
207     length along the reference, not along the subject (earlier code did this
208     wrong). */
209 
210     PCRE_PUCHAR endptr = p + length;
211     while (p < endptr)
212       {
213       pcre_uint32 c, d;
214       const ucd_record *ur;
215       if (eptr >= md->end_subject) return -2;   /* Partial match */
216       GETCHARINC(c, eptr);
217       GETCHARINC(d, p);
218       ur = GET_UCD(d);
219       if (c != d && c != d + ur->other_case)
220         {
221         const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
222         for (;;)
223           {
224           if (c < *pp) return -1;
225           if (c == *pp++) break;
226           }
227         }
228       }
229     }
230   else
231 #endif
232 
233   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
234   is no UCP support. */
235     {
236     while (length-- > 0)
237       {
238       pcre_uint32 cc, cp;
239       if (eptr >= md->end_subject) return -2;   /* Partial match */
240       cc = UCHAR21TEST(eptr);
241       cp = UCHAR21TEST(p);
242       if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
243       p++;
244       eptr++;
245       }
246     }
247   }
248 
249 /* In the caseful case, we can just compare the bytes, whether or not we
250 are in UTF-8 mode. */
251 
252 else
253   {
254   while (length-- > 0)
255     {
256     if (eptr >= md->end_subject) return -2;   /* Partial match */
257     if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
258     }
259   }
260 
261 return (int)(eptr - eptr_start);
262 }
263 
264 
265 
266 /***************************************************************************
267 ****************************************************************************
268                    RECURSION IN THE match() FUNCTION
269 
270 The match() function is highly recursive, though not every recursive call
271 increases the recursive depth. Nevertheless, some regular expressions can cause
272 it to recurse to a great depth. I was writing for Unix, so I just let it call
273 itself recursively. This uses the stack for saving everything that has to be
274 saved for a recursive call. On Unix, the stack can be large, and this works
275 fine.
276 
277 It turns out that on some non-Unix-like systems there are problems with
278 programs that use a lot of stack. (This despite the fact that every last chip
279 has oodles of memory these days, and techniques for extending the stack have
280 been known for decades.) So....
281 
282 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
283 calls by keeping local variables that need to be preserved in blocks of memory
284 obtained from malloc() instead of on the stack. Macros are used to
285 achieve this so that the actual code doesn't look very different to what it
286 always used to.
287 
288 The original heap-recursive code used longjmp(). However, it seems that this
289 can be very slow on some operating systems. Following a suggestion from Stan
290 Switzer, the use of longjmp() has been abolished, at the cost of having to
291 provide a unique number for each call to RMATCH. There is no way of generating
292 a sequence of numbers at compile time in C. I have given them names, to make
293 them stand out more clearly.
294 
295 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
296 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
297 tests. Furthermore, not using longjmp() means that local dynamic variables
298 don't have indeterminate values; this has meant that the frame size can be
299 reduced because the result can be "passed back" by straight setting of the
300 variable instead of being passed in the frame.
301 ****************************************************************************
302 ***************************************************************************/
303 
/* Numbers for RMATCH calls. Each RMATCH invocation passes one of these as its
last argument to identify the call site. When this list is changed, the code at
HEAP_RETURN below must be updated in sync.  */

enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
       RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
       RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
       RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };
314 
/* These versions of the macros use the stack, as normal. There are debugging
versions and production versions. Note that the "rw" argument of RMATCH isn't
actually used in this definition. Both versions leave the result of the nested
match() call in the local variable rrc, and pass the caller's mstart and
rdepth+1 to the nested call. */

#ifndef NO_RECURSE
#define REGISTER register

#ifdef PCRE_DEBUG
#define RMATCH(ra,rb,rc,rd,re,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }

/* Note that this debugging version evaluates its argument twice. */

#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d\n", ra, __LINE__); \
  return ra; \
  }
#else
#define RMATCH(ra,rb,rc,rd,re,rw) \
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
#define RRETURN(ra) return ra
#endif

#else


/* These versions of the macros manage a private stack on the heap. Note that
the "rd" argument of RMATCH isn't actually used in this definition. It's the md
argument of match(), which never changes.

RMATCH moves to the next frame in the chain, obtaining a new one from
PUBL(stack_malloc) only when the current frame has no successor, so frames are
reused by later calls. It records the call-site number in the current frame's
Xwhere field, fills in the new frame's argument fields, and jumps to
HEAP_RECURSE. The label L_<rw> that it defines marks where execution resumes
after the simulated call returns. */

#define REGISTER

#define RMATCH(ra,rb,rc,rd,re,rw)\
  {\
  heapframe *newframe = frame->Xnextframe;\
  if (newframe == NULL)\
    {\
    newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
    newframe->Xnextframe = NULL;\
    frame->Xnextframe = newframe;\
    }\
  frame->Xwhere = rw;\
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xeptrb = re;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

/* RRETURN steps back to the previous frame. If there is one, the result is
left in rrc and control goes to HEAP_RETURN; only when the outermost frame is
being left does match() really return. The frame being left is not freed
here. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }


/* Structure for remembering the local variables in a private frame. The
frames form a doubly linked chain through Xprevframe and Xnextframe. */

typedef struct heapframe {
  struct heapframe *Xprevframe;
  struct heapframe *Xnextframe;

  /* Function arguments that may change */

  PCRE_PUCHAR Xeptr;
  const pcre_uchar *Xecode;
  PCRE_PUCHAR Xmstart;
  int Xoffset_top;
  eptrblock *Xeptrb;
  unsigned int Xrdepth;

  /* Function local variables */

  PCRE_PUCHAR Xcallpat;
#ifdef SUPPORT_UTF
  PCRE_PUCHAR Xcharptr;
#endif
  PCRE_PUCHAR Xdata;
  PCRE_PUCHAR Xnext;
  PCRE_PUCHAR Xpp;
  PCRE_PUCHAR Xprev;
  PCRE_PUCHAR Xsaved_eptr;

  recursion_info Xnew_recursive;

  BOOL Xcur_is_word;
  BOOL Xcondition;
  BOOL Xprev_is_word;

#ifdef SUPPORT_UCP
  int Xprop_type;
  unsigned int Xprop_value;
  int Xprop_fail_result;
  int Xoclength;
  pcre_uchar Xocchars[6];
#endif

  int Xcodelink;
  int Xctype;
  unsigned int Xfc;
  int Xfi;
  int Xlength;
  int Xmax;
  int Xmin;
  unsigned int Xnumber;
  int Xoffset;
  unsigned int Xop;
  pcre_int32 Xsave_capture_last;
  int Xsave_offset1, Xsave_offset2, Xsave_offset3;
  int Xstacksave[REC_STACK_SAVE_MAX];

  eptrblock Xnewptrb;

  /* Where to jump back to (the call-site number stored by RMATCH) */

  int Xwhere;

} heapframe;

#endif
451 
452 
453 /***************************************************************************
454 ***************************************************************************/
455 
456 
457 
458 /*************************************************
459 *         Match from current position            *
460 *************************************************/
461 
462 /* This function is called recursively in many circumstances. Whenever it
463 returns a negative (error) response, the outer incarnation must also return the
464 same response. */
465 
/* These macros pack up tests that are used for partial matching, and which
appear several times in the code. We set the "hit end" flag if the pointer is
at the end of the subject and also past the start of the subject (i.e.
something has been matched). For hard partial matching (md->partial greater
than 1), we then return PCRE_ERROR_PARTIAL immediately. The second one is used
when we already know we are past the end of the subject, so it omits the
end-of-subject comparison.

Both macros expand to a bare "if" statement and refer to the variables md and
eptr and to the RRETURN macro, so they can be used only inside match(), and
must not be followed directly by an "else". */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr >= md->end_subject && \
      eptr > md->start_used_ptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && eptr > md->start_used_ptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }
487 
488 
489 /* Performance note: It might be tempting to extract commonly used fields from
490 the md structure (e.g. utf, end_subject) into individual variables to improve
491 performance. Tests using gcc on a SPARC disproved this; in the first case, it
492 made performance worse.
493 
494 Arguments:
495    eptr        pointer to current character in subject
496    ecode       pointer to current position in compiled code
497    mstart      pointer to the current match start position (can be modified
498                  by encountering \K)
499    offset_top  current top pointer
500    md          pointer to "static" info for the match
501    eptrb       pointer to chain of blocks containing eptr at start of
502                  brackets - for testing for empty matches
503    rdepth      the recursion depth
504 
505 Returns:       MATCH_MATCH if matched            )  these values are >= 0
506                MATCH_NOMATCH if failed to match  )
507                a negative MATCH_xxx value for PRUNE, SKIP, etc
508                a negative PCRE_ERROR_xxx value if aborted by an error condition
509                  (e.g. stopped by repeated call or recursion limit)
510 */
511 
512 static int
match(REGISTER PCRE_PUCHAR eptr,REGISTER const pcre_uchar * ecode,PCRE_PUCHAR mstart,int offset_top,match_data * md,eptrblock * eptrb,unsigned int rdepth)513 match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
514   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
515   unsigned int rdepth)
516 {
517 /* These variables do not need to be preserved over recursion in this function,
518 so they can be ordinary variables in all cases. Mark some of them with
519 "register" because they are used a lot in loops. */
520 
521 register int  rrc;         /* Returns from recursive calls */
522 register int  i;           /* Used for loops not involving calls to RMATCH() */
523 register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
524 register BOOL utf;         /* Local copy of UTF flag for speed */
525 
526 BOOL minimize, possessive; /* Quantifier options */
527 BOOL caseless;
528 int condcode;
529 
530 /* When recursion is not being used, all "local" variables that have to be
531 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
532 frame on the stack here; subsequent instantiations are obtained from the heap
533 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
534 the top-level on the stack rather than malloc-ing them all gives a performance
535 boost in many cases where there is not much "recursion". */
536 
537 #ifdef NO_RECURSE
538 heapframe *frame = (heapframe *)md->match_frames_base;
539 
540 /* Copy in the original argument variables */
541 
542 frame->Xeptr = eptr;
543 frame->Xecode = ecode;
544 frame->Xmstart = mstart;
545 frame->Xoffset_top = offset_top;
546 frame->Xeptrb = eptrb;
547 frame->Xrdepth = rdepth;
548 
549 /* This is where control jumps back to in order to effect "recursion" */
550 
551 HEAP_RECURSE:
552 
553 /* Macros make the argument variables come from the current frame */
554 
555 #define eptr               frame->Xeptr
556 #define ecode              frame->Xecode
557 #define mstart             frame->Xmstart
558 #define offset_top         frame->Xoffset_top
559 #define eptrb              frame->Xeptrb
560 #define rdepth             frame->Xrdepth
561 
562 /* Ditto for the local variables */
563 
564 #ifdef SUPPORT_UTF
565 #define charptr            frame->Xcharptr
566 #endif
567 #define callpat            frame->Xcallpat
568 #define codelink           frame->Xcodelink
569 #define data               frame->Xdata
570 #define next               frame->Xnext
571 #define pp                 frame->Xpp
572 #define prev               frame->Xprev
573 #define saved_eptr         frame->Xsaved_eptr
574 
575 #define new_recursive      frame->Xnew_recursive
576 
577 #define cur_is_word        frame->Xcur_is_word
578 #define condition          frame->Xcondition
579 #define prev_is_word       frame->Xprev_is_word
580 
581 #ifdef SUPPORT_UCP
582 #define prop_type          frame->Xprop_type
583 #define prop_value         frame->Xprop_value
584 #define prop_fail_result   frame->Xprop_fail_result
585 #define oclength           frame->Xoclength
586 #define occhars            frame->Xocchars
587 #endif
588 
589 #define ctype              frame->Xctype
590 #define fc                 frame->Xfc
591 #define fi                 frame->Xfi
592 #define length             frame->Xlength
593 #define max                frame->Xmax
594 #define min                frame->Xmin
595 #define number             frame->Xnumber
596 #define offset             frame->Xoffset
597 #define op                 frame->Xop
598 #define save_capture_last  frame->Xsave_capture_last
599 #define save_offset1       frame->Xsave_offset1
600 #define save_offset2       frame->Xsave_offset2
601 #define save_offset3       frame->Xsave_offset3
602 #define stacksave          frame->Xstacksave
603 
604 #define newptrb            frame->Xnewptrb
605 
606 /* When recursion is being used, local variables are allocated on the stack and
607 get preserved during recursion in the normal way. In this environment, fi and
608 i, and fc and c, can be the same variables. */
609 
610 #else         /* NO_RECURSE not defined */
611 #define fi i
612 #define fc c
613 
614 /* Many of the following variables are used only in small blocks of the code.
615 My normal style of coding would have declared them within each of those blocks.
616 However, in order to accommodate the version of this code that uses an external
617 "stack" implemented on the heap, it is easier to declare them all here, so the
618 declarations can be cut out in a block. The only declarations within blocks
619 below are for variables that do not have to be preserved over a recursive call
620 to RMATCH(). */
621 
622 #ifdef SUPPORT_UTF
623 const pcre_uchar *charptr;
624 #endif
625 const pcre_uchar *callpat;
626 const pcre_uchar *data;
627 const pcre_uchar *next;
628 PCRE_PUCHAR       pp;
629 const pcre_uchar *prev;
630 PCRE_PUCHAR       saved_eptr;
631 
632 recursion_info new_recursive;
633 
634 BOOL cur_is_word;
635 BOOL condition;
636 BOOL prev_is_word;
637 
638 #ifdef SUPPORT_UCP
639 int prop_type;
640 unsigned int prop_value;
641 int prop_fail_result;
642 int oclength;
643 pcre_uchar occhars[6];
644 #endif
645 
646 int codelink;
647 int ctype;
648 int length;
649 int max;
650 int min;
651 unsigned int number;
652 int offset;
653 unsigned int op;
654 pcre_int32 save_capture_last;
655 int save_offset1, save_offset2, save_offset3;
656 int stacksave[REC_STACK_SAVE_MAX];
657 
658 eptrblock newptrb;
659 
660 /* There is a special fudge for calling match() in a way that causes it to
661 measure the size of its basic stack frame when the stack is being used for
662 recursion. The second argument (ecode) being NULL triggers this behaviour. It
663 cannot normally ever be NULL. The return is the negated value of the frame
664 size. */
665 
666 if (ecode == NULL)
667   {
668   if (rdepth == 0)
669     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
670   else
671     {
672     int len = (char *)&rdepth - (char *)eptr;
673     return (len > 0)? -len : len;
674     }
675   }
676 #endif     /* NO_RECURSE */
677 
678 /* To save space on the stack and in the heap frame, I have doubled up on some
679 of the local variables that are used only in localised parts of the code, but
680 still need to be preserved over recursive calls of match(). These macros define
681 the alternative names that are used. */
682 
683 #define allow_zero    cur_is_word
684 #define cbegroup      condition
685 #define code_offset   codelink
686 #define condassert    condition
687 #define matched_once  prev_is_word
688 #define foc           number
689 #define save_mark     data
690 
691 /* These statements are here to stop the compiler complaining about uninitialized
692 variables. */
693 
694 #ifdef SUPPORT_UCP
695 prop_value = 0;
696 prop_fail_result = 0;
697 #endif
698 
699 
700 /* This label is used for tail recursion, which is used in a few cases even
701 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
702 used. Thanks to Ian Taylor for noticing this possibility and sending the
703 original patch. */
704 
705 TAIL_RECURSE:
706 
707 /* OK, now we can get on with the real code of the function. Recursive calls
708 are specified by the macro RMATCH and RRETURN is used to return. When
709 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
710 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
711 defined). However, RMATCH isn't like a function call because it's quite a
712 complicated macro. It has to be used in one particular way. This shouldn't,
713 however, impact performance when true recursion is being used. */
714 
715 #ifdef SUPPORT_UTF
716 utf = md->utf;       /* Local copy of the flag */
717 #else
718 utf = FALSE;
719 #endif
720 
721 /* First check that we haven't called match() too many times, or that we
722 haven't exceeded the recursive call limit. */
723 
724 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
725 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
726 
727 /* At the start of a group with an unlimited repeat that may match an empty
728 string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
729 done this way to save having to use another function argument, which would take
730 up space on the stack. See also MATCH_CONDASSERT below.
731 
732 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
733 such remembered pointers, to be checked when we hit the closing ket, in order
734 to break infinite loops that match no characters. When match() is called in
735 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
736 NOT be used with tail recursion, because the memory block that is used is on
737 the stack, so a new one may be required for each match(). */
738 
739 if (md->match_function_type == MATCH_CBEGROUP)
740   {
741   newptrb.epb_saved_eptr = eptr;
742   newptrb.epb_prev = eptrb;
743   eptrb = &newptrb;
744   md->match_function_type = 0;
745   }
746 
747 /* Now start processing the opcodes. */
748 
749 for (;;)
750   {
751   minimize = possessive = FALSE;
752   op = *ecode;
753 
754   switch(op)
755     {
756     case OP_MARK:
757     md->nomatch_mark = ecode + 2;
758     md->mark = NULL;    /* In case previously set by assertion */
759     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
760       eptrb, RM55);
761     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
762          md->mark == NULL) md->mark = ecode + 2;
763 
764     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
765     argument, and we must check whether that argument matches this MARK's
766     argument. It is passed back in md->start_match_ptr (an overloading of that
767     variable). If it does match, we reset that variable to the current subject
768     position and return MATCH_SKIP. Otherwise, pass back the return code
769     unaltered. */
770 
771     else if (rrc == MATCH_SKIP_ARG &&
772         STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
773       {
774       md->start_match_ptr = eptr;
775       RRETURN(MATCH_SKIP);
776       }
777     RRETURN(rrc);
778 
779     case OP_FAIL:
780     RRETURN(MATCH_NOMATCH);
781 
782     case OP_COMMIT:
783     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
784       eptrb, RM52);
785     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
786     RRETURN(MATCH_COMMIT);
787 
788     case OP_PRUNE:
789     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
790       eptrb, RM51);
791     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
792     RRETURN(MATCH_PRUNE);
793 
794     case OP_PRUNE_ARG:
795     md->nomatch_mark = ecode + 2;
796     md->mark = NULL;    /* In case previously set by assertion */
797     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
798       eptrb, RM56);
799     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
800          md->mark == NULL) md->mark = ecode + 2;
801     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
802     RRETURN(MATCH_PRUNE);
803 
804     case OP_SKIP:
805     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
806       eptrb, RM53);
807     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
808     md->start_match_ptr = eptr;   /* Pass back current position */
809     RRETURN(MATCH_SKIP);
810 
811     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
812     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
813     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
814     that failed and any that precede it (either they also failed, or were not
815     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
816     SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
817     set to the count of the one that failed. */
818 
819     case OP_SKIP_ARG:
820     md->skip_arg_count++;
821     if (md->skip_arg_count <= md->ignore_skip_arg)
822       {
823       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
824       break;
825       }
826     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
827       eptrb, RM57);
828     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
829 
830     /* Pass back the current skip name by overloading md->start_match_ptr and
831     returning the special MATCH_SKIP_ARG return code. This will either be
832     caught by a matching MARK, or get to the top, where it causes a rematch
833     with md->ignore_skip_arg set to the value of md->skip_arg_count. */
834 
835     md->start_match_ptr = ecode + 2;
836     RRETURN(MATCH_SKIP_ARG);
837 
838     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
839     the branch in which it occurs can be determined. Overload the start of
840     match pointer to do this. */
841 
842     case OP_THEN:
843     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
844       eptrb, RM54);
845     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
846     md->start_match_ptr = ecode;
847     RRETURN(MATCH_THEN);
848 
849     case OP_THEN_ARG:
850     md->nomatch_mark = ecode + 2;
851     md->mark = NULL;    /* In case previously set by assertion */
852     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
853       md, eptrb, RM58);
854     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
855          md->mark == NULL) md->mark = ecode + 2;
856     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
857     md->start_match_ptr = ecode;
858     RRETURN(MATCH_THEN);
859 
860     /* Handle an atomic group that does not contain any capturing parentheses.
861     This can be handled like an assertion. Prior to 8.13, all atomic groups
862     were handled this way. In 8.13, the code was changed as below for ONCE, so
863     that backups pass through the group and thereby reset captured values.
864     However, this uses a lot more stack, so in 8.20, atomic groups that do not
865     contain any captures generate OP_ONCE_NC, which can be handled in the old,
866     less stack intensive way.
867 
868     Check the alternative branches in turn - the matching won't pass the KET
869     for this kind of subpattern. If any one branch matches, we carry on as at
870     the end of a normal bracket, leaving the subject pointer, but resetting
871     the start-of-match value in case it was changed by \K. */
872 
873     case OP_ONCE_NC:
874     prev = ecode;
875     saved_eptr = eptr;
876     save_mark = md->mark;
877     do
878       {
879       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
880       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
881         {
882         mstart = md->start_match_ptr;
883         break;
884         }
885       if (rrc == MATCH_THEN)
886         {
887         next = ecode + GET(ecode,1);
888         if (md->start_match_ptr < next &&
889             (*ecode == OP_ALT || *next == OP_ALT))
890           rrc = MATCH_NOMATCH;
891         }
892 
893       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
894       ecode += GET(ecode,1);
895       md->mark = save_mark;
896       }
897     while (*ecode == OP_ALT);
898 
899     /* If we hit the end of the group (which could be repeated), fail. */
900 
901     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
902 
903     /* Continue as from after the group, updating the offsets high water
904     mark, since extracts may have been taken. */
905 
906     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
907 
908     offset_top = md->end_offset_top;
909     eptr = md->end_match_ptr;
910 
911     /* For a non-repeating ket, just continue at this level. This also
912     happens for a repeating ket if no characters were matched in the group.
913     This is the forcible breaking of infinite loops as implemented in Perl
914     5.005. */
915 
916     if (*ecode == OP_KET || eptr == saved_eptr)
917       {
918       ecode += 1+LINK_SIZE;
919       break;
920       }
921 
922     /* The repeating kets try the rest of the pattern or restart from the
923     preceding bracket, in the appropriate order. The second "call" of match()
924     uses tail recursion, to avoid using another stack frame. */
925 
926     if (*ecode == OP_KETRMIN)
927       {
928       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
929       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
930       ecode = prev;
931       goto TAIL_RECURSE;
932       }
933     else  /* OP_KETRMAX */
934       {
935       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
936       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
937       ecode += 1 + LINK_SIZE;
938       goto TAIL_RECURSE;
939       }
940     /* Control never gets here */
941 
942     /* Handle a capturing bracket, other than those that are possessive with an
943     unlimited repeat. If there is space in the offset vector, save the current
944     subject position in the working slot at the top of the vector. We mustn't
945     change the current values of the data slot, because they may be set from a
946     previous iteration of this group, and be referred to by a reference inside
947     the group. A failure to match might occur after the group has succeeded,
948     if something later on doesn't match. For this reason, we need to restore
949     the working value and also the values of the final offsets, in case they
950     were set by a previous iteration of the same bracket.
951 
952     If there isn't enough space in the offset vector, treat this as if it were
953     a non-capturing bracket. Don't worry about setting the flag for the error
954     case here; that is handled in the code for KET. */
955 
956     case OP_CBRA:
957     case OP_SCBRA:
958     number = GET2(ecode, 1+LINK_SIZE);
959     offset = number << 1;
960 
961 #ifdef PCRE_DEBUG
962     printf("start bracket %d\n", number);
963     printf("subject=");
964     pchars(eptr, 16, TRUE, md);
965     printf("\n");
966 #endif
967 
968     if (offset < md->offset_max)
969       {
970       save_offset1 = md->offset_vector[offset];
971       save_offset2 = md->offset_vector[offset+1];
972       save_offset3 = md->offset_vector[md->offset_end - number];
973       save_capture_last = md->capture_last;
974       save_mark = md->mark;
975 
976       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
977       md->offset_vector[md->offset_end - number] =
978         (int)(eptr - md->start_subject);
979 
980       for (;;)
981         {
982         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
983         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
984           eptrb, RM1);
985         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
986 
987         /* If we backed up to a THEN, check whether it is within the current
988         branch by comparing the address of the THEN that is passed back with
989         the end of the branch. If it is within the current branch, and the
990         branch is one of two or more alternatives (it either starts or ends
991         with OP_ALT), we have reached the limit of THEN's action, so convert
992         the return code to NOMATCH, which will cause normal backtracking to
993         happen from now on. Otherwise, THEN is passed back to an outer
994         alternative. This implements Perl's treatment of parenthesized groups,
995         where a group not containing | does not affect the current alternative,
996         that is, (X) is NOT the same as (X|(*F)). */
997 
998         if (rrc == MATCH_THEN)
999           {
1000           next = ecode + GET(ecode,1);
1001           if (md->start_match_ptr < next &&
1002               (*ecode == OP_ALT || *next == OP_ALT))
1003             rrc = MATCH_NOMATCH;
1004           }
1005 
1006         /* Anything other than NOMATCH is passed back. */
1007 
1008         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1009         md->capture_last = save_capture_last;
1010         ecode += GET(ecode, 1);
1011         md->mark = save_mark;
1012         if (*ecode != OP_ALT) break;
1013         }
1014 
1015       DPRINTF(("bracket %d failed\n", number));
1016       md->offset_vector[offset] = save_offset1;
1017       md->offset_vector[offset+1] = save_offset2;
1018       md->offset_vector[md->offset_end - number] = save_offset3;
1019 
1020       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
1021 
1022       RRETURN(rrc);
1023       }
1024 
1025     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1026     as a non-capturing bracket. */
1027 
1028     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1029     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1030 
1031     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1032 
1033     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1034     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1035 
1036     /* Non-capturing or atomic group, except for possessive with unlimited
1037     repeat and ONCE group with no captures. Loop for all the alternatives.
1038 
1039     When we get to the final alternative within the brackets, we used to return
1040     the result of a recursive call to match() whatever happened so it was
1041     possible to reduce stack usage by turning this into a tail recursion,
1042     except in the case of a possibly empty group. However, now that there is
1043     the possibility of (*THEN) occurring in the final alternative, this
1044     optimization is no longer always possible.
1045 
1046     We can optimize if we know there are no (*THEN)s in the pattern; at present
1047     this is the best that can be done.
1048 
1049     MATCH_ONCE is returned when the end of an atomic group is successfully
1050     reached, but subsequent matching fails. It passes back up the tree (causing
1051     captured values to be reset) until the original atomic group level is
1052     reached. This is tested by comparing md->once_target with the start of the
1053     group. At this point, the return is converted into MATCH_NOMATCH so that
1054     previous backup points can be taken. */
1055 
1056     case OP_ONCE:
1057     case OP_BRA:
1058     case OP_SBRA:
1059     DPRINTF(("start non-capturing bracket\n"));
1060 
1061     for (;;)
1062       {
1063       if (op >= OP_SBRA || op == OP_ONCE)
1064         md->match_function_type = MATCH_CBEGROUP;
1065 
1066       /* If this is not a possibly empty group, and there are no (*THEN)s in
1067       the pattern, and this is the final alternative, optimize as described
1068       above. */
1069 
1070       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1071         {
1072         ecode += PRIV(OP_lengths)[*ecode];
1073         goto TAIL_RECURSE;
1074         }
1075 
1076       /* In all other cases, we have to make another call to match(). */
1077 
1078       save_mark = md->mark;
1079       save_capture_last = md->capture_last;
1080       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1081         RM2);
1082 
1083       /* See comment in the code for capturing groups above about handling
1084       THEN. */
1085 
1086       if (rrc == MATCH_THEN)
1087         {
1088         next = ecode + GET(ecode,1);
1089         if (md->start_match_ptr < next &&
1090             (*ecode == OP_ALT || *next == OP_ALT))
1091           rrc = MATCH_NOMATCH;
1092         }
1093 
1094       if (rrc != MATCH_NOMATCH)
1095         {
1096         if (rrc == MATCH_ONCE)
1097           {
1098           const pcre_uchar *scode = ecode;
1099           if (*scode != OP_ONCE)           /* If not at start, find it */
1100             {
1101             while (*scode == OP_ALT) scode += GET(scode, 1);
1102             scode -= GET(scode, 1);
1103             }
1104           if (md->once_target == scode) rrc = MATCH_NOMATCH;
1105           }
1106         RRETURN(rrc);
1107         }
1108       ecode += GET(ecode, 1);
1109       md->mark = save_mark;
1110       if (*ecode != OP_ALT) break;
1111       md->capture_last = save_capture_last;
1112       }
1113 
1114     RRETURN(MATCH_NOMATCH);
1115 
1116     /* Handle possessive capturing brackets with an unlimited repeat. We come
1117     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1118     handled similarly to the normal case above. However, the matching is
1119     different. The end of these brackets will always be OP_KETRPOS, which
1120     returns MATCH_KETRPOS without going further in the pattern. By this means
1121     we can handle the group by iteration rather than recursion, thereby
1122     reducing the amount of stack needed. */
1123 
1124     case OP_CBRAPOS:
1125     case OP_SCBRAPOS:
1126     allow_zero = FALSE;
1127 
1128     POSSESSIVE_CAPTURE:
1129     number = GET2(ecode, 1+LINK_SIZE);
1130     offset = number << 1;
1131 
1132 #ifdef PCRE_DEBUG
1133     printf("start possessive bracket %d\n", number);
1134     printf("subject=");
1135     pchars(eptr, 16, TRUE, md);
1136     printf("\n");
1137 #endif
1138 
1139     if (offset < md->offset_max)
1140       {
1141       matched_once = FALSE;
1142       code_offset = (int)(ecode - md->start_code);
1143 
1144       save_offset1 = md->offset_vector[offset];
1145       save_offset2 = md->offset_vector[offset+1];
1146       save_offset3 = md->offset_vector[md->offset_end - number];
1147       save_capture_last = md->capture_last;
1148 
1149       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1150 
1151       /* Each time round the loop, save the current subject position for use
1152       when the group matches. For MATCH_MATCH, the group has matched, so we
1153       restart it with a new subject starting position, remembering that we had
1154       at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1155       usual. If we haven't matched any alternatives in any iteration, check to
1156       see if a previous iteration matched. If so, the group has matched;
1157       continue from afterwards. Otherwise it has failed; restore the previous
1158       capture values before returning NOMATCH. */
1159 
1160       for (;;)
1161         {
1162         md->offset_vector[md->offset_end - number] =
1163           (int)(eptr - md->start_subject);
1164         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1165         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1166           eptrb, RM63);
1167         if (rrc == MATCH_KETRPOS)
1168           {
1169           offset_top = md->end_offset_top;
1170           ecode = md->start_code + code_offset;
1171           save_capture_last = md->capture_last;
1172           matched_once = TRUE;
1173           mstart = md->start_match_ptr;    /* In case \K changed it */
1174           if (eptr == md->end_match_ptr)   /* Matched an empty string */
1175             {
1176             do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1177             break;
1178             }
1179           eptr = md->end_match_ptr;
1180           continue;
1181           }
1182 
1183         /* See comment in the code for capturing groups above about handling
1184         THEN. */
1185 
1186         if (rrc == MATCH_THEN)
1187           {
1188           next = ecode + GET(ecode,1);
1189           if (md->start_match_ptr < next &&
1190               (*ecode == OP_ALT || *next == OP_ALT))
1191             rrc = MATCH_NOMATCH;
1192           }
1193 
1194         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1195         md->capture_last = save_capture_last;
1196         ecode += GET(ecode, 1);
1197         if (*ecode != OP_ALT) break;
1198         }
1199 
1200       if (!matched_once)
1201         {
1202         md->offset_vector[offset] = save_offset1;
1203         md->offset_vector[offset+1] = save_offset2;
1204         md->offset_vector[md->offset_end - number] = save_offset3;
1205         }
1206 
1207       if (allow_zero || matched_once)
1208         {
1209         ecode += 1 + LINK_SIZE;
1210         break;
1211         }
1212 
1213       RRETURN(MATCH_NOMATCH);
1214       }
1215 
1216     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1217     as a non-capturing bracket. */
1218 
1219     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1220     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1221 
1222     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1223 
1224     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1225     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1226 
1227     /* Non-capturing possessive bracket with unlimited repeat. We come here
1228     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1229     without the capturing complication. It is written out separately for speed
1230     and cleanliness. */
1231 
1232     case OP_BRAPOS:
1233     case OP_SBRAPOS:
1234     allow_zero = FALSE;
1235 
1236     POSSESSIVE_NON_CAPTURE:
1237     matched_once = FALSE;
1238     code_offset = (int)(ecode - md->start_code);
1239     save_capture_last = md->capture_last;
1240 
1241     for (;;)
1242       {
1243       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1244       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1245         eptrb, RM48);
1246       if (rrc == MATCH_KETRPOS)
1247         {
1248         offset_top = md->end_offset_top;
1249         ecode = md->start_code + code_offset;
1250         matched_once = TRUE;
1251         mstart = md->start_match_ptr;   /* In case \K reset it */
1252         if (eptr == md->end_match_ptr)  /* Matched an empty string */
1253           {
1254           do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1255           break;
1256           }
1257         eptr = md->end_match_ptr;
1258         continue;
1259         }
1260 
1261       /* See comment in the code for capturing groups above about handling
1262       THEN. */
1263 
1264       if (rrc == MATCH_THEN)
1265         {
1266         next = ecode + GET(ecode,1);
1267         if (md->start_match_ptr < next &&
1268             (*ecode == OP_ALT || *next == OP_ALT))
1269           rrc = MATCH_NOMATCH;
1270         }
1271 
1272       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1273       ecode += GET(ecode, 1);
1274       if (*ecode != OP_ALT) break;
1275       md->capture_last = save_capture_last;
1276       }
1277 
1278     if (matched_once || allow_zero)
1279       {
1280       ecode += 1 + LINK_SIZE;
1281       break;
1282       }
1283     RRETURN(MATCH_NOMATCH);
1284 
1285     /* Control never reaches here. */
1286 
1287     /* Conditional group: compilation checked that there are no more than two
1288     branches. If the condition is false, skipping the first branch takes us
1289     past the end of the item if there is only one branch, but that's exactly
1290     what we want. */
1291 
1292     case OP_COND:
1293     case OP_SCOND:
1294 
1295     /* The variable codelink will be added to ecode when the condition is
1296     false, to get to the second branch. Setting it to the offset to the ALT
1297     or KET, then incrementing ecode achieves this effect. We now have ecode
1298     pointing to the condition or callout. */
1299 
1300     codelink = GET(ecode, 1);   /* Offset to the second branch */
1301     ecode += 1 + LINK_SIZE;     /* From this opcode */
1302 
1303     /* Because of the way auto-callout works during compile, a callout item is
1304     inserted between OP_COND and an assertion condition. */
1305 
1306     if (*ecode == OP_CALLOUT)
1307       {
1308       if (PUBL(callout) != NULL)
1309         {
1310         PUBL(callout_block) cb;
1311         cb.version          = 2;   /* Version 2 of the callout block */
1312         cb.callout_number   = ecode[1];
1313         cb.offset_vector    = md->offset_vector;
1314 #if defined COMPILE_PCRE8
1315         cb.subject          = (PCRE_SPTR)md->start_subject;
1316 #elif defined COMPILE_PCRE16
1317         cb.subject          = (PCRE_SPTR16)md->start_subject;
1318 #elif defined COMPILE_PCRE32
1319         cb.subject          = (PCRE_SPTR32)md->start_subject;
1320 #endif
1321         cb.subject_length   = (int)(md->end_subject - md->start_subject);
1322         cb.start_match      = (int)(mstart - md->start_subject);
1323         cb.current_position = (int)(eptr - md->start_subject);
1324         cb.pattern_position = GET(ecode, 2);
1325         cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1326         cb.capture_top      = offset_top/2;
1327         cb.capture_last     = md->capture_last & CAPLMASK;
1328         /* Internal change requires this for API compatibility. */
1329         if (cb.capture_last == 0) cb.capture_last = -1;
1330         cb.callout_data     = md->callout_data;
1331         cb.mark             = md->nomatch_mark;
1332         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1333         if (rrc < 0) RRETURN(rrc);
1334         }
1335 
1336       /* Advance ecode past the callout, so it now points to the condition. We
1337       must adjust codelink so that the value of ecode+codelink is unchanged. */
1338 
1339       ecode += PRIV(OP_lengths)[OP_CALLOUT];
1340       codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1341       }
1342 
1343     /* Test the various possible conditions */
1344 
1345     condition = FALSE;
1346     switch(condcode = *ecode)
1347       {
1348       case OP_RREF:         /* Numbered group recursion test */
1349       if (md->recursive != NULL)     /* Not recursing => FALSE */
1350         {
1351         unsigned int recno = GET2(ecode, 1);   /* Recursion group number */
1352         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1353         }
1354       break;
1355 
1356       case OP_DNRREF:       /* Duplicate named group recursion test */
1357       if (md->recursive != NULL)
1358         {
1359         int count = GET2(ecode, 1 + IMM2_SIZE);
1360         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1361         while (count-- > 0)
1362           {
1363           unsigned int recno = GET2(slot, 0);
1364           condition = recno == md->recursive->group_num;
1365           if (condition) break;
1366           slot += md->name_entry_size;
1367           }
1368         }
1369       break;
1370 
1371       case OP_CREF:         /* Numbered group used test */
1372       offset = GET2(ecode, 1) << 1;  /* Doubled ref number */
1373       condition = offset < offset_top && md->offset_vector[offset] >= 0;
1374       break;
1375 
1376       case OP_DNCREF:      /* Duplicate named group used test */
1377         {
1378         int count = GET2(ecode, 1 + IMM2_SIZE);
1379         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1380         while (count-- > 0)
1381           {
1382           offset = GET2(slot, 0) << 1;
1383           condition = offset < offset_top && md->offset_vector[offset] >= 0;
1384           if (condition) break;
1385           slot += md->name_entry_size;
1386           }
1387         }
1388       break;
1389 
1390       case OP_DEF:     /* DEFINE - always false */
1391       break;
1392 
1393       /* The condition is an assertion. Call match() to evaluate it - setting
1394       md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
1395       of an assertion. */
1396 
1397       default:
1398       md->match_function_type = MATCH_CONDASSERT;
1399       RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
1400       if (rrc == MATCH_MATCH)
1401         {
1402         if (md->end_offset_top > offset_top)
1403           offset_top = md->end_offset_top;  /* Captures may have happened */
1404         condition = TRUE;
1405 
1406         /* Advance ecode past the assertion to the start of the first branch,
1407         but adjust it so that the general choosing code below works. */
1408 
1409         ecode += GET(ecode, 1);
1410         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1411         ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
1412         }
1413 
1414       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1415       assertion; it is therefore treated as NOMATCH. Any other return is an
1416       error. */
1417 
1418       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1419         {
1420         RRETURN(rrc);         /* Need braces because of following else */
1421         }
1422       break;
1423       }
1424 
1425     /* Choose branch according to the condition */
1426 
1427     ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
1428 
1429     /* We are now at the branch that is to be obeyed. As there is only one, we
1430     can use tail recursion to avoid using another stack frame, except when
1431     there is unlimited repeat of a possibly empty group. In the latter case, a
1432     recursive call to match() is always required, unless the second alternative
1433     doesn't exist, in which case we can just plough on. Note that, for
1434     compatibility with Perl, the | in a conditional group is NOT treated as
1435     creating two alternatives. If a THEN is encountered in the branch, it
1436     propagates out to the enclosing alternative (unless nested in a deeper set
1437     of alternatives, of course). */
1438 
1439     if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
1440       {
1441       if (op != OP_SCOND)
1442         {
1443         goto TAIL_RECURSE;
1444         }
1445 
1446       md->match_function_type = MATCH_CBEGROUP;
1447       RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
1448       RRETURN(rrc);
1449       }
1450 
1451      /* Condition false & no alternative; continue after the group. */
1452 
1453     else
1454       {
1455       }
1456     break;
1457 
1458 
1459     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1460     to close any currently open capturing brackets. */
1461 
1462     case OP_CLOSE:
1463     number = GET2(ecode, 1);   /* Must be less than 65536 */
1464     offset = number << 1;
1465 
1466 #ifdef PCRE_DEBUG
1467       printf("end bracket %d at *ACCEPT", number);
1468       printf("\n");
1469 #endif
1470 
1471     md->capture_last = (md->capture_last & OVFLMASK) | number;
1472     if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1473       {
1474       md->offset_vector[offset] =
1475         md->offset_vector[md->offset_end - number];
1476       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1477       if (offset_top <= offset) offset_top = offset + 2;
1478       }
1479     ecode += 1 + IMM2_SIZE;
1480     break;
1481 
1482 
1483     /* End of the pattern, either real or forced. */
1484 
1485     case OP_END:
1486     case OP_ACCEPT:
1487     case OP_ASSERT_ACCEPT:
1488 
1489     /* If we have matched an empty string, fail if not in an assertion and not
1490     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1491     is set and we have matched at the start of the subject. In both cases,
1492     backtracking will then try other alternatives, if any. */
1493 
1494     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1495          md->recursive == NULL &&
1496          (md->notempty ||
1497            (md->notempty_atstart &&
1498              mstart == md->start_subject + md->start_offset)))
1499       RRETURN(MATCH_NOMATCH);
1500 
1501     /* Otherwise, we have a match. */
1502 
1503     md->end_match_ptr = eptr;           /* Record where we ended */
1504     md->end_offset_top = offset_top;    /* and how many extracts were taken */
1505     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1506 
1507     /* For some reason, the macros don't work properly if an expression is
1508     given as the argument to RRETURN when the heap is in use. */
1509 
1510     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1511     RRETURN(rrc);
1512 
1513     /* Assertion brackets. Check the alternative branches in turn - the
1514     matching won't pass the KET for an assertion. If any one branch matches,
1515     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1516     start of each branch to move the current point backwards, so the code at
1517     this level is identical to the lookahead case. When the assertion is part
1518     of a condition, we want to return immediately afterwards. The caller of
1519     this incarnation of the match() function will have set MATCH_CONDASSERT in
1520     md->match_function_type, and one of these opcodes will be the first opcode
1521     that is processed. We use a local variable that is preserved over calls to
1522     match() to remember this case. */
1523 
1524     case OP_ASSERT:
1525     case OP_ASSERTBACK:
1526     save_mark = md->mark;
1527     if (md->match_function_type == MATCH_CONDASSERT)
1528       {
1529       condassert = TRUE;
1530       md->match_function_type = 0;
1531       }
1532     else condassert = FALSE;
1533 
1534     /* Loop for each branch */
1535 
1536     do
1537       {
1538       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1539 
1540       /* A match means that the assertion is true; break out of the loop
1541       that matches its alternatives. */
1542 
1543       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1544         {
1545         mstart = md->start_match_ptr;   /* In case \K reset it */
1546         break;
1547         }
1548 
1549       /* If not matched, restore the previous mark setting. */
1550 
1551       md->mark = save_mark;
1552 
1553       /* See comment in the code for capturing groups above about handling
1554       THEN. */
1555 
1556       if (rrc == MATCH_THEN)
1557         {
1558         next = ecode + GET(ecode,1);
1559         if (md->start_match_ptr < next &&
1560             (*ecode == OP_ALT || *next == OP_ALT))
1561           rrc = MATCH_NOMATCH;
1562         }
1563 
1564       /* Anything other than NOMATCH causes the entire assertion to fail,
1565       passing back the return code. This includes COMMIT, SKIP, PRUNE and an
1566       uncaptured THEN, which means they take their normal effect. This
1567       consistent approach does not always have exactly the same effect as in
1568       Perl. */
1569 
1570       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1571       ecode += GET(ecode, 1);
1572       }
1573     while (*ecode == OP_ALT);   /* Continue for next alternative */
1574 
1575     /* If we have tried all the alternative branches, the assertion has
1576     failed. If not, we broke out after a match. */
1577 
1578     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1579 
1580     /* If checking an assertion for a condition, return MATCH_MATCH. */
1581 
1582     if (condassert) RRETURN(MATCH_MATCH);
1583 
1584     /* Continue from after a successful assertion, updating the offsets high
1585     water mark, since extracts may have been taken during the assertion. */
1586 
1587     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1588     ecode += 1 + LINK_SIZE;
1589     offset_top = md->end_offset_top;
1590     continue;
1591 
1592     /* Negative assertion: all branches must fail to match for the assertion to
1593     succeed. */
1594 
1595     case OP_ASSERT_NOT:
1596     case OP_ASSERTBACK_NOT:
1597     save_mark = md->mark;
1598     if (md->match_function_type == MATCH_CONDASSERT)
1599       {
1600       condassert = TRUE;
1601       md->match_function_type = 0;
1602       }
1603     else condassert = FALSE;
1604 
1605     /* Loop for each alternative branch. */
1606 
1607     do
1608       {
1609       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1610       md->mark = save_mark;   /* Always restore the mark setting */
1611 
1612       switch(rrc)
1613         {
1614         case MATCH_MATCH:            /* A successful match means */
1615         case MATCH_ACCEPT:           /* the assertion has failed. */
1616         RRETURN(MATCH_NOMATCH);
1617 
1618         case MATCH_NOMATCH:          /* Carry on with next branch */
1619         break;
1620 
1621         /* See comment in the code for capturing groups above about handling
1622         THEN. */
1623 
1624         case MATCH_THEN:
1625         next = ecode + GET(ecode,1);
1626         if (md->start_match_ptr < next &&
1627             (*ecode == OP_ALT || *next == OP_ALT))
1628           {
1629           rrc = MATCH_NOMATCH;
1630           break;
1631           }
1632         /* Otherwise fall through. */
1633 
1634         /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
1635         assertion to fail to match, without considering any more alternatives.
1636         Failing to match means the assertion is true. This is a consistent
1637         approach, but does not always have the same effect as in Perl. */
1638 
1639         case MATCH_COMMIT:
1640         case MATCH_SKIP:
1641         case MATCH_SKIP_ARG:
1642         case MATCH_PRUNE:
1643         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1644         goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
1645 
1646         /* Anything else is an error */
1647 
1648         default:
1649         RRETURN(rrc);
1650         }
1651 
1652       /* Continue with next branch */
1653 
1654       ecode += GET(ecode,1);
1655       }
1656     while (*ecode == OP_ALT);
1657 
1658     /* All branches in the assertion failed to match. */
1659 
1660     NEG_ASSERT_TRUE:
1661     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1662     ecode += 1 + LINK_SIZE;                /* Continue with current branch */
1663     continue;
1664 
1665     /* Move the subject pointer back. This occurs only at the start of
1666     each branch of a lookbehind assertion. If we are too close to the start to
1667     move back, this match function fails. When working with UTF-8 we move
1668     back a number of characters, not bytes. */
1669 
1670     case OP_REVERSE:
1671 #ifdef SUPPORT_UTF
1672     if (utf)
1673       {
1674       i = GET(ecode, 1);
1675       while (i-- > 0)
1676         {
1677         eptr--;
1678         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1679         BACKCHAR(eptr);
1680         }
1681       }
1682     else
1683 #endif
1684 
1685     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1686 
1687       {
1688       eptr -= GET(ecode, 1);
1689       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1690       }
1691 
1692     /* Save the earliest consulted character, then skip to next op code */
1693 
1694     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1695     ecode += 1 + LINK_SIZE;
1696     break;
1697 
1698     /* The callout item calls an external function, if one is provided, passing
1699     details of the match so far. This is mainly for debugging, though the
1700     function is able to force a failure. */
1701 
1702     case OP_CALLOUT:
1703     if (PUBL(callout) != NULL)
1704       {
1705       PUBL(callout_block) cb;
      cb.version          = 2;   /* Version 2 of the callout block */
1707       cb.callout_number   = ecode[1];
1708       cb.offset_vector    = md->offset_vector;
1709 #if defined COMPILE_PCRE8
1710       cb.subject          = (PCRE_SPTR)md->start_subject;
1711 #elif defined COMPILE_PCRE16
1712       cb.subject          = (PCRE_SPTR16)md->start_subject;
1713 #elif defined COMPILE_PCRE32
1714       cb.subject          = (PCRE_SPTR32)md->start_subject;
1715 #endif
1716       cb.subject_length   = (int)(md->end_subject - md->start_subject);
1717       cb.start_match      = (int)(mstart - md->start_subject);
1718       cb.current_position = (int)(eptr - md->start_subject);
1719       cb.pattern_position = GET(ecode, 2);
1720       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1721       cb.capture_top      = offset_top/2;
1722       cb.capture_last     = md->capture_last & CAPLMASK;
1723       /* Internal change requires this for API compatibility. */
1724       if (cb.capture_last == 0) cb.capture_last = -1;
1725       cb.callout_data     = md->callout_data;
1726       cb.mark             = md->nomatch_mark;
1727       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1728       if (rrc < 0) RRETURN(rrc);
1729       }
1730     ecode += 2 + 2*LINK_SIZE;
1731     break;
1732 
1733     /* Recursion either matches the current regex, or some subexpression. The
1734     offset data is the offset to the starting bracket from the start of the
1735     whole pattern. (This is so that it works from duplicated subpatterns.)
1736 
1737     The state of the capturing groups is preserved over recursion, and
1738     re-instated afterwards. We don't know how many are started and not yet
1739     finished (offset_top records the completed total) so we just have to save
1740     all the potential data. There may be up to 65535 such values, which is too
1741     large to put on the stack, but using malloc for small numbers seems
1742     expensive. As a compromise, the stack is used when there are no more than
1743     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1744 
1745     There are also other values that have to be saved. We use a chained
1746     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1747     for the original version of this logic. It has, however, been hacked around
1748     a lot, so he is not to blame for the current way it works. */
1749 
1750     case OP_RECURSE:
1751       {
1752       recursion_info *ri;
1753       unsigned int recno;
1754 
1755       callpat = md->start_code + GET(ecode, 1);
1756       recno = (callpat == md->start_code)? 0 :
1757         GET2(callpat, 1 + LINK_SIZE);
1758 
1759       /* Check for repeating a recursion without advancing the subject pointer.
1760       This should catch convoluted mutual recursions. (Some simple cases are
1761       caught at compile time.) */
1762 
1763       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1764         if (recno == ri->group_num && eptr == ri->subject_position)
1765           RRETURN(PCRE_ERROR_RECURSELOOP);
1766 
1767       /* Add to "recursing stack" */
1768 
1769       new_recursive.group_num = recno;
1770       new_recursive.saved_capture_last = md->capture_last;
1771       new_recursive.subject_position = eptr;
1772       new_recursive.prevrec = md->recursive;
1773       md->recursive = &new_recursive;
1774 
1775       /* Where to continue from afterwards */
1776 
1777       ecode += 1 + LINK_SIZE;
1778 
1779       /* Now save the offset data */
1780 
1781       new_recursive.saved_max = md->offset_end;
1782       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1783         new_recursive.offset_save = stacksave;
1784       else
1785         {
1786         new_recursive.offset_save =
1787           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1788         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1789         }
1790       memcpy(new_recursive.offset_save, md->offset_vector,
1791             new_recursive.saved_max * sizeof(int));
1792 
1793       /* OK, now we can do the recursion. After processing each alternative,
1794       restore the offset data and the last captured value. If there were nested
1795       recursions, md->recursive might be changed, so reset it before looping.
1796       */
1797 
1798       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1799       cbegroup = (*callpat >= OP_SBRA);
1800       do
1801         {
1802         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1803         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1804           md, eptrb, RM6);
1805         memcpy(md->offset_vector, new_recursive.offset_save,
1806             new_recursive.saved_max * sizeof(int));
1807         md->capture_last = new_recursive.saved_capture_last;
1808         md->recursive = new_recursive.prevrec;
1809         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1810           {
1811           DPRINTF(("Recursion matched\n"));
1812           if (new_recursive.offset_save != stacksave)
1813             (PUBL(free))(new_recursive.offset_save);
1814 
1815           /* Set where we got to in the subject, and reset the start in case
1816           it was changed by \K. This *is* propagated back out of a recursion,
1817           for Perl compatibility. */
1818 
1819           eptr = md->end_match_ptr;
1820           mstart = md->start_match_ptr;
1821           goto RECURSION_MATCHED;        /* Exit loop; end processing */
1822           }
1823 
1824         /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1825         recursion; they cause a NOMATCH for the entire recursion. These codes
1826         are defined in a range that can be tested for. */
1827 
1828         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1829           RRETURN(MATCH_NOMATCH);
1830 
1831         /* Any return code other than NOMATCH is an error. */
1832 
1833         if (rrc != MATCH_NOMATCH)
1834           {
1835           DPRINTF(("Recursion gave error %d\n", rrc));
1836           if (new_recursive.offset_save != stacksave)
1837             (PUBL(free))(new_recursive.offset_save);
1838           RRETURN(rrc);
1839           }
1840 
1841         md->recursive = &new_recursive;
1842         callpat += GET(callpat, 1);
1843         }
1844       while (*callpat == OP_ALT);
1845 
1846       DPRINTF(("Recursion didn't match\n"));
1847       md->recursive = new_recursive.prevrec;
1848       if (new_recursive.offset_save != stacksave)
1849         (PUBL(free))(new_recursive.offset_save);
1850       RRETURN(MATCH_NOMATCH);
1851       }
1852 
1853     RECURSION_MATCHED:
1854     break;
1855 
1856     /* An alternation is the end of a branch; scan along to find the end of the
1857     bracketed group and go to there. */
1858 
1859     case OP_ALT:
1860     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1861     break;
1862 
1863     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1864     indicating that it may occur zero times. It may repeat infinitely, or not
1865     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1866     with fixed upper repeat limits are compiled as a number of copies, with the
1867     optional ones preceded by BRAZERO or BRAMINZERO. */
1868 
1869     case OP_BRAZERO:
1870     next = ecode + 1;
1871     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1872     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1873     do next += GET(next, 1); while (*next == OP_ALT);
1874     ecode = next + 1 + LINK_SIZE;
1875     break;
1876 
1877     case OP_BRAMINZERO:
1878     next = ecode + 1;
1879     do next += GET(next, 1); while (*next == OP_ALT);
1880     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1881     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1882     ecode++;
1883     break;
1884 
1885     case OP_SKIPZERO:
1886     next = ecode+1;
1887     do next += GET(next,1); while (*next == OP_ALT);
1888     ecode = next + 1 + LINK_SIZE;
1889     break;
1890 
1891     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1892     here; just jump to the group, with allow_zero set TRUE. */
1893 
1894     case OP_BRAPOSZERO:
1895     op = *(++ecode);
1896     allow_zero = TRUE;
1897     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1898       goto POSSESSIVE_NON_CAPTURE;
1899 
1900     /* End of a group, repeated or non-repeating. */
1901 
1902     case OP_KET:
1903     case OP_KETRMIN:
1904     case OP_KETRMAX:
1905     case OP_KETRPOS:
1906     prev = ecode - GET(ecode, 1);
1907 
1908     /* If this was a group that remembered the subject start, in order to break
1909     infinite repeats of empty string matches, retrieve the subject start from
1910     the chain. Otherwise, set it NULL. */
1911 
1912     if (*prev >= OP_SBRA || *prev == OP_ONCE)
1913       {
1914       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1915       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1916       }
1917     else saved_eptr = NULL;
1918 
1919     /* If we are at the end of an assertion group or a non-capturing atomic
1920     group, stop matching and return MATCH_MATCH, but record the current high
1921     water mark for use by positive assertions. We also need to record the match
1922     start in case it was changed by \K. */
1923 
1924     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1925          *prev == OP_ONCE_NC)
1926       {
1927       md->end_match_ptr = eptr;      /* For ONCE_NC */
1928       md->end_offset_top = offset_top;
1929       md->start_match_ptr = mstart;
1930       RRETURN(MATCH_MATCH);         /* Sets md->mark */
1931       }
1932 
1933     /* For capturing groups we have to check the group number back at the start
1934     and if necessary complete handling an extraction by setting the offsets and
1935     bumping the high water mark. Whole-pattern recursion is coded as a recurse
1936     into group 0, so it won't be picked up here. Instead, we catch it when the
1937     OP_END is reached. Other recursion is handled here. We just have to record
1938     the current subject position and start match pointer and give a MATCH
1939     return. */
1940 
1941     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1942         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1943       {
1944       number = GET2(prev, 1+LINK_SIZE);
1945       offset = number << 1;
1946 
1947 #ifdef PCRE_DEBUG
1948       printf("end bracket %d", number);
1949       printf("\n");
1950 #endif
1951 
1952       /* Handle a recursively called group. */
1953 
1954       if (md->recursive != NULL && md->recursive->group_num == number)
1955         {
1956         md->end_match_ptr = eptr;
1957         md->start_match_ptr = mstart;
1958         RRETURN(MATCH_MATCH);
1959         }
1960 
1961       /* Deal with capturing */
1962 
1963       md->capture_last = (md->capture_last & OVFLMASK) | number;
1964       if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1965         {
1966         /* If offset is greater than offset_top, it means that we are
1967         "skipping" a capturing group, and that group's offsets must be marked
1968         unset. In earlier versions of PCRE, all the offsets were unset at the
1969         start of matching, but this doesn't work because atomic groups and
1970         assertions can cause a value to be set that should later be unset.
1971         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1972         part of the atomic group, but this is not on the final matching path,
1973         so must be unset when 2 is set. (If there is no group 2, there is no
1974         problem, because offset_top will then be 2, indicating no capture.) */
1975 
1976         if (offset > offset_top)
1977           {
1978           register int *iptr = md->offset_vector + offset_top;
1979           register int *iend = md->offset_vector + offset;
1980           while (iptr < iend) *iptr++ = -1;
1981           }
1982 
1983         /* Now make the extraction */
1984 
1985         md->offset_vector[offset] =
1986           md->offset_vector[md->offset_end - number];
1987         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1988         if (offset_top <= offset) offset_top = offset + 2;
1989         }
1990       }
1991 
1992     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1993     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1994     at a time from the outer level, thus saving stack. This must precede the
1995     empty string test - in this case that test is done at the outer level. */
1996 
1997     if (*ecode == OP_KETRPOS)
1998       {
1999       md->start_match_ptr = mstart;    /* In case \K reset it */
2000       md->end_match_ptr = eptr;
2001       md->end_offset_top = offset_top;
2002       RRETURN(MATCH_KETRPOS);
2003       }
2004 
2005     /* For an ordinary non-repeating ket, just continue at this level. This
2006     also happens for a repeating ket if no characters were matched in the
2007     group. This is the forcible breaking of infinite loops as implemented in
2008     Perl 5.005. For a non-repeating atomic group that includes captures,
2009     establish a backup point by processing the rest of the pattern at a lower
2010     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
2011     original OP_ONCE level, thereby bypassing intermediate backup points, but
2012     resetting any captures that happened along the way. */
2013 
2014     if (*ecode == OP_KET || eptr == saved_eptr)
2015       {
2016       if (*prev == OP_ONCE)
2017         {
2018         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
2019         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2020         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2021         RRETURN(MATCH_ONCE);
2022         }
2023       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
2024       break;
2025       }
2026 
    /* The normal repeating kets try the rest of the pattern or restart from
    the preceding bracket, in the appropriate order. In the second case, we can
    use tail recursion to avoid using another stack frame, unless we have an
    atomic group or an unlimited repeat of a group that can match an empty
    string. */
2032 
2033     if (*ecode == OP_KETRMIN)
2034       {
2035       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
2036       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2037       if (*prev == OP_ONCE)
2038         {
2039         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2040         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2041         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2042         RRETURN(MATCH_ONCE);
2043         }
2044       if (*prev >= OP_SBRA)    /* Could match an empty string */
2045         {
2046         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2047         RRETURN(rrc);
2048         }
2049       ecode = prev;
2050       goto TAIL_RECURSE;
2051       }
2052     else  /* OP_KETRMAX */
2053       {
2054       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2055       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2056       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2057       if (*prev == OP_ONCE)
2058         {
2059         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2060         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2061         md->once_target = prev;
2062         RRETURN(MATCH_ONCE);
2063         }
2064       ecode += 1 + LINK_SIZE;
2065       goto TAIL_RECURSE;
2066       }
2067     /* Control never gets here */
2068 
2069     /* Not multiline mode: start of subject assertion, unless notbol. */
2070 
2071     case OP_CIRC:
2072     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2073 
2074     /* Start of subject assertion */
2075 
2076     case OP_SOD:
2077     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2078     ecode++;
2079     break;
2080 
2081     /* Multiline mode: start of subject unless notbol, or after any newline. */
2082 
2083     case OP_CIRCM:
2084     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2085     if (eptr != md->start_subject &&
2086         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2087       RRETURN(MATCH_NOMATCH);
2088     ecode++;
2089     break;
2090 
2091     /* Start of match assertion */
2092 
2093     case OP_SOM:
2094     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2095     ecode++;
2096     break;
2097 
2098     /* Reset the start of match point */
2099 
2100     case OP_SET_SOM:
2101     mstart = eptr;
2102     ecode++;
2103     break;
2104 
2105     /* Multiline mode: assert before any newline, or before end of subject
2106     unless noteol is set. */
2107 
2108     case OP_DOLLM:
2109     if (eptr < md->end_subject)
2110       {
2111       if (!IS_NEWLINE(eptr))
2112         {
2113         if (md->partial != 0 &&
2114             eptr + 1 >= md->end_subject &&
2115             NLBLOCK->nltype == NLTYPE_FIXED &&
2116             NLBLOCK->nllen == 2 &&
2117             UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2118           {
2119           md->hitend = TRUE;
2120           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2121           }
2122         RRETURN(MATCH_NOMATCH);
2123         }
2124       }
2125     else
2126       {
2127       if (md->noteol) RRETURN(MATCH_NOMATCH);
2128       SCHECK_PARTIAL();
2129       }
2130     ecode++;
2131     break;
2132 
2133     /* Not multiline mode: assert before a terminating newline or before end of
2134     subject unless noteol is set. */
2135 
2136     case OP_DOLL:
2137     if (md->noteol) RRETURN(MATCH_NOMATCH);
2138     if (!md->endonly) goto ASSERT_NL_OR_EOS;
2139 
2140     /* ... else fall through for endonly */
2141 
2142     /* End of subject assertion (\z) */
2143 
2144     case OP_EOD:
2145     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2146     SCHECK_PARTIAL();
2147     ecode++;
2148     break;
2149 
2150     /* End of subject or ending \n assertion (\Z) */
2151 
2152     case OP_EODN:
2153     ASSERT_NL_OR_EOS:
2154     if (eptr < md->end_subject &&
2155         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2156       {
2157       if (md->partial != 0 &&
2158           eptr + 1 >= md->end_subject &&
2159           NLBLOCK->nltype == NLTYPE_FIXED &&
2160           NLBLOCK->nllen == 2 &&
2161           UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2162         {
2163         md->hitend = TRUE;
2164         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2165         }
2166       RRETURN(MATCH_NOMATCH);
2167       }
2168 
2169     /* Either at end of string or \n before end. */
2170 
2171     SCHECK_PARTIAL();
2172     ecode++;
2173     break;
2174 
2175     /* Word boundary assertions */
2176 
2177     case OP_NOT_WORD_BOUNDARY:
2178     case OP_WORD_BOUNDARY:
2179       {
2180 
2181       /* Find out if the previous and current characters are "word" characters.
2182       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2183       be "non-word" characters. Remember the earliest consulted character for
2184       partial matching. */
2185 
2186 #ifdef SUPPORT_UTF
2187       if (utf)
2188         {
2189         /* Get status of previous character */
2190 
2191         if (eptr == md->start_subject) prev_is_word = FALSE; else
2192           {
2193           PCRE_PUCHAR lastptr = eptr - 1;
2194           BACKCHAR(lastptr);
2195           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2196           GETCHAR(c, lastptr);
2197 #ifdef SUPPORT_UCP
2198           if (md->use_ucp)
2199             {
2200             if (c == '_') prev_is_word = TRUE; else
2201               {
2202               int cat = UCD_CATEGORY(c);
2203               prev_is_word = (cat == ucp_L || cat == ucp_N);
2204               }
2205             }
2206           else
2207 #endif
2208           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2209           }
2210 
2211         /* Get status of next character */
2212 
2213         if (eptr >= md->end_subject)
2214           {
2215           SCHECK_PARTIAL();
2216           cur_is_word = FALSE;
2217           }
2218         else
2219           {
2220           GETCHAR(c, eptr);
2221 #ifdef SUPPORT_UCP
2222           if (md->use_ucp)
2223             {
2224             if (c == '_') cur_is_word = TRUE; else
2225               {
2226               int cat = UCD_CATEGORY(c);
2227               cur_is_word = (cat == ucp_L || cat == ucp_N);
2228               }
2229             }
2230           else
2231 #endif
2232           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2233           }
2234         }
2235       else
2236 #endif
2237 
2238       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2239       consistency with the behaviour of \w we do use it in this case. */
2240 
2241         {
2242         /* Get status of previous character */
2243 
2244         if (eptr == md->start_subject) prev_is_word = FALSE; else
2245           {
2246           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2247 #ifdef SUPPORT_UCP
2248           if (md->use_ucp)
2249             {
2250             c = eptr[-1];
2251             if (c == '_') prev_is_word = TRUE; else
2252               {
2253               int cat = UCD_CATEGORY(c);
2254               prev_is_word = (cat == ucp_L || cat == ucp_N);
2255               }
2256             }
2257           else
2258 #endif
2259           prev_is_word = MAX_255(eptr[-1])
2260             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2261           }
2262 
2263         /* Get status of next character */
2264 
2265         if (eptr >= md->end_subject)
2266           {
2267           SCHECK_PARTIAL();
2268           cur_is_word = FALSE;
2269           }
2270         else
2271 #ifdef SUPPORT_UCP
2272         if (md->use_ucp)
2273           {
2274           c = *eptr;
2275           if (c == '_') cur_is_word = TRUE; else
2276             {
2277             int cat = UCD_CATEGORY(c);
2278             cur_is_word = (cat == ucp_L || cat == ucp_N);
2279             }
2280           }
2281         else
2282 #endif
2283         cur_is_word = MAX_255(*eptr)
2284           && ((md->ctypes[*eptr] & ctype_word) != 0);
2285         }
2286 
2287       /* Now see if the situation is what we want */
2288 
2289       if ((*ecode++ == OP_WORD_BOUNDARY)?
2290            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2291         RRETURN(MATCH_NOMATCH);
2292       }
2293     break;
2294 
2295     /* Match any single character type except newline; have to take care with
2296     CRLF newlines and partial matching. */
2297 
2298     case OP_ANY:
2299     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2300     if (md->partial != 0 &&
2301         eptr + 1 >= md->end_subject &&
2302         NLBLOCK->nltype == NLTYPE_FIXED &&
2303         NLBLOCK->nllen == 2 &&
2304         UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2305       {
2306       md->hitend = TRUE;
2307       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2308       }
2309 
2310     /* Fall through */
2311 
2312     /* Match any single character whatsoever. */
2313 
2314     case OP_ALLANY:
2315     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2316       {                            /* not be updated before SCHECK_PARTIAL. */
2317       SCHECK_PARTIAL();
2318       RRETURN(MATCH_NOMATCH);
2319       }
2320     eptr++;
2321 #ifdef SUPPORT_UTF
2322     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2323 #endif
2324     ecode++;
2325     break;
2326 
2327     /* Match a single byte, even in UTF-8 mode. This opcode really does match
2328     any byte, even newline, independent of the setting of PCRE_DOTALL. */
2329 
2330     case OP_ANYBYTE:
2331     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2332       {                            /* not be updated before SCHECK_PARTIAL. */
2333       SCHECK_PARTIAL();
2334       RRETURN(MATCH_NOMATCH);
2335       }
2336     eptr++;
2337     ecode++;
2338     break;
2339 
2340     case OP_NOT_DIGIT:
2341     if (eptr >= md->end_subject)
2342       {
2343       SCHECK_PARTIAL();
2344       RRETURN(MATCH_NOMATCH);
2345       }
2346     GETCHARINCTEST(c, eptr);
2347     if (
2348 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2349        c < 256 &&
2350 #endif
2351        (md->ctypes[c] & ctype_digit) != 0
2352        )
2353       RRETURN(MATCH_NOMATCH);
2354     ecode++;
2355     break;
2356 
2357     case OP_DIGIT:
2358     if (eptr >= md->end_subject)
2359       {
2360       SCHECK_PARTIAL();
2361       RRETURN(MATCH_NOMATCH);
2362       }
2363     GETCHARINCTEST(c, eptr);
2364     if (
2365 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2366        c > 255 ||
2367 #endif
2368        (md->ctypes[c] & ctype_digit) == 0
2369        )
2370       RRETURN(MATCH_NOMATCH);
2371     ecode++;
2372     break;
2373 
2374     case OP_NOT_WHITESPACE:
2375     if (eptr >= md->end_subject)
2376       {
2377       SCHECK_PARTIAL();
2378       RRETURN(MATCH_NOMATCH);
2379       }
2380     GETCHARINCTEST(c, eptr);
2381     if (
2382 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2383        c < 256 &&
2384 #endif
2385        (md->ctypes[c] & ctype_space) != 0
2386        )
2387       RRETURN(MATCH_NOMATCH);
2388     ecode++;
2389     break;
2390 
2391     case OP_WHITESPACE:
2392     if (eptr >= md->end_subject)
2393       {
2394       SCHECK_PARTIAL();
2395       RRETURN(MATCH_NOMATCH);
2396       }
2397     GETCHARINCTEST(c, eptr);
2398     if (
2399 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2400        c > 255 ||
2401 #endif
2402        (md->ctypes[c] & ctype_space) == 0
2403        )
2404       RRETURN(MATCH_NOMATCH);
2405     ecode++;
2406     break;
2407 
2408     case OP_NOT_WORDCHAR:
2409     if (eptr >= md->end_subject)
2410       {
2411       SCHECK_PARTIAL();
2412       RRETURN(MATCH_NOMATCH);
2413       }
2414     GETCHARINCTEST(c, eptr);
2415     if (
2416 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2417        c < 256 &&
2418 #endif
2419        (md->ctypes[c] & ctype_word) != 0
2420        )
2421       RRETURN(MATCH_NOMATCH);
2422     ecode++;
2423     break;
2424 
2425     case OP_WORDCHAR:
2426     if (eptr >= md->end_subject)
2427       {
2428       SCHECK_PARTIAL();
2429       RRETURN(MATCH_NOMATCH);
2430       }
2431     GETCHARINCTEST(c, eptr);
2432     if (
2433 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2434        c > 255 ||
2435 #endif
2436        (md->ctypes[c] & ctype_word) == 0
2437        )
2438       RRETURN(MATCH_NOMATCH);
2439     ecode++;
2440     break;
2441 
2442     case OP_ANYNL:
2443     if (eptr >= md->end_subject)
2444       {
2445       SCHECK_PARTIAL();
2446       RRETURN(MATCH_NOMATCH);
2447       }
2448     GETCHARINCTEST(c, eptr);
2449     switch(c)
2450       {
2451       default: RRETURN(MATCH_NOMATCH);
2452 
2453       case CHAR_CR:
2454       if (eptr >= md->end_subject)
2455         {
2456         SCHECK_PARTIAL();
2457         }
2458       else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
2459       break;
2460 
2461       case CHAR_LF:
2462       break;
2463 
2464       case CHAR_VT:
2465       case CHAR_FF:
2466       case CHAR_NEL:
2467 #ifndef EBCDIC
2468       case 0x2028:
2469       case 0x2029:
2470 #endif  /* Not EBCDIC */
2471       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2472       break;
2473       }
2474     ecode++;
2475     break;
2476 
2477     case OP_NOT_HSPACE:
2478     if (eptr >= md->end_subject)
2479       {
2480       SCHECK_PARTIAL();
2481       RRETURN(MATCH_NOMATCH);
2482       }
2483     GETCHARINCTEST(c, eptr);
2484     switch(c)
2485       {
2486       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2487       default: break;
2488       }
2489     ecode++;
2490     break;
2491 
2492     case OP_HSPACE:
2493     if (eptr >= md->end_subject)
2494       {
2495       SCHECK_PARTIAL();
2496       RRETURN(MATCH_NOMATCH);
2497       }
2498     GETCHARINCTEST(c, eptr);
2499     switch(c)
2500       {
2501       HSPACE_CASES: break;  /* Byte and multibyte cases */
2502       default: RRETURN(MATCH_NOMATCH);
2503       }
2504     ecode++;
2505     break;
2506 
2507     case OP_NOT_VSPACE:
2508     if (eptr >= md->end_subject)
2509       {
2510       SCHECK_PARTIAL();
2511       RRETURN(MATCH_NOMATCH);
2512       }
2513     GETCHARINCTEST(c, eptr);
2514     switch(c)
2515       {
2516       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2517       default: break;
2518       }
2519     ecode++;
2520     break;
2521 
2522     case OP_VSPACE:
2523     if (eptr >= md->end_subject)
2524       {
2525       SCHECK_PARTIAL();
2526       RRETURN(MATCH_NOMATCH);
2527       }
2528     GETCHARINCTEST(c, eptr);
2529     switch(c)
2530       {
2531       VSPACE_CASES: break;
2532       default: RRETURN(MATCH_NOMATCH);
2533       }
2534     ecode++;
2535     break;
2536 
2537 #ifdef SUPPORT_UCP
2538     /* Check the next character by Unicode property. We will get here only
2539     if the support is in the binary; otherwise a compile-time error occurs. */
2540 
2541     case OP_PROP:
2542     case OP_NOTPROP:
2543     if (eptr >= md->end_subject)
2544       {
2545       SCHECK_PARTIAL();
2546       RRETURN(MATCH_NOMATCH);
2547       }
2548     GETCHARINCTEST(c, eptr);
2549       {
2550       const pcre_uint32 *cp;
2551       const ucd_record *prop = GET_UCD(c);
2552 
2553       switch(ecode[1])
2554         {
2555         case PT_ANY:
2556         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2557         break;
2558 
2559         case PT_LAMP:
2560         if ((prop->chartype == ucp_Lu ||
2561              prop->chartype == ucp_Ll ||
2562              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2563           RRETURN(MATCH_NOMATCH);
2564         break;
2565 
2566         case PT_GC:
2567         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2568           RRETURN(MATCH_NOMATCH);
2569         break;
2570 
2571         case PT_PC:
2572         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2573           RRETURN(MATCH_NOMATCH);
2574         break;
2575 
2576         case PT_SC:
2577         if ((ecode[2] != prop->script) == (op == OP_PROP))
2578           RRETURN(MATCH_NOMATCH);
2579         break;
2580 
2581         /* These are specials */
2582 
2583         case PT_ALNUM:
2584         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2585              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2586           RRETURN(MATCH_NOMATCH);
2587         break;
2588 
2589         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2590         which means that Perl space and POSIX space are now identical. PCRE
2591         was changed at release 8.34. */
2592 
2593         case PT_SPACE:    /* Perl space */
2594         case PT_PXSPACE:  /* POSIX space */
2595         switch(c)
2596           {
2597           HSPACE_CASES:
2598           VSPACE_CASES:
2599           if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2600           break;
2601 
2602           default:
2603           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
2604             (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
2605           break;
2606           }
2607         break;
2608 
2609         case PT_WORD:
2610         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2611              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2612              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2613           RRETURN(MATCH_NOMATCH);
2614         break;
2615 
2616         case PT_CLIST:
2617         cp = PRIV(ucd_caseless_sets) + ecode[2];
2618         for (;;)
2619           {
2620           if (c < *cp)
2621             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2622           if (c == *cp++)
2623             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2624           }
2625         break;
2626 
2627         case PT_UCNC:
2628         if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2629              c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2630              c >= 0xe000) == (op == OP_NOTPROP))
2631           RRETURN(MATCH_NOMATCH);
2632         break;
2633 
2634         /* This should never occur */
2635 
2636         default:
2637         RRETURN(PCRE_ERROR_INTERNAL);
2638         }
2639 
2640       ecode += 3;
2641       }
2642     break;
2643 
2644     /* Match an extended Unicode sequence. We will get here only if the support
2645     is in the binary; otherwise a compile-time error occurs. */
2646 
2647     case OP_EXTUNI:
2648     if (eptr >= md->end_subject)
2649       {
2650       SCHECK_PARTIAL();
2651       RRETURN(MATCH_NOMATCH);
2652       }
2653     else
2654       {
2655       int lgb, rgb;
2656       GETCHARINCTEST(c, eptr);
2657       lgb = UCD_GRAPHBREAK(c);
2658       while (eptr < md->end_subject)
2659         {
2660         int len = 1;
2661         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2662         rgb = UCD_GRAPHBREAK(c);
2663         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2664         lgb = rgb;
2665         eptr += len;
2666         }
2667       }
2668     CHECK_PARTIAL();
2669     ecode++;
2670     break;
2671 #endif  /* SUPPORT_UCP */
2672 
2673 
2674     /* Match a back reference, possibly repeatedly. Look past the end of the
2675     item to see if there is repeat information following. The code is similar
2676     to that for character classes, but repeated for efficiency. Then obey
2677     similar code to character type repeats - written out again for speed.
2678     However, if the referenced string is the empty string, always treat
2679     it as matched, any number of times (otherwise there could be infinite
2680     loops). If the reference is unset, there are two possibilities:
2681 
2682     (a) In the default, Perl-compatible state, set the length negative;
2683     this ensures that every attempt at a match fails. We can't just fail
2684     here, because of the possibility of quantifiers with zero minima.
2685 
2686     (b) If the JavaScript compatibility flag is set, set the length to zero
2687     so that the back reference matches an empty string.
2688 
2689     Otherwise, set the length to the length of what was matched by the
2690     referenced subpattern.
2691 
2692     The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
2693     or to a non-duplicated named group. For a duplicated named group, OP_DNREF
2694     and OP_DNREFI are used. In this case we must scan the list of groups to
2695     which the name refers, and use the first one that is set. */
2696 
2697     case OP_DNREF:
2698     case OP_DNREFI:
2699     caseless = op == OP_DNREFI;
2700       {
2701       int count = GET2(ecode, 1+IMM2_SIZE);
2702       pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
2703       ecode += 1 + 2*IMM2_SIZE;
2704 
2705       /* Setting the default length first and initializing 'offset' avoids
2706       compiler warnings in the REF_REPEAT code. */
2707 
2708       length = (md->jscript_compat)? 0 : -1;
2709       offset = 0;
2710 
2711       while (count-- > 0)
2712         {
2713         offset = GET2(slot, 0) << 1;
2714         if (offset < offset_top && md->offset_vector[offset] >= 0)
2715           {
2716           length = md->offset_vector[offset+1] - md->offset_vector[offset];
2717           break;
2718           }
2719         slot += md->name_entry_size;
2720         }
2721       }
2722     goto REF_REPEAT;
2723 
2724     case OP_REF:
2725     case OP_REFI:
2726     caseless = op == OP_REFI;
2727     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2728     ecode += 1 + IMM2_SIZE;
2729     if (offset >= offset_top || md->offset_vector[offset] < 0)
2730       length = (md->jscript_compat)? 0 : -1;
2731     else
2732       length = md->offset_vector[offset+1] - md->offset_vector[offset];
2733 
2734     /* Set up for repetition, or handle the non-repeated case */
2735 
2736     REF_REPEAT:
2737     switch (*ecode)
2738       {
2739       case OP_CRSTAR:
2740       case OP_CRMINSTAR:
2741       case OP_CRPLUS:
2742       case OP_CRMINPLUS:
2743       case OP_CRQUERY:
2744       case OP_CRMINQUERY:
2745       c = *ecode++ - OP_CRSTAR;
2746       minimize = (c & 1) != 0;
2747       min = rep_min[c];                 /* Pick up values from tables; */
2748       max = rep_max[c];                 /* zero for max => infinity */
2749       if (max == 0) max = INT_MAX;
2750       break;
2751 
2752       case OP_CRRANGE:
2753       case OP_CRMINRANGE:
2754       minimize = (*ecode == OP_CRMINRANGE);
2755       min = GET2(ecode, 1);
2756       max = GET2(ecode, 1 + IMM2_SIZE);
2757       if (max == 0) max = INT_MAX;
2758       ecode += 1 + 2 * IMM2_SIZE;
2759       break;
2760 
2761       default:               /* No repeat follows */
2762       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2763         {
2764         if (length == -2) eptr = md->end_subject;   /* Partial match */
2765         CHECK_PARTIAL();
2766         RRETURN(MATCH_NOMATCH);
2767         }
2768       eptr += length;
2769       continue;              /* With the main loop */
2770       }
2771 
2772     /* Handle repeated back references. If the length of the reference is
2773     zero, just continue with the main loop. If the length is negative, it
2774     means the reference is unset in non-JavaScript-compatible mode. If the minimum is
2775     zero, we can continue at the same level without recursion. For any other
2776     minimum, carrying on will result in NOMATCH. */
2777 
2778     if (length == 0) continue;
2779     if (length < 0 && min == 0) continue;
2780 
2781     /* First, ensure the minimum number of matches are present. We get back
2782     the length of the reference string explicitly rather than passing the
2783     address of eptr, so that eptr can be a register variable. */
2784 
2785     for (i = 1; i <= min; i++)
2786       {
2787       int slength;
2788       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2789         {
2790         if (slength == -2) eptr = md->end_subject;   /* Partial match */
2791         CHECK_PARTIAL();
2792         RRETURN(MATCH_NOMATCH);
2793         }
2794       eptr += slength;
2795       }
2796 
2797     /* If min = max, continue at the same level without recursion.
2798     They are not both allowed to be zero. */
2799 
2800     if (min == max) continue;
2801 
2802     /* If minimizing, keep trying and advancing the pointer */
2803 
2804     if (minimize)
2805       {
2806       for (fi = min;; fi++)
2807         {
2808         int slength;
2809         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2810         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2811         if (fi >= max) RRETURN(MATCH_NOMATCH);
2812         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2813           {
2814           if (slength == -2) eptr = md->end_subject;   /* Partial match */
2815           CHECK_PARTIAL();
2816           RRETURN(MATCH_NOMATCH);
2817           }
2818         eptr += slength;
2819         }
2820       /* Control never gets here */
2821       }
2822 
2823     /* If maximizing, find the longest string and work backwards */
2824 
2825     else
2826       {
2827       pp = eptr;
2828       for (i = min; i < max; i++)
2829         {
2830         int slength;
2831         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2832           {
2833           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2834           the soft partial matching case. */
2835 
2836           if (slength == -2 && md->partial != 0 &&
2837               md->end_subject > md->start_used_ptr)
2838             {
2839             md->hitend = TRUE;
2840             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2841             }
2842           break;
2843           }
2844         eptr += slength;
2845         }
2846 
2847       while (eptr >= pp)
2848         {
2849         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2850         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2851         eptr -= length;
2852         }
2853       RRETURN(MATCH_NOMATCH);
2854       }
2855     /* Control never gets here */
2856 
2857     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2858     used when all the characters in the class have values in the range 0-255,
2859     and either the matching is caseful, or the characters are in the range
2860     0-127 when UTF-8 processing is enabled. The only difference between
2861     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2862     encountered.
2863 
2864     First, look past the end of the item to see if there is repeat information
2865     following. Then obey similar code to character type repeats - written out
2866     again for speed. */
2867 
2868     case OP_NCLASS:
2869     case OP_CLASS:
2870       {
2871       /* The data variable is saved across frames, so the byte map needs to
2872       be stored there. */
2873 #define BYTE_MAP ((pcre_uint8 *)data)
2874       data = ecode + 1;                /* Save for matching */
2875       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2876 
2877       switch (*ecode)
2878         {
2879         case OP_CRSTAR:
2880         case OP_CRMINSTAR:
2881         case OP_CRPLUS:
2882         case OP_CRMINPLUS:
2883         case OP_CRQUERY:
2884         case OP_CRMINQUERY:
2885         case OP_CRPOSSTAR:
2886         case OP_CRPOSPLUS:
2887         case OP_CRPOSQUERY:
2888         c = *ecode++ - OP_CRSTAR;
2889         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
2890         else possessive = TRUE;
2891         min = rep_min[c];                 /* Pick up values from tables; */
2892         max = rep_max[c];                 /* zero for max => infinity */
2893         if (max == 0) max = INT_MAX;
2894         break;
2895 
2896         case OP_CRRANGE:
2897         case OP_CRMINRANGE:
2898         case OP_CRPOSRANGE:
2899         minimize = (*ecode == OP_CRMINRANGE);
2900         possessive = (*ecode == OP_CRPOSRANGE);
2901         min = GET2(ecode, 1);
2902         max = GET2(ecode, 1 + IMM2_SIZE);
2903         if (max == 0) max = INT_MAX;
2904         ecode += 1 + 2 * IMM2_SIZE;
2905         break;
2906 
2907         default:               /* No repeat follows */
2908         min = max = 1;
2909         break;
2910         }
2911 
2912       /* First, ensure the minimum number of matches are present. */
2913 
2914 #ifdef SUPPORT_UTF
2915       if (utf)
2916         {
2917         for (i = 1; i <= min; i++)
2918           {
2919           if (eptr >= md->end_subject)
2920             {
2921             SCHECK_PARTIAL();
2922             RRETURN(MATCH_NOMATCH);
2923             }
2924           GETCHARINC(c, eptr);
2925           if (c > 255)
2926             {
2927             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2928             }
2929           else
2930             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2931           }
2932         }
2933       else
2934 #endif
2935       /* Not UTF mode */
2936         {
2937         for (i = 1; i <= min; i++)
2938           {
2939           if (eptr >= md->end_subject)
2940             {
2941             SCHECK_PARTIAL();
2942             RRETURN(MATCH_NOMATCH);
2943             }
2944           c = *eptr++;
2945 #ifndef COMPILE_PCRE8
2946           if (c > 255)
2947             {
2948             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2949             }
2950           else
2951 #endif
2952             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2953           }
2954         }
2955 
2956       /* If max == min we can continue with the main loop without the
2957       need to recurse. */
2958 
2959       if (min == max) continue;
2960 
2961       /* If minimizing, keep testing the rest of the expression and advancing
2962       the pointer while it matches the class. */
2963 
2964       if (minimize)
2965         {
2966 #ifdef SUPPORT_UTF
2967         if (utf)
2968           {
2969           for (fi = min;; fi++)
2970             {
2971             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2972             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2973             if (fi >= max) RRETURN(MATCH_NOMATCH);
2974             if (eptr >= md->end_subject)
2975               {
2976               SCHECK_PARTIAL();
2977               RRETURN(MATCH_NOMATCH);
2978               }
2979             GETCHARINC(c, eptr);
2980             if (c > 255)
2981               {
2982               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2983               }
2984             else
2985               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2986             }
2987           }
2988         else
2989 #endif
2990         /* Not UTF mode */
2991           {
2992           for (fi = min;; fi++)
2993             {
2994             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2995             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2996             if (fi >= max) RRETURN(MATCH_NOMATCH);
2997             if (eptr >= md->end_subject)
2998               {
2999               SCHECK_PARTIAL();
3000               RRETURN(MATCH_NOMATCH);
3001               }
3002             c = *eptr++;
3003 #ifndef COMPILE_PCRE8
3004             if (c > 255)
3005               {
3006               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3007               }
3008             else
3009 #endif
3010               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3011             }
3012           }
3013         /* Control never gets here */
3014         }
3015 
3016       /* If maximizing, find the longest possible run, then work backwards. */
3017 
3018       else
3019         {
3020         pp = eptr;
3021 
3022 #ifdef SUPPORT_UTF
3023         if (utf)
3024           {
3025           for (i = min; i < max; i++)
3026             {
3027             int len = 1;
3028             if (eptr >= md->end_subject)
3029               {
3030               SCHECK_PARTIAL();
3031               break;
3032               }
3033             GETCHARLEN(c, eptr, len);
3034             if (c > 255)
3035               {
3036               if (op == OP_CLASS) break;
3037               }
3038             else
3039               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3040             eptr += len;
3041             }
3042 
3043           if (possessive) continue;    /* No backtracking */
3044 
3045           for (;;)
3046             {
3047             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
3048             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3049             if (eptr-- == pp) break;        /* Stop if tried at original pos */
3050             BACKCHAR(eptr);
3051             }
3052           }
3053         else
3054 #endif
3055           /* Not UTF mode */
3056           {
3057           for (i = min; i < max; i++)
3058             {
3059             if (eptr >= md->end_subject)
3060               {
3061               SCHECK_PARTIAL();
3062               break;
3063               }
3064             c = *eptr;
3065 #ifndef COMPILE_PCRE8
3066             if (c > 255)
3067               {
3068               if (op == OP_CLASS) break;
3069               }
3070             else
3071 #endif
3072               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3073             eptr++;
3074             }
3075 
3076           if (possessive) continue;    /* No backtracking */
3077 
3078           while (eptr >= pp)
3079             {
3080             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3081             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3082             eptr--;
3083             }
3084           }
3085 
3086         RRETURN(MATCH_NOMATCH);
3087         }
3088 #undef BYTE_MAP
3089       }
3090     /* Control never gets here */
3091 
3092 
3093     /* Match an extended character class. In the 8-bit library, this opcode is
3094     encountered only when UTF-8 mode is supported. In the 16-bit and
3095     32-bit libraries, codepoints greater than 255 may be encountered even when
3096     UTF is not supported. */
3097 
3098 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3099     case OP_XCLASS:
3100       {
3101       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
3102       ecode += GET(ecode, 1);                      /* Advance past the item */
3103 
3104       switch (*ecode)
3105         {
3106         case OP_CRSTAR:
3107         case OP_CRMINSTAR:
3108         case OP_CRPLUS:
3109         case OP_CRMINPLUS:
3110         case OP_CRQUERY:
3111         case OP_CRMINQUERY:
3112         case OP_CRPOSSTAR:
3113         case OP_CRPOSPLUS:
3114         case OP_CRPOSQUERY:
3115         c = *ecode++ - OP_CRSTAR;
3116         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
3117         else possessive = TRUE;
3118         min = rep_min[c];                 /* Pick up values from tables; */
3119         max = rep_max[c];                 /* zero for max => infinity */
3120         if (max == 0) max = INT_MAX;
3121         break;
3122 
3123         case OP_CRRANGE:
3124         case OP_CRMINRANGE:
3125         case OP_CRPOSRANGE:
3126         minimize = (*ecode == OP_CRMINRANGE);
3127         possessive = (*ecode == OP_CRPOSRANGE);
3128         min = GET2(ecode, 1);
3129         max = GET2(ecode, 1 + IMM2_SIZE);
3130         if (max == 0) max = INT_MAX;
3131         ecode += 1 + 2 * IMM2_SIZE;
3132         break;
3133 
3134         default:               /* No repeat follows */
3135         min = max = 1;
3136         break;
3137         }
3138 
3139       /* First, ensure the minimum number of matches are present. */
3140 
3141       for (i = 1; i <= min; i++)
3142         {
3143         if (eptr >= md->end_subject)
3144           {
3145           SCHECK_PARTIAL();
3146           RRETURN(MATCH_NOMATCH);
3147           }
3148         GETCHARINCTEST(c, eptr);
3149         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3150         }
3151 
3152       /* If max == min we can continue with the main loop without the
3153       need to recurse. */
3154 
3155       if (min == max) continue;
3156 
3157       /* If minimizing, keep testing the rest of the expression and advancing
3158       the pointer while it matches the class. */
3159 
3160       if (minimize)
3161         {
3162         for (fi = min;; fi++)
3163           {
3164           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3165           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3166           if (fi >= max) RRETURN(MATCH_NOMATCH);
3167           if (eptr >= md->end_subject)
3168             {
3169             SCHECK_PARTIAL();
3170             RRETURN(MATCH_NOMATCH);
3171             }
3172           GETCHARINCTEST(c, eptr);
3173           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3174           }
3175         /* Control never gets here */
3176         }
3177 
3178       /* If maximizing, find the longest possible run, then work backwards. */
3179 
3180       else
3181         {
3182         pp = eptr;
3183         for (i = min; i < max; i++)
3184           {
3185           int len = 1;
3186           if (eptr >= md->end_subject)
3187             {
3188             SCHECK_PARTIAL();
3189             break;
3190             }
3191 #ifdef SUPPORT_UTF
3192           GETCHARLENTEST(c, eptr, len);
3193 #else
3194           c = *eptr;
3195 #endif
3196           if (!PRIV(xclass)(c, data, utf)) break;
3197           eptr += len;
3198           }
3199 
3200         if (possessive) continue;    /* No backtracking */
3201 
3202         for(;;)
3203           {
3204           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3205           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3206           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3207 #ifdef SUPPORT_UTF
3208           if (utf) BACKCHAR(eptr);
3209 #endif
3210           }
3211         RRETURN(MATCH_NOMATCH);
3212         }
3213 
3214       /* Control never gets here */
3215       }
3216 #endif    /* End of XCLASS */
3217 
3218     /* Match a single character, casefully */
3219 
3220     case OP_CHAR:
3221 #ifdef SUPPORT_UTF
3222     if (utf)
3223       {
3224       length = 1;
3225       ecode++;
3226       GETCHARLEN(fc, ecode, length);
3227       if (length > md->end_subject - eptr)
3228         {
3229         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3230         RRETURN(MATCH_NOMATCH);
3231         }
3232       while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
3233       }
3234     else
3235 #endif
3236     /* Not UTF mode */
3237       {
3238       if (md->end_subject - eptr < 1)
3239         {
3240         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3241         RRETURN(MATCH_NOMATCH);
3242         }
3243       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3244       ecode += 2;
3245       }
3246     break;
3247 
3248     /* Match a single character, caselessly. If we are at the end of the
3249     subject, give up immediately. */
3250 
3251     case OP_CHARI:
3252     if (eptr >= md->end_subject)
3253       {
3254       SCHECK_PARTIAL();
3255       RRETURN(MATCH_NOMATCH);
3256       }
3257 
3258 #ifdef SUPPORT_UTF
3259     if (utf)
3260       {
3261       length = 1;
3262       ecode++;
3263       GETCHARLEN(fc, ecode, length);
3264 
3265       /* If the pattern character's value is < 128, we have only one byte, and
3266       we know that its other case must also be one byte long, so we can use the
3267       fast lookup table. We know that there is at least one byte left in the
3268       subject. */
3269 
3270       if (fc < 128)
3271         {
3272         pcre_uint32 cc = UCHAR21(eptr);
3273         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3274         ecode++;
3275         eptr++;
3276         }
3277 
3278       /* Otherwise we must pick up the subject character. Note that we cannot
3279       use the value of "length" to check for sufficient bytes left, because the
3280       other case of the character may have more or fewer bytes.  */
3281 
3282       else
3283         {
3284         pcre_uint32 dc;
3285         GETCHARINC(dc, eptr);
3286         ecode += length;
3287 
3288         /* If we have Unicode property support, we can use it to test the other
3289         case of the character, if there is one. */
3290 
3291         if (fc != dc)
3292           {
3293 #ifdef SUPPORT_UCP
3294           if (dc != UCD_OTHERCASE(fc))
3295 #endif
3296             RRETURN(MATCH_NOMATCH);
3297           }
3298         }
3299       }
3300     else
3301 #endif   /* SUPPORT_UTF */
3302 
3303     /* Not UTF mode */
3304       {
3305       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3306           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3307       eptr++;
3308       ecode += 2;
3309       }
3310     break;
3311 
3312     /* Match a single character repeatedly. */
3313 
3314     case OP_EXACT:
3315     case OP_EXACTI:
3316     min = max = GET2(ecode, 1);
3317     ecode += 1 + IMM2_SIZE;
3318     goto REPEATCHAR;
3319 
3320     case OP_POSUPTO:
3321     case OP_POSUPTOI:
3322     possessive = TRUE;
3323     /* Fall through */
3324 
3325     case OP_UPTO:
3326     case OP_UPTOI:
3327     case OP_MINUPTO:
3328     case OP_MINUPTOI:
3329     min = 0;
3330     max = GET2(ecode, 1);
3331     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3332     ecode += 1 + IMM2_SIZE;
3333     goto REPEATCHAR;
3334 
3335     case OP_POSSTAR:
3336     case OP_POSSTARI:
3337     possessive = TRUE;
3338     min = 0;
3339     max = INT_MAX;
3340     ecode++;
3341     goto REPEATCHAR;
3342 
3343     case OP_POSPLUS:
3344     case OP_POSPLUSI:
3345     possessive = TRUE;
3346     min = 1;
3347     max = INT_MAX;
3348     ecode++;
3349     goto REPEATCHAR;
3350 
3351     case OP_POSQUERY:
3352     case OP_POSQUERYI:
3353     possessive = TRUE;
3354     min = 0;
3355     max = 1;
3356     ecode++;
3357     goto REPEATCHAR;
3358 
3359     case OP_STAR:
3360     case OP_STARI:
3361     case OP_MINSTAR:
3362     case OP_MINSTARI:
3363     case OP_PLUS:
3364     case OP_PLUSI:
3365     case OP_MINPLUS:
3366     case OP_MINPLUSI:
3367     case OP_QUERY:
3368     case OP_QUERYI:
3369     case OP_MINQUERY:
3370     case OP_MINQUERYI:
3371     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3372     minimize = (c & 1) != 0;
3373     min = rep_min[c];                 /* Pick up values from tables; */
3374     max = rep_max[c];                 /* zero for max => infinity */
3375     if (max == 0) max = INT_MAX;
3376 
3377     /* Common code for all repeated single-character matches. We first check
3378     for the minimum number of characters. If the minimum equals the maximum, we
3379     are done. Otherwise, if minimizing, check the rest of the pattern for a
3380     match; if there isn't one, advance up to the maximum, one character at a
3381     time.
3382 
3383     If maximizing, advance up to the maximum number of matching characters,
3384     until eptr is past the end of the maximum run. If possessive, we are
3385     then done (no backing up). Otherwise, match at this position; anything
3386     other than no match is immediately returned. For nomatch, back up one
3387     character, unless we are matching \R and the last thing matched was
3388     \r\n, in which case, back up two bytes. When we reach the first optional
3389     character position, we can save stack by doing a tail recurse.
3390 
3391     The various UTF/non-UTF and caseful/caseless cases are handled separately,
3392     for speed. */
3393 
3394     REPEATCHAR:
3395 #ifdef SUPPORT_UTF
3396     if (utf)
3397       {
3398       length = 1;
3399       charptr = ecode;
3400       GETCHARLEN(fc, ecode, length);
3401       ecode += length;
3402 
3403       /* Handle multibyte character matching specially here. There is
3404       support for caseless matching if UCP support is present. */
3405 
3406       if (length > 1)
3407         {
3408 #ifdef SUPPORT_UCP
3409         pcre_uint32 othercase;
3410         if (op >= OP_STARI &&     /* Caseless */
3411             (othercase = UCD_OTHERCASE(fc)) != fc)
3412           oclength = PRIV(ord2utf)(othercase, occhars);
3413         else oclength = 0;
3414 #endif  /* SUPPORT_UCP */
3415 
3416         for (i = 1; i <= min; i++)
3417           {
3418           if (eptr <= md->end_subject - length &&
3419             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3420 #ifdef SUPPORT_UCP
3421           else if (oclength > 0 &&
3422                    eptr <= md->end_subject - oclength &&
3423                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3424 #endif  /* SUPPORT_UCP */
3425           else
3426             {
3427             CHECK_PARTIAL();
3428             RRETURN(MATCH_NOMATCH);
3429             }
3430           }
3431 
3432         if (min == max) continue;
3433 
3434         if (minimize)
3435           {
3436           for (fi = min;; fi++)
3437             {
3438             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3439             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3440             if (fi >= max) RRETURN(MATCH_NOMATCH);
3441             if (eptr <= md->end_subject - length &&
3442               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3443 #ifdef SUPPORT_UCP
3444             else if (oclength > 0 &&
3445                      eptr <= md->end_subject - oclength &&
3446                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3447 #endif  /* SUPPORT_UCP */
3448             else
3449               {
3450               CHECK_PARTIAL();
3451               RRETURN(MATCH_NOMATCH);
3452               }
3453             }
3454           /* Control never gets here */
3455           }
3456 
3457         else  /* Maximize */
3458           {
3459           pp = eptr;
3460           for (i = min; i < max; i++)
3461             {
3462             if (eptr <= md->end_subject - length &&
3463                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3464 #ifdef SUPPORT_UCP
3465             else if (oclength > 0 &&
3466                      eptr <= md->end_subject - oclength &&
3467                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3468 #endif  /* SUPPORT_UCP */
3469             else
3470               {
3471               CHECK_PARTIAL();
3472               break;
3473               }
3474             }
3475 
3476           if (possessive) continue;    /* No backtracking */
3477           for(;;)
3478             {
3479             if (eptr == pp) goto TAIL_RECURSE;
3480             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3481             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3482 #ifdef SUPPORT_UCP
3483             eptr--;
3484             BACKCHAR(eptr);
3485 #else   /* without SUPPORT_UCP */
3486             eptr -= length;
3487 #endif  /* SUPPORT_UCP */
3488             }
3489           }
3490         /* Control never gets here */
3491         }
3492 
3493       /* If the length of a UTF-8 character is 1, we fall through here, and
3494       obey the code as for non-UTF-8 characters below, though in this case the
3495       value of fc will always be < 128. */
3496       }
3497     else
3498 #endif  /* SUPPORT_UTF */
3499       /* When not in UTF-8 mode, load a single-byte character. */
3500       fc = *ecode++;
3501 
3502     /* The value of fc at this point is always one character, though we may
3503     or may not be in UTF mode. The code is duplicated for the caseless and
3504     caseful cases, for speed, since matching characters is likely to be quite
3505     common. First, ensure the minimum number of matches are present. If min =
3506     max, continue at the same level without recursing. Otherwise, if
3507     minimizing, keep trying the rest of the expression and advancing one
3508     matching character if failing, up to the maximum. Alternatively, if
3509     maximizing, find the maximum number of characters and work backwards. */
3510 
3511     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3512       max, (char *)eptr));
3513 
3514     if (op >= OP_STARI)  /* Caseless */
3515       {
3516 #ifdef COMPILE_PCRE8
3517       /* fc must be < 128 if UTF is enabled. */
3518       foc = md->fcc[fc];
3519 #else
3520 #ifdef SUPPORT_UTF
3521 #ifdef SUPPORT_UCP
3522       if (utf && fc > 127)
3523         foc = UCD_OTHERCASE(fc);
3524 #else
3525       if (utf && fc > 127)
3526         foc = fc;
3527 #endif /* SUPPORT_UCP */
3528       else
3529 #endif /* SUPPORT_UTF */
3530         foc = TABLE_GET(fc, md->fcc, fc);
3531 #endif /* COMPILE_PCRE8 */
3532 
3533       for (i = 1; i <= min; i++)
3534         {
3535         pcre_uint32 cc;                 /* Faster than pcre_uchar */
3536         if (eptr >= md->end_subject)
3537           {
3538           SCHECK_PARTIAL();
3539           RRETURN(MATCH_NOMATCH);
3540           }
3541         cc = UCHAR21TEST(eptr);
3542         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3543         eptr++;
3544         }
3545       if (min == max) continue;
3546       if (minimize)
3547         {
3548         for (fi = min;; fi++)
3549           {
3550           pcre_uint32 cc;               /* Faster than pcre_uchar */
3551           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3552           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3553           if (fi >= max) RRETURN(MATCH_NOMATCH);
3554           if (eptr >= md->end_subject)
3555             {
3556             SCHECK_PARTIAL();
3557             RRETURN(MATCH_NOMATCH);
3558             }
3559           cc = UCHAR21TEST(eptr);
3560           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3561           eptr++;
3562           }
3563         /* Control never gets here */
3564         }
3565       else  /* Maximize */
3566         {
3567         pp = eptr;
3568         for (i = min; i < max; i++)
3569           {
3570           pcre_uint32 cc;               /* Faster than pcre_uchar */
3571           if (eptr >= md->end_subject)
3572             {
3573             SCHECK_PARTIAL();
3574             break;
3575             }
3576           cc = UCHAR21TEST(eptr);
3577           if (fc != cc && foc != cc) break;
3578           eptr++;
3579           }
3580         if (possessive) continue;       /* No backtracking */
3581         for (;;)
3582           {
3583           if (eptr == pp) goto TAIL_RECURSE;
3584           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3585           eptr--;
3586           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3587           }
3588         /* Control never gets here */
3589         }
3590       }
3591 
3592     /* Caseful comparisons (includes all multi-byte characters) */
3593 
3594     else
3595       {
3596       for (i = 1; i <= min; i++)
3597         {
3598         if (eptr >= md->end_subject)
3599           {
3600           SCHECK_PARTIAL();
3601           RRETURN(MATCH_NOMATCH);
3602           }
3603         if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3604         }
3605 
3606       if (min == max) continue;
3607 
3608       if (minimize)
3609         {
3610         for (fi = min;; fi++)
3611           {
3612           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3613           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3614           if (fi >= max) RRETURN(MATCH_NOMATCH);
3615           if (eptr >= md->end_subject)
3616             {
3617             SCHECK_PARTIAL();
3618             RRETURN(MATCH_NOMATCH);
3619             }
3620           if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3621           }
3622         /* Control never gets here */
3623         }
3624       else  /* Maximize */
3625         {
3626         pp = eptr;
3627         for (i = min; i < max; i++)
3628           {
3629           if (eptr >= md->end_subject)
3630             {
3631             SCHECK_PARTIAL();
3632             break;
3633             }
3634           if (fc != UCHAR21TEST(eptr)) break;
3635           eptr++;
3636           }
3637         if (possessive) continue;    /* No backtracking */
3638         for (;;)
3639           {
3640           if (eptr == pp) goto TAIL_RECURSE;
3641           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3642           eptr--;
3643           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3644           }
3645         /* Control never gets here */
3646         }
3647       }
3648     /* Control never gets here */
3649 
    /* Match a single character that must not be the given character. In UTF
    mode both the pattern character and the subject character can be
    multibyte. */
3652 
3653     case OP_NOT:
3654     case OP_NOTI:
3655     if (eptr >= md->end_subject)
3656       {
3657       SCHECK_PARTIAL();
3658       RRETURN(MATCH_NOMATCH);
3659       }
3660 #ifdef SUPPORT_UTF
3661     if (utf)
3662       {
3663       register pcre_uint32 ch, och;
3664 
3665       ecode++;
3666       GETCHARINC(ch, ecode);
3667       GETCHARINC(c, eptr);
3668 
3669       if (op == OP_NOT)
3670         {
3671         if (ch == c) RRETURN(MATCH_NOMATCH);
3672         }
3673       else
3674         {
3675 #ifdef SUPPORT_UCP
3676         if (ch > 127)
3677           och = UCD_OTHERCASE(ch);
3678 #else
3679         if (ch > 127)
3680           och = ch;
3681 #endif /* SUPPORT_UCP */
3682         else
3683           och = TABLE_GET(ch, md->fcc, ch);
3684         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3685         }
3686       }
3687     else
3688 #endif
3689       {
3690       register pcre_uint32 ch = ecode[1];
3691       c = *eptr++;
3692       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3693         RRETURN(MATCH_NOMATCH);
3694       ecode += 2;
3695       }
3696     break;
3697 
    /* Match a negated single character repeatedly. This is almost a repeat
    of the code for a repeated single character, but I haven't found a nice
    way of merging the two that doesn't require a test of the
    positive/negative option for each character match. Maybe that wouldn't add
    very much to the time taken, but character matching *is* what this is all
    about... */
3704 
3705     case OP_NOTEXACT:
3706     case OP_NOTEXACTI:
3707     min = max = GET2(ecode, 1);
3708     ecode += 1 + IMM2_SIZE;
3709     goto REPEATNOTCHAR;
3710 
3711     case OP_NOTUPTO:
3712     case OP_NOTUPTOI:
3713     case OP_NOTMINUPTO:
3714     case OP_NOTMINUPTOI:
3715     min = 0;
3716     max = GET2(ecode, 1);
3717     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3718     ecode += 1 + IMM2_SIZE;
3719     goto REPEATNOTCHAR;
3720 
3721     case OP_NOTPOSSTAR:
3722     case OP_NOTPOSSTARI:
3723     possessive = TRUE;
3724     min = 0;
3725     max = INT_MAX;
3726     ecode++;
3727     goto REPEATNOTCHAR;
3728 
3729     case OP_NOTPOSPLUS:
3730     case OP_NOTPOSPLUSI:
3731     possessive = TRUE;
3732     min = 1;
3733     max = INT_MAX;
3734     ecode++;
3735     goto REPEATNOTCHAR;
3736 
3737     case OP_NOTPOSQUERY:
3738     case OP_NOTPOSQUERYI:
3739     possessive = TRUE;
3740     min = 0;
3741     max = 1;
3742     ecode++;
3743     goto REPEATNOTCHAR;
3744 
3745     case OP_NOTPOSUPTO:
3746     case OP_NOTPOSUPTOI:
3747     possessive = TRUE;
3748     min = 0;
3749     max = GET2(ecode, 1);
3750     ecode += 1 + IMM2_SIZE;
3751     goto REPEATNOTCHAR;
3752 
3753     case OP_NOTSTAR:
3754     case OP_NOTSTARI:
3755     case OP_NOTMINSTAR:
3756     case OP_NOTMINSTARI:
3757     case OP_NOTPLUS:
3758     case OP_NOTPLUSI:
3759     case OP_NOTMINPLUS:
3760     case OP_NOTMINPLUSI:
3761     case OP_NOTQUERY:
3762     case OP_NOTQUERYI:
3763     case OP_NOTMINQUERY:
3764     case OP_NOTMINQUERYI:
3765     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3766     minimize = (c & 1) != 0;
3767     min = rep_min[c];                 /* Pick up values from tables; */
3768     max = rep_max[c];                 /* zero for max => infinity */
3769     if (max == 0) max = INT_MAX;
3770 
    /* Common code for all repeated negated single-character matches. */
3772 
3773     REPEATNOTCHAR:
3774     GETCHARINCTEST(fc, ecode);
3775 
3776     /* The code is duplicated for the caseless and caseful cases, for speed,
3777     since matching characters is likely to be quite common. First, ensure the
3778     minimum number of matches are present. If min = max, continue at the same
3779     level without recursing. Otherwise, if minimizing, keep trying the rest of
3780     the expression and advancing one matching character if failing, up to the
3781     maximum. Alternatively, if maximizing, find the maximum number of
3782     characters and work backwards. */
3783 
3784     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3785       max, (char *)eptr));
3786 
3787     if (op >= OP_NOTSTARI)     /* Caseless */
3788       {
3789 #ifdef SUPPORT_UTF
3790 #ifdef SUPPORT_UCP
3791       if (utf && fc > 127)
3792         foc = UCD_OTHERCASE(fc);
3793 #else
3794       if (utf && fc > 127)
3795         foc = fc;
3796 #endif /* SUPPORT_UCP */
3797       else
3798 #endif /* SUPPORT_UTF */
3799         foc = TABLE_GET(fc, md->fcc, fc);
3800 
3801 #ifdef SUPPORT_UTF
3802       if (utf)
3803         {
3804         register pcre_uint32 d;
3805         for (i = 1; i <= min; i++)
3806           {
3807           if (eptr >= md->end_subject)
3808             {
3809             SCHECK_PARTIAL();
3810             RRETURN(MATCH_NOMATCH);
3811             }
3812           GETCHARINC(d, eptr);
3813           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3814           }
3815         }
3816       else
3817 #endif  /* SUPPORT_UTF */
3818       /* Not UTF mode */
3819         {
3820         for (i = 1; i <= min; i++)
3821           {
3822           if (eptr >= md->end_subject)
3823             {
3824             SCHECK_PARTIAL();
3825             RRETURN(MATCH_NOMATCH);
3826             }
3827           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3828           eptr++;
3829           }
3830         }
3831 
3832       if (min == max) continue;
3833 
3834       if (minimize)
3835         {
3836 #ifdef SUPPORT_UTF
3837         if (utf)
3838           {
3839           register pcre_uint32 d;
3840           for (fi = min;; fi++)
3841             {
3842             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3843             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3844             if (fi >= max) RRETURN(MATCH_NOMATCH);
3845             if (eptr >= md->end_subject)
3846               {
3847               SCHECK_PARTIAL();
3848               RRETURN(MATCH_NOMATCH);
3849               }
3850             GETCHARINC(d, eptr);
3851             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3852             }
3853           }
3854         else
3855 #endif  /*SUPPORT_UTF */
3856         /* Not UTF mode */
3857           {
3858           for (fi = min;; fi++)
3859             {
3860             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3861             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3862             if (fi >= max) RRETURN(MATCH_NOMATCH);
3863             if (eptr >= md->end_subject)
3864               {
3865               SCHECK_PARTIAL();
3866               RRETURN(MATCH_NOMATCH);
3867               }
3868             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3869             eptr++;
3870             }
3871           }
3872         /* Control never gets here */
3873         }
3874 
3875       /* Maximize case */
3876 
3877       else
3878         {
3879         pp = eptr;
3880 
3881 #ifdef SUPPORT_UTF
3882         if (utf)
3883           {
3884           register pcre_uint32 d;
3885           for (i = min; i < max; i++)
3886             {
3887             int len = 1;
3888             if (eptr >= md->end_subject)
3889               {
3890               SCHECK_PARTIAL();
3891               break;
3892               }
3893             GETCHARLEN(d, eptr, len);
3894             if (fc == d || (unsigned int)foc == d) break;
3895             eptr += len;
3896             }
3897           if (possessive) continue;    /* No backtracking */
3898           for(;;)
3899             {
3900             if (eptr == pp) goto TAIL_RECURSE;
3901             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3902             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3903             eptr--;
3904             BACKCHAR(eptr);
3905             }
3906           }
3907         else
3908 #endif  /* SUPPORT_UTF */
3909         /* Not UTF mode */
3910           {
3911           for (i = min; i < max; i++)
3912             {
3913             if (eptr >= md->end_subject)
3914               {
3915               SCHECK_PARTIAL();
3916               break;
3917               }
3918             if (fc == *eptr || foc == *eptr) break;
3919             eptr++;
3920             }
3921           if (possessive) continue;    /* No backtracking */
3922           for (;;)
3923             {
3924             if (eptr == pp) goto TAIL_RECURSE;
3925             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3926             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3927             eptr--;
3928             }
3929           }
3930         /* Control never gets here */
3931         }
3932       }
3933 
3934     /* Caseful comparisons */
3935 
3936     else
3937       {
3938 #ifdef SUPPORT_UTF
3939       if (utf)
3940         {
3941         register pcre_uint32 d;
3942         for (i = 1; i <= min; i++)
3943           {
3944           if (eptr >= md->end_subject)
3945             {
3946             SCHECK_PARTIAL();
3947             RRETURN(MATCH_NOMATCH);
3948             }
3949           GETCHARINC(d, eptr);
3950           if (fc == d) RRETURN(MATCH_NOMATCH);
3951           }
3952         }
3953       else
3954 #endif
3955       /* Not UTF mode */
3956         {
3957         for (i = 1; i <= min; i++)
3958           {
3959           if (eptr >= md->end_subject)
3960             {
3961             SCHECK_PARTIAL();
3962             RRETURN(MATCH_NOMATCH);
3963             }
3964           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3965           }
3966         }
3967 
3968       if (min == max) continue;
3969 
3970       if (minimize)
3971         {
3972 #ifdef SUPPORT_UTF
3973         if (utf)
3974           {
3975           register pcre_uint32 d;
3976           for (fi = min;; fi++)
3977             {
3978             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3979             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3980             if (fi >= max) RRETURN(MATCH_NOMATCH);
3981             if (eptr >= md->end_subject)
3982               {
3983               SCHECK_PARTIAL();
3984               RRETURN(MATCH_NOMATCH);
3985               }
3986             GETCHARINC(d, eptr);
3987             if (fc == d) RRETURN(MATCH_NOMATCH);
3988             }
3989           }
3990         else
3991 #endif
3992         /* Not UTF mode */
3993           {
3994           for (fi = min;; fi++)
3995             {
3996             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3997             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3998             if (fi >= max) RRETURN(MATCH_NOMATCH);
3999             if (eptr >= md->end_subject)
4000               {
4001               SCHECK_PARTIAL();
4002               RRETURN(MATCH_NOMATCH);
4003               }
4004             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
4005             }
4006           }
4007         /* Control never gets here */
4008         }
4009 
4010       /* Maximize case */
4011 
4012       else
4013         {
4014         pp = eptr;
4015 
4016 #ifdef SUPPORT_UTF
4017         if (utf)
4018           {
4019           register pcre_uint32 d;
4020           for (i = min; i < max; i++)
4021             {
4022             int len = 1;
4023             if (eptr >= md->end_subject)
4024               {
4025               SCHECK_PARTIAL();
4026               break;
4027               }
4028             GETCHARLEN(d, eptr, len);
4029             if (fc == d) break;
4030             eptr += len;
4031             }
4032           if (possessive) continue;    /* No backtracking */
4033           for(;;)
4034             {
4035             if (eptr == pp) goto TAIL_RECURSE;
4036             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
4037             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4038             eptr--;
4039             BACKCHAR(eptr);
4040             }
4041           }
4042         else
4043 #endif
4044         /* Not UTF mode */
4045           {
4046           for (i = min; i < max; i++)
4047             {
4048             if (eptr >= md->end_subject)
4049               {
4050               SCHECK_PARTIAL();
4051               break;
4052               }
4053             if (fc == *eptr) break;
4054             eptr++;
4055             }
4056           if (possessive) continue;    /* No backtracking */
4057           for (;;)
4058             {
4059             if (eptr == pp) goto TAIL_RECURSE;
4060             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
4061             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4062             eptr--;
4063             }
4064           }
4065         /* Control never gets here */
4066         }
4067       }
4068     /* Control never gets here */
4069 
4070     /* Match a single character type repeatedly; several different opcodes
4071     share code. This is very similar to the code for single characters, but we
4072     repeat it in the interests of efficiency. */
4073 
4074     case OP_TYPEEXACT:
4075     min = max = GET2(ecode, 1);
4076     minimize = TRUE;
4077     ecode += 1 + IMM2_SIZE;
4078     goto REPEATTYPE;
4079 
4080     case OP_TYPEUPTO:
4081     case OP_TYPEMINUPTO:
4082     min = 0;
4083     max = GET2(ecode, 1);
4084     minimize = *ecode == OP_TYPEMINUPTO;
4085     ecode += 1 + IMM2_SIZE;
4086     goto REPEATTYPE;
4087 
4088     case OP_TYPEPOSSTAR:
4089     possessive = TRUE;
4090     min = 0;
4091     max = INT_MAX;
4092     ecode++;
4093     goto REPEATTYPE;
4094 
4095     case OP_TYPEPOSPLUS:
4096     possessive = TRUE;
4097     min = 1;
4098     max = INT_MAX;
4099     ecode++;
4100     goto REPEATTYPE;
4101 
4102     case OP_TYPEPOSQUERY:
4103     possessive = TRUE;
4104     min = 0;
4105     max = 1;
4106     ecode++;
4107     goto REPEATTYPE;
4108 
4109     case OP_TYPEPOSUPTO:
4110     possessive = TRUE;
4111     min = 0;
4112     max = GET2(ecode, 1);
4113     ecode += 1 + IMM2_SIZE;
4114     goto REPEATTYPE;
4115 
4116     case OP_TYPESTAR:
4117     case OP_TYPEMINSTAR:
4118     case OP_TYPEPLUS:
4119     case OP_TYPEMINPLUS:
4120     case OP_TYPEQUERY:
4121     case OP_TYPEMINQUERY:
4122     c = *ecode++ - OP_TYPESTAR;
4123     minimize = (c & 1) != 0;
4124     min = rep_min[c];                 /* Pick up values from tables; */
4125     max = rep_max[c];                 /* zero for max => infinity */
4126     if (max == 0) max = INT_MAX;
4127 
4128     /* Common code for all repeated single character type matches. Note that
4129     in UTF-8 mode, '.' matches a character of any length, but for the other
4130     character types, the valid characters are all one-byte long. */
4131 
4132     REPEATTYPE:
4133     ctype = *ecode++;      /* Code for the character type */
4134 
4135 #ifdef SUPPORT_UCP
4136     if (ctype == OP_PROP || ctype == OP_NOTPROP)
4137       {
4138       prop_fail_result = ctype == OP_NOTPROP;
4139       prop_type = *ecode++;
4140       prop_value = *ecode++;
4141       }
4142     else prop_type = -1;
4143 #endif
4144 
4145     /* First, ensure the minimum number of matches are present. Use inline
4146     code for maximizing the speed, and do the type test once at the start
4147     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4148     is tidier. Also separate the UCP code, which can be the same for both UTF-8
4149     and single-bytes. */
4150 
4151     if (min > 0)
4152       {
4153 #ifdef SUPPORT_UCP
4154       if (prop_type >= 0)
4155         {
4156         switch(prop_type)
4157           {
4158           case PT_ANY:
4159           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4160           for (i = 1; i <= min; i++)
4161             {
4162             if (eptr >= md->end_subject)
4163               {
4164               SCHECK_PARTIAL();
4165               RRETURN(MATCH_NOMATCH);
4166               }
4167             GETCHARINCTEST(c, eptr);
4168             }
4169           break;
4170 
4171           case PT_LAMP:
4172           for (i = 1; i <= min; i++)
4173             {
4174             int chartype;
4175             if (eptr >= md->end_subject)
4176               {
4177               SCHECK_PARTIAL();
4178               RRETURN(MATCH_NOMATCH);
4179               }
4180             GETCHARINCTEST(c, eptr);
4181             chartype = UCD_CHARTYPE(c);
4182             if ((chartype == ucp_Lu ||
4183                  chartype == ucp_Ll ||
4184                  chartype == ucp_Lt) == prop_fail_result)
4185               RRETURN(MATCH_NOMATCH);
4186             }
4187           break;
4188 
4189           case PT_GC:
4190           for (i = 1; i <= min; i++)
4191             {
4192             if (eptr >= md->end_subject)
4193               {
4194               SCHECK_PARTIAL();
4195               RRETURN(MATCH_NOMATCH);
4196               }
4197             GETCHARINCTEST(c, eptr);
4198             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4199               RRETURN(MATCH_NOMATCH);
4200             }
4201           break;
4202 
4203           case PT_PC:
4204           for (i = 1; i <= min; i++)
4205             {
4206             if (eptr >= md->end_subject)
4207               {
4208               SCHECK_PARTIAL();
4209               RRETURN(MATCH_NOMATCH);
4210               }
4211             GETCHARINCTEST(c, eptr);
4212             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4213               RRETURN(MATCH_NOMATCH);
4214             }
4215           break;
4216 
4217           case PT_SC:
4218           for (i = 1; i <= min; i++)
4219             {
4220             if (eptr >= md->end_subject)
4221               {
4222               SCHECK_PARTIAL();
4223               RRETURN(MATCH_NOMATCH);
4224               }
4225             GETCHARINCTEST(c, eptr);
4226             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4227               RRETURN(MATCH_NOMATCH);
4228             }
4229           break;
4230 
4231           case PT_ALNUM:
4232           for (i = 1; i <= min; i++)
4233             {
4234             int category;
4235             if (eptr >= md->end_subject)
4236               {
4237               SCHECK_PARTIAL();
4238               RRETURN(MATCH_NOMATCH);
4239               }
4240             GETCHARINCTEST(c, eptr);
4241             category = UCD_CATEGORY(c);
4242             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4243               RRETURN(MATCH_NOMATCH);
4244             }
4245           break;
4246 
4247           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4248           which means that Perl space and POSIX space are now identical. PCRE
4249           was changed at release 8.34. */
4250 
4251           case PT_SPACE:    /* Perl space */
4252           case PT_PXSPACE:  /* POSIX space */
4253           for (i = 1; i <= min; i++)
4254             {
4255             if (eptr >= md->end_subject)
4256               {
4257               SCHECK_PARTIAL();
4258               RRETURN(MATCH_NOMATCH);
4259               }
4260             GETCHARINCTEST(c, eptr);
4261             switch(c)
4262               {
4263               HSPACE_CASES:
4264               VSPACE_CASES:
4265               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4266               break;
4267 
4268               default:
4269               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
4270                 RRETURN(MATCH_NOMATCH);
4271               break;
4272               }
4273             }
4274           break;
4275 
4276           case PT_WORD:
4277           for (i = 1; i <= min; i++)
4278             {
4279             int category;
4280             if (eptr >= md->end_subject)
4281               {
4282               SCHECK_PARTIAL();
4283               RRETURN(MATCH_NOMATCH);
4284               }
4285             GETCHARINCTEST(c, eptr);
4286             category = UCD_CATEGORY(c);
4287             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4288                    == prop_fail_result)
4289               RRETURN(MATCH_NOMATCH);
4290             }
4291           break;
4292 
4293           case PT_CLIST:
4294           for (i = 1; i <= min; i++)
4295             {
4296             const pcre_uint32 *cp;
4297             if (eptr >= md->end_subject)
4298               {
4299               SCHECK_PARTIAL();
4300               RRETURN(MATCH_NOMATCH);
4301               }
4302             GETCHARINCTEST(c, eptr);
4303             cp = PRIV(ucd_caseless_sets) + prop_value;
4304             for (;;)
4305               {
4306               if (c < *cp)
4307                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4308               if (c == *cp++)
4309                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4310               }
4311             }
4312           break;
4313 
4314           case PT_UCNC:
4315           for (i = 1; i <= min; i++)
4316             {
4317             if (eptr >= md->end_subject)
4318               {
4319               SCHECK_PARTIAL();
4320               RRETURN(MATCH_NOMATCH);
4321               }
4322             GETCHARINCTEST(c, eptr);
4323             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4324                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4325                  c >= 0xe000) == prop_fail_result)
4326               RRETURN(MATCH_NOMATCH);
4327             }
4328           break;
4329 
4330           /* This should not occur */
4331 
4332           default:
4333           RRETURN(PCRE_ERROR_INTERNAL);
4334           }
4335         }
4336 
4337       /* Match extended Unicode sequences. We will get here only if the
4338       support is in the binary; otherwise a compile-time error occurs. */
4339 
4340       else if (ctype == OP_EXTUNI)
4341         {
4342         for (i = 1; i <= min; i++)
4343           {
4344           if (eptr >= md->end_subject)
4345             {
4346             SCHECK_PARTIAL();
4347             RRETURN(MATCH_NOMATCH);
4348             }
4349           else
4350             {
4351             int lgb, rgb;
4352             GETCHARINCTEST(c, eptr);
4353             lgb = UCD_GRAPHBREAK(c);
4354            while (eptr < md->end_subject)
4355               {
4356               int len = 1;
4357               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4358               rgb = UCD_GRAPHBREAK(c);
4359               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4360               lgb = rgb;
4361               eptr += len;
4362               }
4363             }
4364           CHECK_PARTIAL();
4365           }
4366         }
4367 
4368       else
4369 #endif     /* SUPPORT_UCP */
4370 
4371 /* Handle all other cases when the coding is UTF-8 */
4372 
4373 #ifdef SUPPORT_UTF
4374       if (utf) switch(ctype)
4375         {
4376         case OP_ANY:
4377         for (i = 1; i <= min; i++)
4378           {
4379           if (eptr >= md->end_subject)
4380             {
4381             SCHECK_PARTIAL();
4382             RRETURN(MATCH_NOMATCH);
4383             }
4384           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4385           if (md->partial != 0 &&
4386               eptr + 1 >= md->end_subject &&
4387               NLBLOCK->nltype == NLTYPE_FIXED &&
4388               NLBLOCK->nllen == 2 &&
4389               UCHAR21(eptr) == NLBLOCK->nl[0])
4390             {
4391             md->hitend = TRUE;
4392             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4393             }
4394           eptr++;
4395           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4396           }
4397         break;
4398 
4399         case OP_ALLANY:
4400         for (i = 1; i <= min; i++)
4401           {
4402           if (eptr >= md->end_subject)
4403             {
4404             SCHECK_PARTIAL();
4405             RRETURN(MATCH_NOMATCH);
4406             }
4407           eptr++;
4408           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4409           }
4410         break;
4411 
4412         case OP_ANYBYTE:
4413         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4414         eptr += min;
4415         break;
4416 
4417         case OP_ANYNL:
4418         for (i = 1; i <= min; i++)
4419           {
4420           if (eptr >= md->end_subject)
4421             {
4422             SCHECK_PARTIAL();
4423             RRETURN(MATCH_NOMATCH);
4424             }
4425           GETCHARINC(c, eptr);
4426           switch(c)
4427             {
4428             default: RRETURN(MATCH_NOMATCH);
4429 
4430             case CHAR_CR:
4431             if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
4432             break;
4433 
4434             case CHAR_LF:
4435             break;
4436 
4437             case CHAR_VT:
4438             case CHAR_FF:
4439             case CHAR_NEL:
4440 #ifndef EBCDIC
4441             case 0x2028:
4442             case 0x2029:
4443 #endif  /* Not EBCDIC */
4444             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4445             break;
4446             }
4447           }
4448         break;
4449 
4450         case OP_NOT_HSPACE:
4451         for (i = 1; i <= min; i++)
4452           {
4453           if (eptr >= md->end_subject)
4454             {
4455             SCHECK_PARTIAL();
4456             RRETURN(MATCH_NOMATCH);
4457             }
4458           GETCHARINC(c, eptr);
4459           switch(c)
4460             {
4461             HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
4462             default: break;
4463             }
4464           }
4465         break;
4466 
4467         case OP_HSPACE:
4468         for (i = 1; i <= min; i++)
4469           {
4470           if (eptr >= md->end_subject)
4471             {
4472             SCHECK_PARTIAL();
4473             RRETURN(MATCH_NOMATCH);
4474             }
4475           GETCHARINC(c, eptr);
4476           switch(c)
4477             {
4478             HSPACE_CASES: break;  /* Byte and multibyte cases */
4479             default: RRETURN(MATCH_NOMATCH);
4480             }
4481           }
4482         break;
4483 
4484         case OP_NOT_VSPACE:
4485         for (i = 1; i <= min; i++)
4486           {
4487           if (eptr >= md->end_subject)
4488             {
4489             SCHECK_PARTIAL();
4490             RRETURN(MATCH_NOMATCH);
4491             }
4492           GETCHARINC(c, eptr);
4493           switch(c)
4494             {
4495             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4496             default: break;
4497             }
4498           }
4499         break;
4500 
4501         case OP_VSPACE:
4502         for (i = 1; i <= min; i++)
4503           {
4504           if (eptr >= md->end_subject)
4505             {
4506             SCHECK_PARTIAL();
4507             RRETURN(MATCH_NOMATCH);
4508             }
4509           GETCHARINC(c, eptr);
4510           switch(c)
4511             {
4512             VSPACE_CASES: break;
4513             default: RRETURN(MATCH_NOMATCH);
4514             }
4515           }
4516         break;
4517 
4518         case OP_NOT_DIGIT:
4519         for (i = 1; i <= min; i++)
4520           {
4521           if (eptr >= md->end_subject)
4522             {
4523             SCHECK_PARTIAL();
4524             RRETURN(MATCH_NOMATCH);
4525             }
4526           GETCHARINC(c, eptr);
4527           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4528             RRETURN(MATCH_NOMATCH);
4529           }
4530         break;
4531 
4532         case OP_DIGIT:
4533         for (i = 1; i <= min; i++)
4534           {
4535           pcre_uint32 cc;
4536           if (eptr >= md->end_subject)
4537             {
4538             SCHECK_PARTIAL();
4539             RRETURN(MATCH_NOMATCH);
4540             }
4541           cc = UCHAR21(eptr);
4542           if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
4543             RRETURN(MATCH_NOMATCH);
4544           eptr++;
4545           /* No need to skip more bytes - we know it's a 1-byte character */
4546           }
4547         break;
4548 
4549         case OP_NOT_WHITESPACE:
4550         for (i = 1; i <= min; i++)
4551           {
4552           pcre_uint32 cc;
4553           if (eptr >= md->end_subject)
4554             {
4555             SCHECK_PARTIAL();
4556             RRETURN(MATCH_NOMATCH);
4557             }
4558           cc = UCHAR21(eptr);
4559           if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
4560             RRETURN(MATCH_NOMATCH);
4561           eptr++;
4562           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4563           }
4564         break;
4565 
4566         case OP_WHITESPACE:
4567         for (i = 1; i <= min; i++)
4568           {
4569           pcre_uint32 cc;
4570           if (eptr >= md->end_subject)
4571             {
4572             SCHECK_PARTIAL();
4573             RRETURN(MATCH_NOMATCH);
4574             }
4575           cc = UCHAR21(eptr);
4576           if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
4577             RRETURN(MATCH_NOMATCH);
4578           eptr++;
4579           /* No need to skip more bytes - we know it's a 1-byte character */
4580           }
4581         break;
4582 
4583         case OP_NOT_WORDCHAR:
4584         for (i = 1; i <= min; i++)
4585           {
4586           pcre_uint32 cc;
4587           if (eptr >= md->end_subject)
4588             {
4589             SCHECK_PARTIAL();
4590             RRETURN(MATCH_NOMATCH);
4591             }
4592           cc = UCHAR21(eptr);
4593           if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
4594             RRETURN(MATCH_NOMATCH);
4595           eptr++;
4596           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4597           }
4598         break;
4599 
4600         case OP_WORDCHAR:
4601         for (i = 1; i <= min; i++)
4602           {
4603           pcre_uint32 cc;
4604           if (eptr >= md->end_subject)
4605             {
4606             SCHECK_PARTIAL();
4607             RRETURN(MATCH_NOMATCH);
4608             }
4609           cc = UCHAR21(eptr);
4610           if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
4611             RRETURN(MATCH_NOMATCH);
4612           eptr++;
4613           /* No need to skip more bytes - we know it's a 1-byte character */
4614           }
4615         break;
4616 
4617         default:
4618         RRETURN(PCRE_ERROR_INTERNAL);
4619         }  /* End switch(ctype) */
4620 
4621       else
4622 #endif     /* SUPPORT_UTF */
4623 
      /* Code for the non-UTF case for minimum matching of operators other
      than OP_PROP and OP_NOTPROP. */
4626 
4627       switch(ctype)
4628         {
4629         case OP_ANY:
4630         for (i = 1; i <= min; i++)
4631           {
4632           if (eptr >= md->end_subject)
4633             {
4634             SCHECK_PARTIAL();
4635             RRETURN(MATCH_NOMATCH);
4636             }
4637           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4638           if (md->partial != 0 &&
4639               eptr + 1 >= md->end_subject &&
4640               NLBLOCK->nltype == NLTYPE_FIXED &&
4641               NLBLOCK->nllen == 2 &&
4642               *eptr == NLBLOCK->nl[0])
4643             {
4644             md->hitend = TRUE;
4645             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4646             }
4647           eptr++;
4648           }
4649         break;
4650 
4651         case OP_ALLANY:
4652         if (eptr > md->end_subject - min)
4653           {
4654           SCHECK_PARTIAL();
4655           RRETURN(MATCH_NOMATCH);
4656           }
4657         eptr += min;
4658         break;
4659 
4660         case OP_ANYBYTE:
4661         if (eptr > md->end_subject - min)
4662           {
4663           SCHECK_PARTIAL();
4664           RRETURN(MATCH_NOMATCH);
4665           }
4666         eptr += min;
4667         break;
4668 
4669         case OP_ANYNL:
4670         for (i = 1; i <= min; i++)
4671           {
4672           if (eptr >= md->end_subject)
4673             {
4674             SCHECK_PARTIAL();
4675             RRETURN(MATCH_NOMATCH);
4676             }
4677           switch(*eptr++)
4678             {
4679             default: RRETURN(MATCH_NOMATCH);
4680 
4681             case CHAR_CR:
4682             if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
4683             break;
4684 
4685             case CHAR_LF:
4686             break;
4687 
4688             case CHAR_VT:
4689             case CHAR_FF:
4690             case CHAR_NEL:
4691 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4692             case 0x2028:
4693             case 0x2029:
4694 #endif
4695             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4696             break;
4697             }
4698           }
4699         break;
4700 
4701         case OP_NOT_HSPACE:
4702         for (i = 1; i <= min; i++)
4703           {
4704           if (eptr >= md->end_subject)
4705             {
4706             SCHECK_PARTIAL();
4707             RRETURN(MATCH_NOMATCH);
4708             }
4709           switch(*eptr++)
4710             {
4711             default: break;
4712             HSPACE_BYTE_CASES:
4713 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4714             HSPACE_MULTIBYTE_CASES:
4715 #endif
4716             RRETURN(MATCH_NOMATCH);
4717             }
4718           }
4719         break;
4720 
4721         case OP_HSPACE:
4722         for (i = 1; i <= min; i++)
4723           {
4724           if (eptr >= md->end_subject)
4725             {
4726             SCHECK_PARTIAL();
4727             RRETURN(MATCH_NOMATCH);
4728             }
4729           switch(*eptr++)
4730             {
4731             default: RRETURN(MATCH_NOMATCH);
4732             HSPACE_BYTE_CASES:
4733 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4734             HSPACE_MULTIBYTE_CASES:
4735 #endif
4736             break;
4737             }
4738           }
4739         break;
4740 
4741         case OP_NOT_VSPACE:
4742         for (i = 1; i <= min; i++)
4743           {
4744           if (eptr >= md->end_subject)
4745             {
4746             SCHECK_PARTIAL();
4747             RRETURN(MATCH_NOMATCH);
4748             }
4749           switch(*eptr++)
4750             {
4751             VSPACE_BYTE_CASES:
4752 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4753             VSPACE_MULTIBYTE_CASES:
4754 #endif
4755             RRETURN(MATCH_NOMATCH);
4756             default: break;
4757             }
4758           }
4759         break;
4760 
4761         case OP_VSPACE:
4762         for (i = 1; i <= min; i++)
4763           {
4764           if (eptr >= md->end_subject)
4765             {
4766             SCHECK_PARTIAL();
4767             RRETURN(MATCH_NOMATCH);
4768             }
4769           switch(*eptr++)
4770             {
4771             default: RRETURN(MATCH_NOMATCH);
4772             VSPACE_BYTE_CASES:
4773 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4774             VSPACE_MULTIBYTE_CASES:
4775 #endif
4776             break;
4777             }
4778           }
4779         break;
4780 
4781         case OP_NOT_DIGIT:
4782         for (i = 1; i <= min; i++)
4783           {
4784           if (eptr >= md->end_subject)
4785             {
4786             SCHECK_PARTIAL();
4787             RRETURN(MATCH_NOMATCH);
4788             }
4789           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4790             RRETURN(MATCH_NOMATCH);
4791           eptr++;
4792           }
4793         break;
4794 
4795         case OP_DIGIT:
4796         for (i = 1; i <= min; i++)
4797           {
4798           if (eptr >= md->end_subject)
4799             {
4800             SCHECK_PARTIAL();
4801             RRETURN(MATCH_NOMATCH);
4802             }
4803           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4804             RRETURN(MATCH_NOMATCH);
4805           eptr++;
4806           }
4807         break;
4808 
4809         case OP_NOT_WHITESPACE:
4810         for (i = 1; i <= min; i++)
4811           {
4812           if (eptr >= md->end_subject)
4813             {
4814             SCHECK_PARTIAL();
4815             RRETURN(MATCH_NOMATCH);
4816             }
4817           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4818             RRETURN(MATCH_NOMATCH);
4819           eptr++;
4820           }
4821         break;
4822 
4823         case OP_WHITESPACE:
4824         for (i = 1; i <= min; i++)
4825           {
4826           if (eptr >= md->end_subject)
4827             {
4828             SCHECK_PARTIAL();
4829             RRETURN(MATCH_NOMATCH);
4830             }
4831           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4832             RRETURN(MATCH_NOMATCH);
4833           eptr++;
4834           }
4835         break;
4836 
4837         case OP_NOT_WORDCHAR:
4838         for (i = 1; i <= min; i++)
4839           {
4840           if (eptr >= md->end_subject)
4841             {
4842             SCHECK_PARTIAL();
4843             RRETURN(MATCH_NOMATCH);
4844             }
4845           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4846             RRETURN(MATCH_NOMATCH);
4847           eptr++;
4848           }
4849         break;
4850 
4851         case OP_WORDCHAR:
4852         for (i = 1; i <= min; i++)
4853           {
4854           if (eptr >= md->end_subject)
4855             {
4856             SCHECK_PARTIAL();
4857             RRETURN(MATCH_NOMATCH);
4858             }
4859           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4860             RRETURN(MATCH_NOMATCH);
4861           eptr++;
4862           }
4863         break;
4864 
4865         default:
4866         RRETURN(PCRE_ERROR_INTERNAL);
4867         }
4868       }
4869 
4870     /* If min = max, continue at the same level without recursing */
4871 
4872     if (min == max) continue;
4873 
    /* If minimizing, we have to test the rest of the pattern before each
    subsequent match. Again, separate the UTF case for speed, and also
    separate the UCP cases. */
4877 
4878     if (minimize)
4879       {
4880 #ifdef SUPPORT_UCP
4881       if (prop_type >= 0)
4882         {
4883         switch(prop_type)
4884           {
4885           case PT_ANY:
4886           for (fi = min;; fi++)
4887             {
4888             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4889             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4890             if (fi >= max) RRETURN(MATCH_NOMATCH);
4891             if (eptr >= md->end_subject)
4892               {
4893               SCHECK_PARTIAL();
4894               RRETURN(MATCH_NOMATCH);
4895               }
4896             GETCHARINCTEST(c, eptr);
4897             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4898             }
4899           /* Control never gets here */
4900 
4901           case PT_LAMP:
4902           for (fi = min;; fi++)
4903             {
4904             int chartype;
4905             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4906             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4907             if (fi >= max) RRETURN(MATCH_NOMATCH);
4908             if (eptr >= md->end_subject)
4909               {
4910               SCHECK_PARTIAL();
4911               RRETURN(MATCH_NOMATCH);
4912               }
4913             GETCHARINCTEST(c, eptr);
4914             chartype = UCD_CHARTYPE(c);
4915             if ((chartype == ucp_Lu ||
4916                  chartype == ucp_Ll ||
4917                  chartype == ucp_Lt) == prop_fail_result)
4918               RRETURN(MATCH_NOMATCH);
4919             }
4920           /* Control never gets here */
4921 
4922           case PT_GC:
4923           for (fi = min;; fi++)
4924             {
4925             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4926             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4927             if (fi >= max) RRETURN(MATCH_NOMATCH);
4928             if (eptr >= md->end_subject)
4929               {
4930               SCHECK_PARTIAL();
4931               RRETURN(MATCH_NOMATCH);
4932               }
4933             GETCHARINCTEST(c, eptr);
4934             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4935               RRETURN(MATCH_NOMATCH);
4936             }
4937           /* Control never gets here */
4938 
4939           case PT_PC:
4940           for (fi = min;; fi++)
4941             {
4942             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4943             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4944             if (fi >= max) RRETURN(MATCH_NOMATCH);
4945             if (eptr >= md->end_subject)
4946               {
4947               SCHECK_PARTIAL();
4948               RRETURN(MATCH_NOMATCH);
4949               }
4950             GETCHARINCTEST(c, eptr);
4951             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4952               RRETURN(MATCH_NOMATCH);
4953             }
4954           /* Control never gets here */
4955 
4956           case PT_SC:
4957           for (fi = min;; fi++)
4958             {
4959             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4960             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4961             if (fi >= max) RRETURN(MATCH_NOMATCH);
4962             if (eptr >= md->end_subject)
4963               {
4964               SCHECK_PARTIAL();
4965               RRETURN(MATCH_NOMATCH);
4966               }
4967             GETCHARINCTEST(c, eptr);
4968             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4969               RRETURN(MATCH_NOMATCH);
4970             }
4971           /* Control never gets here */
4972 
4973           case PT_ALNUM:
4974           for (fi = min;; fi++)
4975             {
4976             int category;
4977             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4978             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4979             if (fi >= max) RRETURN(MATCH_NOMATCH);
4980             if (eptr >= md->end_subject)
4981               {
4982               SCHECK_PARTIAL();
4983               RRETURN(MATCH_NOMATCH);
4984               }
4985             GETCHARINCTEST(c, eptr);
4986             category = UCD_CATEGORY(c);
4987             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4988               RRETURN(MATCH_NOMATCH);
4989             }
4990           /* Control never gets here */
4991 
4992           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4993           which means that Perl space and POSIX space are now identical. PCRE
4994           was changed at release 8.34. */
4995 
4996           case PT_SPACE:    /* Perl space */
4997           case PT_PXSPACE:  /* POSIX space */
4998           for (fi = min;; fi++)
4999             {
5000             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
5001             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5002             if (fi >= max) RRETURN(MATCH_NOMATCH);
5003             if (eptr >= md->end_subject)
5004               {
5005               SCHECK_PARTIAL();
5006               RRETURN(MATCH_NOMATCH);
5007               }
5008             GETCHARINCTEST(c, eptr);
5009             switch(c)
5010               {
5011               HSPACE_CASES:
5012               VSPACE_CASES:
5013               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
5014               break;
5015 
5016               default:
5017               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5018                 RRETURN(MATCH_NOMATCH);
5019               break;
5020               }
5021             }
5022           /* Control never gets here */
5023 
5024           case PT_WORD:
5025           for (fi = min;; fi++)
5026             {
5027             int category;
5028             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
5029             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5030             if (fi >= max) RRETURN(MATCH_NOMATCH);
5031             if (eptr >= md->end_subject)
5032               {
5033               SCHECK_PARTIAL();
5034               RRETURN(MATCH_NOMATCH);
5035               }
5036             GETCHARINCTEST(c, eptr);
5037             category = UCD_CATEGORY(c);
5038             if ((category == ucp_L ||
5039                  category == ucp_N ||
5040                  c == CHAR_UNDERSCORE)
5041                    == prop_fail_result)
5042               RRETURN(MATCH_NOMATCH);
5043             }
5044           /* Control never gets here */
5045 
5046           case PT_CLIST:
5047           for (fi = min;; fi++)
5048             {
5049             const pcre_uint32 *cp;
5050             RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5051             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5052             if (fi >= max) RRETURN(MATCH_NOMATCH);
5053             if (eptr >= md->end_subject)
5054               {
5055               SCHECK_PARTIAL();
5056               RRETURN(MATCH_NOMATCH);
5057               }
5058             GETCHARINCTEST(c, eptr);
5059             cp = PRIV(ucd_caseless_sets) + prop_value;
5060             for (;;)
5061               {
5062               if (c < *cp)
5063                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
5064               if (c == *cp++)
5065                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
5066               }
5067             }
5068           /* Control never gets here */
5069 
5070           case PT_UCNC:
5071           for (fi = min;; fi++)
5072             {
5073             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
5074             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5075             if (fi >= max) RRETURN(MATCH_NOMATCH);
5076             if (eptr >= md->end_subject)
5077               {
5078               SCHECK_PARTIAL();
5079               RRETURN(MATCH_NOMATCH);
5080               }
5081             GETCHARINCTEST(c, eptr);
5082             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5083                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5084                  c >= 0xe000) == prop_fail_result)
5085               RRETURN(MATCH_NOMATCH);
5086             }
5087           /* Control never gets here */
5088 
5089           /* This should never occur */
5090           default:
5091           RRETURN(PCRE_ERROR_INTERNAL);
5092           }
5093         }
5094 
5095       /* Match extended Unicode sequences. We will get here only if the
5096       support is in the binary; otherwise a compile-time error occurs. */
5097 
5098       else if (ctype == OP_EXTUNI)
5099         {
5100         for (fi = min;; fi++)
5101           {
5102           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5103           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5104           if (fi >= max) RRETURN(MATCH_NOMATCH);
5105           if (eptr >= md->end_subject)
5106             {
5107             SCHECK_PARTIAL();
5108             RRETURN(MATCH_NOMATCH);
5109             }
5110           else
5111             {
5112             int lgb, rgb;
5113             GETCHARINCTEST(c, eptr);
5114             lgb = UCD_GRAPHBREAK(c);
5115             while (eptr < md->end_subject)
5116               {
5117               int len = 1;
5118               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5119               rgb = UCD_GRAPHBREAK(c);
5120               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5121               lgb = rgb;
5122               eptr += len;
5123               }
5124             }
5125           CHECK_PARTIAL();
5126           }
5127         }
5128       else
5129 #endif     /* SUPPORT_UCP */
5130 
5131 #ifdef SUPPORT_UTF
5132       if (utf)
5133         {
5134         for (fi = min;; fi++)
5135           {
5136           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5137           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5138           if (fi >= max) RRETURN(MATCH_NOMATCH);
5139           if (eptr >= md->end_subject)
5140             {
5141             SCHECK_PARTIAL();
5142             RRETURN(MATCH_NOMATCH);
5143             }
5144           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5145             RRETURN(MATCH_NOMATCH);
5146           GETCHARINC(c, eptr);
5147           switch(ctype)
5148             {
5149             case OP_ANY:               /* This is the non-NL case */
5150             if (md->partial != 0 &&    /* Take care with CRLF partial */
5151                 eptr >= md->end_subject &&
5152                 NLBLOCK->nltype == NLTYPE_FIXED &&
5153                 NLBLOCK->nllen == 2 &&
5154                 c == NLBLOCK->nl[0])
5155               {
5156               md->hitend = TRUE;
5157               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5158               }
5159             break;
5160 
5161             case OP_ALLANY:
5162             case OP_ANYBYTE:
5163             break;
5164 
5165             case OP_ANYNL:
5166             switch(c)
5167               {
5168               default: RRETURN(MATCH_NOMATCH);
5169               case CHAR_CR:
5170               if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
5171               break;
5172 
5173               case CHAR_LF:
5174               break;
5175 
5176               case CHAR_VT:
5177               case CHAR_FF:
5178               case CHAR_NEL:
5179 #ifndef EBCDIC
5180               case 0x2028:
5181               case 0x2029:
5182 #endif  /* Not EBCDIC */
5183               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5184               break;
5185               }
5186             break;
5187 
5188             case OP_NOT_HSPACE:
5189             switch(c)
5190               {
5191               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
5192               default: break;
5193               }
5194             break;
5195 
5196             case OP_HSPACE:
5197             switch(c)
5198               {
5199               HSPACE_CASES: break;
5200               default: RRETURN(MATCH_NOMATCH);
5201               }
5202             break;
5203 
5204             case OP_NOT_VSPACE:
5205             switch(c)
5206               {
5207               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
5208               default: break;
5209               }
5210             break;
5211 
5212             case OP_VSPACE:
5213             switch(c)
5214               {
5215               VSPACE_CASES: break;
5216               default: RRETURN(MATCH_NOMATCH);
5217               }
5218             break;
5219 
5220             case OP_NOT_DIGIT:
5221             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5222               RRETURN(MATCH_NOMATCH);
5223             break;
5224 
5225             case OP_DIGIT:
5226             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5227               RRETURN(MATCH_NOMATCH);
5228             break;
5229 
5230             case OP_NOT_WHITESPACE:
5231             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5232               RRETURN(MATCH_NOMATCH);
5233             break;
5234 
5235             case OP_WHITESPACE:
5236             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5237               RRETURN(MATCH_NOMATCH);
5238             break;
5239 
5240             case OP_NOT_WORDCHAR:
5241             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5242               RRETURN(MATCH_NOMATCH);
5243             break;
5244 
5245             case OP_WORDCHAR:
5246             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5247               RRETURN(MATCH_NOMATCH);
5248             break;
5249 
5250             default:
5251             RRETURN(PCRE_ERROR_INTERNAL);
5252             }
5253           }
5254         }
5255       else
5256 #endif
5257       /* Not UTF mode */
5258         {
5259         for (fi = min;; fi++)
5260           {
5261           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5262           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5263           if (fi >= max) RRETURN(MATCH_NOMATCH);
5264           if (eptr >= md->end_subject)
5265             {
5266             SCHECK_PARTIAL();
5267             RRETURN(MATCH_NOMATCH);
5268             }
5269           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5270             RRETURN(MATCH_NOMATCH);
5271           c = *eptr++;
5272           switch(ctype)
5273             {
5274             case OP_ANY:               /* This is the non-NL case */
5275             if (md->partial != 0 &&    /* Take care with CRLF partial */
5276                 eptr >= md->end_subject &&
5277                 NLBLOCK->nltype == NLTYPE_FIXED &&
5278                 NLBLOCK->nllen == 2 &&
5279                 c == NLBLOCK->nl[0])
5280               {
5281               md->hitend = TRUE;
5282               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5283               }
5284             break;
5285 
5286             case OP_ALLANY:
5287             case OP_ANYBYTE:
5288             break;
5289 
5290             case OP_ANYNL:
5291             switch(c)
5292               {
5293               default: RRETURN(MATCH_NOMATCH);
5294               case CHAR_CR:
5295               if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
5296               break;
5297 
5298               case CHAR_LF:
5299               break;
5300 
5301               case CHAR_VT:
5302               case CHAR_FF:
5303               case CHAR_NEL:
5304 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5305               case 0x2028:
5306               case 0x2029:
5307 #endif
5308               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5309               break;
5310               }
5311             break;
5312 
5313             case OP_NOT_HSPACE:
5314             switch(c)
5315               {
5316               default: break;
5317               HSPACE_BYTE_CASES:
5318 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5319               HSPACE_MULTIBYTE_CASES:
5320 #endif
5321               RRETURN(MATCH_NOMATCH);
5322               }
5323             break;
5324 
5325             case OP_HSPACE:
5326             switch(c)
5327               {
5328               default: RRETURN(MATCH_NOMATCH);
5329               HSPACE_BYTE_CASES:
5330 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5331               HSPACE_MULTIBYTE_CASES:
5332 #endif
5333               break;
5334               }
5335             break;
5336 
5337             case OP_NOT_VSPACE:
5338             switch(c)
5339               {
5340               default: break;
5341               VSPACE_BYTE_CASES:
5342 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5343               VSPACE_MULTIBYTE_CASES:
5344 #endif
5345               RRETURN(MATCH_NOMATCH);
5346               }
5347             break;
5348 
5349             case OP_VSPACE:
5350             switch(c)
5351               {
5352               default: RRETURN(MATCH_NOMATCH);
5353               VSPACE_BYTE_CASES:
5354 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5355               VSPACE_MULTIBYTE_CASES:
5356 #endif
5357               break;
5358               }
5359             break;
5360 
5361             case OP_NOT_DIGIT:
5362             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5363             break;
5364 
5365             case OP_DIGIT:
5366             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5367             break;
5368 
5369             case OP_NOT_WHITESPACE:
5370             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5371             break;
5372 
5373             case OP_WHITESPACE:
5374             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5375             break;
5376 
5377             case OP_NOT_WORDCHAR:
5378             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5379             break;
5380 
5381             case OP_WORDCHAR:
5382             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5383             break;
5384 
5385             default:
5386             RRETURN(PCRE_ERROR_INTERNAL);
5387             }
5388           }
5389         }
5390       /* Control never gets here */
5391       }
5392 
    /* If maximizing, it is worth using inline code for speed, doing the type
    test once at the start (i.e. keep it out of the loop). Again, keep the
    UTF and UCP stuff separate. */
5396 
5397     else
5398       {
5399       pp = eptr;  /* Remember where we started */
5400 
5401 #ifdef SUPPORT_UCP
5402       if (prop_type >= 0)
5403         {
5404         switch(prop_type)
5405           {
5406           case PT_ANY:
5407           for (i = min; i < max; i++)
5408             {
5409             int len = 1;
5410             if (eptr >= md->end_subject)
5411               {
5412               SCHECK_PARTIAL();
5413               break;
5414               }
5415             GETCHARLENTEST(c, eptr, len);
5416             if (prop_fail_result) break;
5417             eptr+= len;
5418             }
5419           break;
5420 
5421           case PT_LAMP:
5422           for (i = min; i < max; i++)
5423             {
5424             int chartype;
5425             int len = 1;
5426             if (eptr >= md->end_subject)
5427               {
5428               SCHECK_PARTIAL();
5429               break;
5430               }
5431             GETCHARLENTEST(c, eptr, len);
5432             chartype = UCD_CHARTYPE(c);
5433             if ((chartype == ucp_Lu ||
5434                  chartype == ucp_Ll ||
5435                  chartype == ucp_Lt) == prop_fail_result)
5436               break;
5437             eptr+= len;
5438             }
5439           break;
5440 
5441           case PT_GC:
5442           for (i = min; i < max; i++)
5443             {
5444             int len = 1;
5445             if (eptr >= md->end_subject)
5446               {
5447               SCHECK_PARTIAL();
5448               break;
5449               }
5450             GETCHARLENTEST(c, eptr, len);
5451             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5452             eptr+= len;
5453             }
5454           break;
5455 
5456           case PT_PC:
5457           for (i = min; i < max; i++)
5458             {
5459             int len = 1;
5460             if (eptr >= md->end_subject)
5461               {
5462               SCHECK_PARTIAL();
5463               break;
5464               }
5465             GETCHARLENTEST(c, eptr, len);
5466             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5467             eptr+= len;
5468             }
5469           break;
5470 
5471           case PT_SC:
5472           for (i = min; i < max; i++)
5473             {
5474             int len = 1;
5475             if (eptr >= md->end_subject)
5476               {
5477               SCHECK_PARTIAL();
5478               break;
5479               }
5480             GETCHARLENTEST(c, eptr, len);
5481             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5482             eptr+= len;
5483             }
5484           break;
5485 
5486           case PT_ALNUM:
5487           for (i = min; i < max; i++)
5488             {
5489             int category;
5490             int len = 1;
5491             if (eptr >= md->end_subject)
5492               {
5493               SCHECK_PARTIAL();
5494               break;
5495               }
5496             GETCHARLENTEST(c, eptr, len);
5497             category = UCD_CATEGORY(c);
5498             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5499               break;
5500             eptr+= len;
5501             }
5502           break;
5503 
5504           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5505           which means that Perl space and POSIX space are now identical. PCRE
5506           was changed at release 8.34. */
5507 
5508           case PT_SPACE:    /* Perl space */
5509           case PT_PXSPACE:  /* POSIX space */
5510           for (i = min; i < max; i++)
5511             {
5512             int len = 1;
5513             if (eptr >= md->end_subject)
5514               {
5515               SCHECK_PARTIAL();
5516               break;
5517               }
5518             GETCHARLENTEST(c, eptr, len);
5519             switch(c)
5520               {
5521               HSPACE_CASES:
5522               VSPACE_CASES:
5523               if (prop_fail_result) goto ENDLOOP99;  /* Break the loop */
5524               break;
5525 
5526               default:
5527               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5528                 goto ENDLOOP99;   /* Break the loop */
5529               break;
5530               }
5531             eptr+= len;
5532             }
5533           ENDLOOP99:
5534           break;
5535 
5536           case PT_WORD:
5537           for (i = min; i < max; i++)
5538             {
5539             int category;
5540             int len = 1;
5541             if (eptr >= md->end_subject)
5542               {
5543               SCHECK_PARTIAL();
5544               break;
5545               }
5546             GETCHARLENTEST(c, eptr, len);
5547             category = UCD_CATEGORY(c);
5548             if ((category == ucp_L || category == ucp_N ||
5549                  c == CHAR_UNDERSCORE) == prop_fail_result)
5550               break;
5551             eptr+= len;
5552             }
5553           break;
5554 
5555           case PT_CLIST:
5556           for (i = min; i < max; i++)
5557             {
5558             const pcre_uint32 *cp;
5559             int len = 1;
5560             if (eptr >= md->end_subject)
5561               {
5562               SCHECK_PARTIAL();
5563               break;
5564               }
5565             GETCHARLENTEST(c, eptr, len);
5566             cp = PRIV(ucd_caseless_sets) + prop_value;
5567             for (;;)
5568               {
5569               if (c < *cp)
5570                 { if (prop_fail_result) break; else goto GOT_MAX; }
5571               if (c == *cp++)
5572                 { if (prop_fail_result) goto GOT_MAX; else break; }
5573               }
5574             eptr += len;
5575             }
5576           GOT_MAX:
5577           break;
5578 
5579           case PT_UCNC:
5580           for (i = min; i < max; i++)
5581             {
5582             int len = 1;
5583             if (eptr >= md->end_subject)
5584               {
5585               SCHECK_PARTIAL();
5586               break;
5587               }
5588             GETCHARLENTEST(c, eptr, len);
5589             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5590                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5591                  c >= 0xe000) == prop_fail_result)
5592               break;
5593             eptr += len;
5594             }
5595           break;
5596 
5597           default:
5598           RRETURN(PCRE_ERROR_INTERNAL);
5599           }
5600 
5601         /* eptr is now past the end of the maximum run */
5602 
5603         if (possessive) continue;    /* No backtracking */
5604         for(;;)
5605           {
5606           if (eptr == pp) goto TAIL_RECURSE;
5607           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5608           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5609           eptr--;
5610           if (utf) BACKCHAR(eptr);
5611           }
5612         }
5613 
5614       /* Match extended Unicode grapheme clusters. We will get here only if the
5615       support is in the binary; otherwise a compile-time error occurs. */
5616 
5617       else if (ctype == OP_EXTUNI)
5618         {
5619         for (i = min; i < max; i++)
5620           {
5621           if (eptr >= md->end_subject)
5622             {
5623             SCHECK_PARTIAL();
5624             break;
5625             }
5626           else
5627             {
5628             int lgb, rgb;
5629             GETCHARINCTEST(c, eptr);
5630             lgb = UCD_GRAPHBREAK(c);
5631             while (eptr < md->end_subject)
5632               {
5633               int len = 1;
5634               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5635               rgb = UCD_GRAPHBREAK(c);
5636               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5637               lgb = rgb;
5638               eptr += len;
5639               }
5640             }
5641           CHECK_PARTIAL();
5642           }
5643 
5644         /* eptr is now past the end of the maximum run */
5645 
5646         if (possessive) continue;    /* No backtracking */
5647 
5648         for(;;)
5649           {
5650           int lgb, rgb;
5651           PCRE_PUCHAR fptr;
5652 
5653           if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
5654           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5655           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5656 
5657           /* Backtracking over an extended grapheme cluster involves inspecting
5658           the previous two characters (if present) to see if a break is
5659           permitted between them. */
5660 
5661           eptr--;
5662           if (!utf) c = *eptr; else
5663             {
5664             BACKCHAR(eptr);
5665             GETCHAR(c, eptr);
5666             }
5667           rgb = UCD_GRAPHBREAK(c);
5668 
5669           for (;;)
5670             {
5671             if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
5672             fptr = eptr - 1;
5673             if (!utf) c = *fptr; else
5674               {
5675               BACKCHAR(fptr);
5676               GETCHAR(c, fptr);
5677               }
5678             lgb = UCD_GRAPHBREAK(c);
5679             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5680             eptr = fptr;
5681             rgb = lgb;
5682             }
5683           }
5684         }
5685 
5686       else
5687 #endif   /* SUPPORT_UCP */
5688 
5689 #ifdef SUPPORT_UTF
5690       if (utf)
5691         {
5692         switch(ctype)
5693           {
5694           case OP_ANY:
5695           for (i = min; i < max; i++)
5696             {
5697             if (eptr >= md->end_subject)
5698               {
5699               SCHECK_PARTIAL();
5700               break;
5701               }
5702             if (IS_NEWLINE(eptr)) break;
5703             if (md->partial != 0 &&    /* Take care with CRLF partial */
5704                 eptr + 1 >= md->end_subject &&
5705                 NLBLOCK->nltype == NLTYPE_FIXED &&
5706                 NLBLOCK->nllen == 2 &&
5707                 UCHAR21(eptr) == NLBLOCK->nl[0])
5708               {
5709               md->hitend = TRUE;
5710               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5711               }
5712             eptr++;
5713             ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5714             }
5715           break;
5716 
5717           case OP_ALLANY:
5718           if (max < INT_MAX)
5719             {
5720             for (i = min; i < max; i++)
5721               {
5722               if (eptr >= md->end_subject)
5723                 {
5724                 SCHECK_PARTIAL();
5725                 break;
5726                 }
5727               eptr++;
5728               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5729               }
5730             }
5731           else
5732             {
5733             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5734             SCHECK_PARTIAL();
5735             }
5736           break;
5737 
5738           /* The byte case is the same as non-UTF8 */
5739 
5740           case OP_ANYBYTE:
5741           c = max - min;
5742           if (c > (unsigned int)(md->end_subject - eptr))
5743             {
5744             eptr = md->end_subject;
5745             SCHECK_PARTIAL();
5746             }
5747           else eptr += c;
5748           break;
5749 
5750           case OP_ANYNL:
5751           for (i = min; i < max; i++)
5752             {
5753             int len = 1;
5754             if (eptr >= md->end_subject)
5755               {
5756               SCHECK_PARTIAL();
5757               break;
5758               }
5759             GETCHARLEN(c, eptr, len);
5760             if (c == CHAR_CR)
5761               {
5762               if (++eptr >= md->end_subject) break;
5763               if (UCHAR21(eptr) == CHAR_LF) eptr++;
5764               }
5765             else
5766               {
5767               if (c != CHAR_LF &&
5768                   (md->bsr_anycrlf ||
5769                    (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5770 #ifndef EBCDIC
5771                     && c != 0x2028 && c != 0x2029
5772 #endif  /* Not EBCDIC */
5773                     )))
5774                 break;
5775               eptr += len;
5776               }
5777             }
5778           break;
5779 
5780           case OP_NOT_HSPACE:
5781           case OP_HSPACE:
5782           for (i = min; i < max; i++)
5783             {
5784             BOOL gotspace;
5785             int len = 1;
5786             if (eptr >= md->end_subject)
5787               {
5788               SCHECK_PARTIAL();
5789               break;
5790               }
5791             GETCHARLEN(c, eptr, len);
5792             switch(c)
5793               {
5794               HSPACE_CASES: gotspace = TRUE; break;
5795               default: gotspace = FALSE; break;
5796               }
5797             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5798             eptr += len;
5799             }
5800           break;
5801 
5802           case OP_NOT_VSPACE:
5803           case OP_VSPACE:
5804           for (i = min; i < max; i++)
5805             {
5806             BOOL gotspace;
5807             int len = 1;
5808             if (eptr >= md->end_subject)
5809               {
5810               SCHECK_PARTIAL();
5811               break;
5812               }
5813             GETCHARLEN(c, eptr, len);
5814             switch(c)
5815               {
5816               VSPACE_CASES: gotspace = TRUE; break;
5817               default: gotspace = FALSE; break;
5818               }
5819             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5820             eptr += len;
5821             }
5822           break;
5823 
5824           case OP_NOT_DIGIT:
5825           for (i = min; i < max; i++)
5826             {
5827             int len = 1;
5828             if (eptr >= md->end_subject)
5829               {
5830               SCHECK_PARTIAL();
5831               break;
5832               }
5833             GETCHARLEN(c, eptr, len);
5834             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5835             eptr+= len;
5836             }
5837           break;
5838 
5839           case OP_DIGIT:
5840           for (i = min; i < max; i++)
5841             {
5842             int len = 1;
5843             if (eptr >= md->end_subject)
5844               {
5845               SCHECK_PARTIAL();
5846               break;
5847               }
5848             GETCHARLEN(c, eptr, len);
5849             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5850             eptr+= len;
5851             }
5852           break;
5853 
5854           case OP_NOT_WHITESPACE:
5855           for (i = min; i < max; i++)
5856             {
5857             int len = 1;
5858             if (eptr >= md->end_subject)
5859               {
5860               SCHECK_PARTIAL();
5861               break;
5862               }
5863             GETCHARLEN(c, eptr, len);
5864             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5865             eptr+= len;
5866             }
5867           break;
5868 
5869           case OP_WHITESPACE:
5870           for (i = min; i < max; i++)
5871             {
5872             int len = 1;
5873             if (eptr >= md->end_subject)
5874               {
5875               SCHECK_PARTIAL();
5876               break;
5877               }
5878             GETCHARLEN(c, eptr, len);
5879             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5880             eptr+= len;
5881             }
5882           break;
5883 
5884           case OP_NOT_WORDCHAR:
5885           for (i = min; i < max; i++)
5886             {
5887             int len = 1;
5888             if (eptr >= md->end_subject)
5889               {
5890               SCHECK_PARTIAL();
5891               break;
5892               }
5893             GETCHARLEN(c, eptr, len);
5894             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5895             eptr+= len;
5896             }
5897           break;
5898 
5899           case OP_WORDCHAR:
5900           for (i = min; i < max; i++)
5901             {
5902             int len = 1;
5903             if (eptr >= md->end_subject)
5904               {
5905               SCHECK_PARTIAL();
5906               break;
5907               }
5908             GETCHARLEN(c, eptr, len);
5909             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5910             eptr+= len;
5911             }
5912           break;
5913 
5914           default:
5915           RRETURN(PCRE_ERROR_INTERNAL);
5916           }
5917 
5918         if (possessive) continue;    /* No backtracking */
5919         for(;;)
5920           {
5921           if (eptr == pp) goto TAIL_RECURSE;
5922           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5923           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5924           eptr--;
5925           BACKCHAR(eptr);
5926           if (ctype == OP_ANYNL && eptr > pp  && UCHAR21(eptr) == CHAR_NL &&
5927               UCHAR21(eptr - 1) == CHAR_CR) eptr--;
5928           }
5929         }
5930       else
5931 #endif  /* SUPPORT_UTF */
5932       /* Not UTF mode */
5933         {
5934         switch(ctype)
5935           {
5936           case OP_ANY:
5937           for (i = min; i < max; i++)
5938             {
5939             if (eptr >= md->end_subject)
5940               {
5941               SCHECK_PARTIAL();
5942               break;
5943               }
5944             if (IS_NEWLINE(eptr)) break;
5945             if (md->partial != 0 &&    /* Take care with CRLF partial */
5946                 eptr + 1 >= md->end_subject &&
5947                 NLBLOCK->nltype == NLTYPE_FIXED &&
5948                 NLBLOCK->nllen == 2 &&
5949                 *eptr == NLBLOCK->nl[0])
5950               {
5951               md->hitend = TRUE;
5952               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5953               }
5954             eptr++;
5955             }
5956           break;
5957 
5958           case OP_ALLANY:
5959           case OP_ANYBYTE:
5960           c = max - min;
5961           if (c > (unsigned int)(md->end_subject - eptr))
5962             {
5963             eptr = md->end_subject;
5964             SCHECK_PARTIAL();
5965             }
5966           else eptr += c;
5967           break;
5968 
5969           case OP_ANYNL:
5970           for (i = min; i < max; i++)
5971             {
5972             if (eptr >= md->end_subject)
5973               {
5974               SCHECK_PARTIAL();
5975               break;
5976               }
5977             c = *eptr;
5978             if (c == CHAR_CR)
5979               {
5980               if (++eptr >= md->end_subject) break;
5981               if (*eptr == CHAR_LF) eptr++;
5982               }
5983             else
5984               {
5985               if (c != CHAR_LF && (md->bsr_anycrlf ||
5986                  (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5987 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5988                  && c != 0x2028 && c != 0x2029
5989 #endif
5990                  ))) break;
5991               eptr++;
5992               }
5993             }
5994           break;
5995 
5996           case OP_NOT_HSPACE:
5997           for (i = min; i < max; i++)
5998             {
5999             if (eptr >= md->end_subject)
6000               {
6001               SCHECK_PARTIAL();
6002               break;
6003               }
6004             switch(*eptr)
6005               {
6006               default: eptr++; break;
6007               HSPACE_BYTE_CASES:
6008 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6009               HSPACE_MULTIBYTE_CASES:
6010 #endif
6011               goto ENDLOOP00;
6012               }
6013             }
6014           ENDLOOP00:
6015           break;
6016 
6017           case OP_HSPACE:
6018           for (i = min; i < max; i++)
6019             {
6020             if (eptr >= md->end_subject)
6021               {
6022               SCHECK_PARTIAL();
6023               break;
6024               }
6025             switch(*eptr)
6026               {
6027               default: goto ENDLOOP01;
6028               HSPACE_BYTE_CASES:
6029 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6030               HSPACE_MULTIBYTE_CASES:
6031 #endif
6032               eptr++; break;
6033               }
6034             }
6035           ENDLOOP01:
6036           break;
6037 
6038           case OP_NOT_VSPACE:
6039           for (i = min; i < max; i++)
6040             {
6041             if (eptr >= md->end_subject)
6042               {
6043               SCHECK_PARTIAL();
6044               break;
6045               }
6046             switch(*eptr)
6047               {
6048               default: eptr++; break;
6049               VSPACE_BYTE_CASES:
6050 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6051               VSPACE_MULTIBYTE_CASES:
6052 #endif
6053               goto ENDLOOP02;
6054               }
6055             }
6056           ENDLOOP02:
6057           break;
6058 
6059           case OP_VSPACE:
6060           for (i = min; i < max; i++)
6061             {
6062             if (eptr >= md->end_subject)
6063               {
6064               SCHECK_PARTIAL();
6065               break;
6066               }
6067             switch(*eptr)
6068               {
6069               default: goto ENDLOOP03;
6070               VSPACE_BYTE_CASES:
6071 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6072               VSPACE_MULTIBYTE_CASES:
6073 #endif
6074               eptr++; break;
6075               }
6076             }
6077           ENDLOOP03:
6078           break;
6079 
6080           case OP_NOT_DIGIT:
6081           for (i = min; i < max; i++)
6082             {
6083             if (eptr >= md->end_subject)
6084               {
6085               SCHECK_PARTIAL();
6086               break;
6087               }
6088             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
6089             eptr++;
6090             }
6091           break;
6092 
6093           case OP_DIGIT:
6094           for (i = min; i < max; i++)
6095             {
6096             if (eptr >= md->end_subject)
6097               {
6098               SCHECK_PARTIAL();
6099               break;
6100               }
6101             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6102             eptr++;
6103             }
6104           break;
6105 
6106           case OP_NOT_WHITESPACE:
6107           for (i = min; i < max; i++)
6108             {
6109             if (eptr >= md->end_subject)
6110               {
6111               SCHECK_PARTIAL();
6112               break;
6113               }
6114             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6115             eptr++;
6116             }
6117           break;
6118 
6119           case OP_WHITESPACE:
6120           for (i = min; i < max; i++)
6121             {
6122             if (eptr >= md->end_subject)
6123               {
6124               SCHECK_PARTIAL();
6125               break;
6126               }
6127             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6128             eptr++;
6129             }
6130           break;
6131 
6132           case OP_NOT_WORDCHAR:
6133           for (i = min; i < max; i++)
6134             {
6135             if (eptr >= md->end_subject)
6136               {
6137               SCHECK_PARTIAL();
6138               break;
6139               }
6140             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6141             eptr++;
6142             }
6143           break;
6144 
6145           case OP_WORDCHAR:
6146           for (i = min; i < max; i++)
6147             {
6148             if (eptr >= md->end_subject)
6149               {
6150               SCHECK_PARTIAL();
6151               break;
6152               }
6153             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6154             eptr++;
6155             }
6156           break;
6157 
6158           default:
6159           RRETURN(PCRE_ERROR_INTERNAL);
6160           }
6161 
6162         if (possessive) continue;    /* No backtracking */
6163         for (;;)
6164           {
6165           if (eptr == pp) goto TAIL_RECURSE;
6166           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6167           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6168           eptr--;
6169           if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
6170               eptr[-1] == CHAR_CR) eptr--;
6171           }
6172         }
6173 
6174       /* Control never gets here */
6175       }
6176 
6177     /* There's been some horrible disaster. Arrival here can only mean there is
6178     something seriously wrong in the code above or the OP_xxx definitions. */
6179 
6180     default:
6181     DPRINTF(("Unknown opcode %d\n", *ecode));
6182     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6183     }
6184 
6185   /* Do not stick any code in here without much thought; it is assumed
6186   that "continue" in the code above comes out to here to repeat the main
6187   loop. */
6188 
6189   }             /* End of main loop */
6190 /* Control never reaches here */
6191 
6192 
6193 /* When compiling to use the heap rather than the stack for recursive calls to
6194 match(), the RRETURN() macro jumps here. The number that is saved in
6195 frame->Xwhere indicates which label we actually want to return to. */
6196 
6197 #ifdef NO_RECURSE
6198 #define LBL(val) case val: goto L_RM##val;
6199 HEAP_RETURN:
6200 switch (frame->Xwhere)
6201   {
6202   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6203   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6204   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6205   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6206   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6207   LBL(65) LBL(66)
6208 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6209   LBL(20) LBL(21)
6210 #endif
6211 #ifdef SUPPORT_UTF
6212   LBL(16) LBL(18)
6213   LBL(22) LBL(23) LBL(28) LBL(30)
6214   LBL(32) LBL(34) LBL(42) LBL(46)
6215 #ifdef SUPPORT_UCP
6216   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6217   LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
6218 #endif  /* SUPPORT_UCP */
6219 #endif  /* SUPPORT_UTF */
6220   default:
6221   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6222   return PCRE_ERROR_INTERNAL;
6223   }
6224 #undef LBL
6225 #endif  /* NO_RECURSE */
6226 }
6227 
6228 
6229 /***************************************************************************
6230 ****************************************************************************
6231                    RECURSION IN THE match() FUNCTION
6232 
6233 Undefine all the macros that were defined above to handle this. */
6234 
/* Names that may have been set up as macros for the NO_RECURSE build of
match(). Undefining a name that is not currently a macro is harmless, and the
order of the directives has no effect, so the names are listed alphabetically. */

#ifdef NO_RECURSE
#undef callpat
#undef charptr
#undef condition
#undef ctype
#undef cur_is_word
#undef data
#undef ecode
#undef eptr
#undef eptrb
#undef flags
#undef length
#undef max
#undef min
#undef mstart
#undef new_recursive
#undef newptrb
#undef next
#undef number
#undef offset
#undef offset_top
#undef op
#undef pp
#undef prev
#undef prev_is_word
#undef save_capture_last
#undef save_offset1
#undef save_offset2
#undef save_offset3
#undef saved_eptr
#undef stacksave
#endif  /* NO_RECURSE */

/* fc and fi are macros whether or not NO_RECURSE is set, so they are
undefined unconditionally. */

#undef fi
#undef fc
6278 
6279 /***************************************************************************
6280 ***************************************************************************/
6281 
6282 
6283 #ifdef NO_RECURSE
6284 /*************************************************
6285 *          Release allocated heap frames         *
6286 *************************************************/
6287 
6288 /* This function releases all the allocated frames. The base frame is on the
6289 machine stack, and so must not be freed.
6290 
6291 Argument: the address of the base frame
6292 Returns:  nothing
6293 */
6294 
6295 static void
release_match_heapframes(heapframe * frame_base)6296 release_match_heapframes (heapframe *frame_base)
6297 {
6298 heapframe *nextframe = frame_base->Xnextframe;
6299 while (nextframe != NULL)
6300   {
6301   heapframe *oldframe = nextframe;
6302   nextframe = nextframe->Xnextframe;
6303   (PUBL(stack_free))(oldframe);
6304   }
6305 }
6306 #endif
6307 
6308 
6309 /*************************************************
6310 *         Execute a Regular Expression           *
6311 *************************************************/
6312 
6313 /* This function applies a compiled re to a subject string and picks out
6314 portions of the string if it matches. Two elements in the vector are set for
6315 each substring: the offsets to the start and end of the substring.
6316 
6317 Arguments:
6318   argument_re     points to the compiled expression
6319   extra_data      points to extra data or is NULL
6320   subject         points to the subject string
6321   length          length of subject string (may contain binary zeros)
6322   start_offset    where to start in the subject string
6323   options         option bits
6324   offsets         points to a vector of ints to be filled in with offsets
6325   offsetcount     the number of elements in the vector
6326 
6327 Returns:          > 0 => success; value is the number of elements filled in
6328                   = 0 => success, but offsets is not big enough
6329                    -1 => failed to match
6330                  < -1 => some kind of unexpected problem
6331 */
6332 
6333 #if defined COMPILE_PCRE8
6334 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_exec(const pcre * argument_re,const pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offsetcount)6335 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6336   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6337   int offsetcount)
6338 #elif defined COMPILE_PCRE16
6339 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6340 pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6341   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6342   int offsetcount)
6343 #elif defined COMPILE_PCRE32
6344 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6345 pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
6346   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
6347   int offsetcount)
6348 #endif
6349 {
6350 int rc, ocount, arg_offset_max;
6351 int newline;
6352 BOOL using_temporary_offsets = FALSE;
6353 BOOL anchored;
6354 BOOL startline;
6355 BOOL firstline;
6356 BOOL utf;
6357 BOOL has_first_char = FALSE;
6358 BOOL has_req_char = FALSE;
6359 pcre_uchar first_char = 0;
6360 pcre_uchar first_char2 = 0;
6361 pcre_uchar req_char = 0;
6362 pcre_uchar req_char2 = 0;
6363 match_data match_block;
6364 match_data *md = &match_block;
6365 const pcre_uint8 *tables;
6366 const pcre_uint8 *start_bits = NULL;
6367 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6368 PCRE_PUCHAR end_subject;
6369 PCRE_PUCHAR start_partial = NULL;
6370 PCRE_PUCHAR match_partial = NULL;
6371 PCRE_PUCHAR req_char_ptr = start_match - 1;
6372 
6373 const pcre_study_data *study;
6374 const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6375 
6376 #ifdef NO_RECURSE
6377 heapframe frame_zero;
6378 frame_zero.Xprevframe = NULL;            /* Marks the top level */
6379 frame_zero.Xnextframe = NULL;            /* None are allocated yet */
6380 md->match_frames_base = &frame_zero;
6381 #endif
6382 
6383 /* Check for the special magic call that measures the size of the stack used
6384 per recursive call of match(). Without the funny casting for sizeof, a Windows
6385 compiler gave this error: "unary minus operator applied to unsigned type,
6386 result still unsigned". Hopefully the cast fixes that. */
6387 
6388 if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6389     start_offset == -999)
6390 #ifdef NO_RECURSE
6391   return -((int)sizeof(heapframe));
6392 #else
6393   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6394 #endif
6395 
6396 /* Plausibility checks */
6397 
6398 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6399 if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6400   return PCRE_ERROR_NULL;
6401 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6402 if (length < 0) return PCRE_ERROR_BADLENGTH;
6403 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6404 
6405 /* Check that the first field in the block is the magic number. If it is not,
6406 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6407 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6408 means that the pattern is likely compiled with different endianness. */
6409 
6410 if (re->magic_number != MAGIC_NUMBER)
6411   return re->magic_number == REVERSED_MAGIC_NUMBER?
6412     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6413 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6414 
6415 /* These two settings are used in the code for checking a UTF-8 string that
6416 follows immediately afterwards. Other values in the md block are used only
6417 during "normal" pcre_exec() processing, not when the JIT support is in use,
6418 so they are set up later. */
6419 
6420 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6421 utf = md->utf = (re->options & PCRE_UTF8) != 0;
6422 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6423               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6424 
6425 /* Check a UTF-8 string if required. Pass back the character offset and error
6426 code for an invalid string if a results vector is available. */
6427 
6428 #ifdef SUPPORT_UTF
6429 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6430   {
6431   int erroroffset;
6432   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6433   if (errorcode != 0)
6434     {
6435     if (offsetcount >= 2)
6436       {
6437       offsets[0] = erroroffset;
6438       offsets[1] = errorcode;
6439       }
6440 #if defined COMPILE_PCRE8
6441     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6442       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6443 #elif defined COMPILE_PCRE16
6444     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6445       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6446 #elif defined COMPILE_PCRE32
6447     return PCRE_ERROR_BADUTF32;
6448 #endif
6449     }
6450 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6451   /* Check that a start_offset points to the start of a UTF character. */
6452   if (start_offset > 0 && start_offset < length &&
6453       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6454     return PCRE_ERROR_BADUTF8_OFFSET;
6455 #endif
6456   }
6457 #endif
6458 
6459 /* If the pattern was successfully studied with JIT support, run the JIT
6460 executable instead of the rest of this function. Most options must be set at
6461 compile time for the JIT code to be usable. Fall back to the normal code path if
6462 an unsupported flag is set. */
6463 
6464 #ifdef SUPPORT_JIT
6465 if (extra_data != NULL
6466     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6467                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6468     && extra_data->executable_jit != NULL
6469     && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
6470   {
6471   rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6472        start_offset, options, offsets, offsetcount);
6473 
6474   /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial
6475   matching mode is not compiled; we then fall back to the interpreter. */
6476 
6477   if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6478   }
6479 #endif
6480 
6481 /* Carry on with non-JIT matching. This information is for finding all the
6482 numbers associated with a given name, for condition testing. */
6483 
6484 md->name_table = (pcre_uchar *)re + re->name_table_offset;
6485 md->name_count = re->name_count;
6486 md->name_entry_size = re->name_entry_size;
6487 
6488 /* Fish out the optional data from the extra_data structure, first setting
6489 the default values. */
6490 
6491 study = NULL;
6492 md->match_limit = MATCH_LIMIT;
6493 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6494 md->callout_data = NULL;
6495 
6496 /* The table pointer is always in native byte order. */
6497 
6498 tables = re->tables;
6499 
6500 /* The two limit values override the defaults, whatever their value. */
6501 
6502 if (extra_data != NULL)
6503   {
6504   unsigned long int flags = extra_data->flags;
6505   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6506     study = (const pcre_study_data *)extra_data->study_data;
6507   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6508     md->match_limit = extra_data->match_limit;
6509   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6510     md->match_limit_recursion = extra_data->match_limit_recursion;
6511   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6512     md->callout_data = extra_data->callout_data;
6513   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6514   }
6515 
6516 /* Limits in the regex override only if they are smaller. */
6517 
6518 if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
6519   md->match_limit = re->limit_match;
6520 
6521 if ((re->flags & PCRE_RLSET) != 0 &&
6522     re->limit_recursion < md->match_limit_recursion)
6523   md->match_limit_recursion = re->limit_recursion;
6524 
6525 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6526 is a feature that makes it possible to save compiled regex and re-use them
6527 in other programs later. */
6528 
6529 if (tables == NULL) tables = PRIV(default_tables);
6530 
6531 /* Set up other data */
6532 
6533 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6534 startline = (re->flags & PCRE_STARTLINE) != 0;
6535 firstline = (re->options & PCRE_FIRSTLINE) != 0;
6536 
6537 /* The code starts after the real_pcre block and the capture name table. */
6538 
6539 md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6540   re->name_count * re->name_entry_size;
6541 
6542 md->start_subject = (PCRE_PUCHAR)subject;
6543 md->start_offset = start_offset;
6544 md->end_subject = md->start_subject + length;
6545 end_subject = md->end_subject;
6546 
6547 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6548 md->use_ucp = (re->options & PCRE_UCP) != 0;
6549 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6550 md->ignore_skip_arg = 0;
6551 
6552 /* Some options are unpacked into BOOL variables in the hope that testing
6553 them will be faster than individual option bits. */
6554 
6555 md->notbol = (options & PCRE_NOTBOL) != 0;
6556 md->noteol = (options & PCRE_NOTEOL) != 0;
6557 md->notempty = (options & PCRE_NOTEMPTY) != 0;
6558 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6559 
6560 md->hitend = FALSE;
6561 md->mark = md->nomatch_mark = NULL;     /* In case never set */
6562 
6563 md->recursive = NULL;                   /* No recursion at top level */
6564 md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6565 
6566 md->lcc = tables + lcc_offset;
6567 md->fcc = tables + fcc_offset;
6568 md->ctypes = tables + ctypes_offset;
6569 
6570 /* Handle different \R options. */
6571 
6572 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6573   {
6574   case 0:
6575   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6576     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6577   else
6578 #ifdef BSR_ANYCRLF
6579   md->bsr_anycrlf = TRUE;
6580 #else
6581   md->bsr_anycrlf = FALSE;
6582 #endif
6583   break;
6584 
6585   case PCRE_BSR_ANYCRLF:
6586   md->bsr_anycrlf = TRUE;
6587   break;
6588 
6589   case PCRE_BSR_UNICODE:
6590   md->bsr_anycrlf = FALSE;
6591   break;
6592 
6593   default: return PCRE_ERROR_BADNEWLINE;
6594   }
6595 
6596 /* Handle different types of newline. The three bits give eight cases. If
6597 nothing is set at run time, whatever was used at compile time applies. */
6598 
6599 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6600         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6601   {
6602   case 0: newline = NEWLINE; break;   /* Compile-time default */
6603   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6604   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6605   case PCRE_NEWLINE_CR+
6606        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6607   case PCRE_NEWLINE_ANY: newline = -1; break;
6608   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6609   default: return PCRE_ERROR_BADNEWLINE;
6610   }
6611 
6612 if (newline == -2)
6613   {
6614   md->nltype = NLTYPE_ANYCRLF;
6615   }
6616 else if (newline < 0)
6617   {
6618   md->nltype = NLTYPE_ANY;
6619   }
6620 else
6621   {
6622   md->nltype = NLTYPE_FIXED;
6623   if (newline > 255)
6624     {
6625     md->nllen = 2;
6626     md->nl[0] = (newline >> 8) & 255;
6627     md->nl[1] = newline & 255;
6628     }
6629   else
6630     {
6631     md->nllen = 1;
6632     md->nl[0] = newline;
6633     }
6634   }
6635 
6636 /* Partial matching was originally supported only for a restricted set of
6637 regexes; from release 8.00 there are no restrictions, but the bits are still
6638 defined (though never set). So there's no harm in leaving this code. */
6639 
6640 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6641   return PCRE_ERROR_BADPARTIAL;
6642 
6643 /* If the expression has got more back references than the offsets supplied can
6644 hold, we get a temporary chunk of working store to use during the matching.
6645 Otherwise, we can use the vector supplied, rounding down its size to a multiple
6646 of 3. */
6647 
6648 ocount = offsetcount - (offsetcount % 3);
6649 arg_offset_max = (2*ocount)/3;
6650 
6651 if (re->top_backref > 0 && re->top_backref >= ocount/3)
6652   {
6653   ocount = re->top_backref * 3 + 3;
6654   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6655   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6656   using_temporary_offsets = TRUE;
6657   DPRINTF(("Got memory to hold back references\n"));
6658   }
6659 else md->offset_vector = offsets;
6660 md->offset_end = ocount;
6661 md->offset_max = (2*ocount)/3;
6662 md->capture_last = 0;
6663 
6664 /* Reset the working variable associated with each extraction. These should
6665 never be used unless previously set, but they get saved and restored, and so we
6666 initialize them to avoid reading uninitialized locations. Also, unset the
6667 offsets for the matched string. This is really just for tidiness with callouts,
6668 in case they inspect these fields. */
6669 
6670 if (md->offset_vector != NULL)
6671   {
6672   register int *iptr = md->offset_vector + ocount;
6673   register int *iend = iptr - re->top_bracket;
6674   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6675   while (--iptr >= iend) *iptr = -1;
6676   md->offset_vector[0] = md->offset_vector[1] = -1;
6677   }
6678 
6679 /* Set up the first character to match, if available. The first_char value is
6680 never set for an anchored regular expression, but the anchoring may be forced
6681 at run time, so we have to test for anchoring. The first char may be unset for
6682 an unanchored pattern, of course. If there's no first char and the pattern was
6683 studied, there may be a bitmap of possible first characters. */
6684 
6685 if (!anchored)
6686   {
6687   if ((re->flags & PCRE_FIRSTSET) != 0)
6688     {
6689     has_first_char = TRUE;
6690     first_char = first_char2 = (pcre_uchar)(re->first_char);
6691     if ((re->flags & PCRE_FCH_CASELESS) != 0)
6692       {
6693       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6694 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6695       if (utf && first_char > 127)
6696         first_char2 = UCD_OTHERCASE(first_char);
6697 #endif
6698       }
6699     }
6700   else
6701     if (!startline && study != NULL &&
6702       (study->flags & PCRE_STUDY_MAPPED) != 0)
6703         start_bits = study->start_bits;
6704   }
6705 
6706 /* For anchored or unanchored matches, there may be a "last known required
6707 character" set. */
6708 
6709 if ((re->flags & PCRE_REQCHSET) != 0)
6710   {
6711   has_req_char = TRUE;
6712   req_char = req_char2 = (pcre_uchar)(re->req_char);
6713   if ((re->flags & PCRE_RCH_CASELESS) != 0)
6714     {
6715     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6716 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6717     if (utf && req_char > 127)
6718       req_char2 = UCD_OTHERCASE(req_char);
6719 #endif
6720     }
6721   }
6722 
6723 
6724 /* ==========================================================================*/
6725 
6726 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
6727 the loop runs just once. */
6728 
6729 for(;;)
6730   {
6731   PCRE_PUCHAR save_end_subject = end_subject;
6732   PCRE_PUCHAR new_start_match;
6733 
6734   /* If firstline is TRUE, the start of the match is constrained to the first
6735   line of a multiline string. That is, the match must be before or at the first
6736   newline. Implement this by temporarily adjusting end_subject so that we stop
6737   scanning at a newline. If the match fails at the newline, later code breaks
6738   this loop. */
6739 
6740   if (firstline)
6741     {
6742     PCRE_PUCHAR t = start_match;
6743 #ifdef SUPPORT_UTF
6744     if (utf)
6745       {
6746       while (t < md->end_subject && !IS_NEWLINE(t))
6747         {
6748         t++;
6749         ACROSSCHAR(t < end_subject, *t, t++);
6750         }
6751       }
6752     else
6753 #endif
6754     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6755     end_subject = t;
6756     }
6757 
6758   /* There are some optimizations that avoid running the match if a known
6759   starting point is not found, or if a known later character is not present.
6760   However, there is an option that disables these, for testing and for ensuring
6761   that all callouts do actually occur. The option can be set in the regex by
6762   (*NO_START_OPT) or passed in match-time options. */
6763 
6764   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6765     {
6766     /* Advance to a unique first char if there is one. */
6767 
6768     if (has_first_char)
6769       {
6770       pcre_uchar smc;
6771 
6772       if (first_char != first_char2)
6773         while (start_match < end_subject &&
6774           (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
6775           start_match++;
6776       else
6777         while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
6778           start_match++;
6779       }
6780 
6781     /* Or to just after a linebreak for a multiline match */
6782 
6783     else if (startline)
6784       {
6785       if (start_match > md->start_subject + start_offset)
6786         {
6787 #ifdef SUPPORT_UTF
6788         if (utf)
6789           {
6790           while (start_match < end_subject && !WAS_NEWLINE(start_match))
6791             {
6792             start_match++;
6793             ACROSSCHAR(start_match < end_subject, *start_match,
6794               start_match++);
6795             }
6796           }
6797         else
6798 #endif
6799         while (start_match < end_subject && !WAS_NEWLINE(start_match))
6800           start_match++;
6801 
6802         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6803         and we are now at a LF, advance the match position by one more character.
6804         */
6805 
6806         if (start_match[-1] == CHAR_CR &&
6807              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6808              start_match < end_subject &&
6809              UCHAR21TEST(start_match) == CHAR_NL)
6810           start_match++;
6811         }
6812       }
6813 
6814     /* Or to a non-unique first byte after study */
6815 
6816     else if (start_bits != NULL)
6817       {
6818       while (start_match < end_subject)
6819         {
6820         register pcre_uint32 c = UCHAR21TEST(start_match);
6821 #ifndef COMPILE_PCRE8
6822         if (c > 255) c = 255;
6823 #endif
6824         if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
6825         start_match++;
6826         }
6827       }
6828     }   /* Starting optimizations */
6829 
6830   /* Restore fudged end_subject */
6831 
6832   end_subject = save_end_subject;
6833 
6834   /* The following two optimizations are disabled for partial matching or if
6835   disabling is explicitly requested. */
6836 
6837   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6838     {
6839     /* If the pattern was studied, a minimum subject length may be set. This is
6840     a lower bound; no actual string of that length may actually match the
6841     pattern. Although the value is, strictly, in characters, we treat it as
6842     bytes to avoid spending too much time in this optimization. */
6843 
6844     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6845         (pcre_uint32)(end_subject - start_match) < study->minlength)
6846       {
6847       rc = MATCH_NOMATCH;
6848       break;
6849       }
6850 
6851     /* If req_char is set, we know that that character must appear in the
6852     subject for the match to succeed. If the first character is set, req_char
6853     must be later in the subject; otherwise the test starts at the match point.
6854     This optimization can save a huge amount of backtracking in patterns with
6855     nested unlimited repeats that aren't going to match. Writing separate code
6856     for cased/caseless versions makes it go faster, as does using an
6857     autoincrement and backing off on a match.
6858 
6859     HOWEVER: when the subject string is very, very long, searching to its end
6860     can take a long time, and give bad performance on quite ordinary patterns.
6861     This showed up when somebody was matching something like /^\d+C/ on a
6862     32-megabyte string... so we don't do this when the string is sufficiently
6863     long. */
6864 
6865     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6866       {
6867       register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6868 
6869       /* We don't need to repeat the search if we haven't yet reached the
6870       place we found it at last time. */
6871 
6872       if (p > req_char_ptr)
6873         {
6874         if (req_char != req_char2)
6875           {
6876           while (p < end_subject)
6877             {
6878             register pcre_uint32 pp = UCHAR21INCTEST(p);
6879             if (pp == req_char || pp == req_char2) { p--; break; }
6880             }
6881           }
6882         else
6883           {
6884           while (p < end_subject)
6885             {
6886             if (UCHAR21INCTEST(p) == req_char) { p--; break; }
6887             }
6888           }
6889 
6890         /* If we can't find the required character, break the matching loop,
6891         forcing a match failure. */
6892 
6893         if (p >= end_subject)
6894           {
6895           rc = MATCH_NOMATCH;
6896           break;
6897           }
6898 
6899         /* If we have found the required character, save the point where we
6900         found it, so that we don't search again next time round the loop if
6901         the start hasn't passed this character yet. */
6902 
6903         req_char_ptr = p;
6904         }
6905       }
6906     }
6907 
6908 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
6909   printf(">>>> Match against: ");
6910   pchars(start_match, end_subject - start_match, TRUE, md);
6911   printf("\n");
6912 #endif
6913 
6914   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6915   first starting point for which a partial match was found. */
6916 
6917   md->start_match_ptr = start_match;
6918   md->start_used_ptr = start_match;
6919   md->match_call_count = 0;
6920   md->match_function_type = 0;
6921   md->end_offset_top = 0;
6922   md->skip_arg_count = 0;
6923   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6924   if (md->hitend && start_partial == NULL)
6925     {
6926     start_partial = md->start_used_ptr;
6927     match_partial = start_match;
6928     }
6929 
6930   switch(rc)
6931     {
6932     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6933     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6934     entirely. The only way we can do that is to re-do the match at the same
6935     point, with a flag to force SKIP with an argument to be ignored. Just
6936     treating this case as NOMATCH does not work because it does not check other
6937     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6938 
6939     case MATCH_SKIP_ARG:
6940     new_start_match = start_match;
6941     md->ignore_skip_arg = md->skip_arg_count;
6942     break;
6943 
6944     /* SKIP passes back the next starting point explicitly, but if it is no
6945     greater than the match we have just done, treat it as NOMATCH. */
6946 
6947     case MATCH_SKIP:
6948     if (md->start_match_ptr > start_match)
6949       {
6950       new_start_match = md->start_match_ptr;
6951       break;
6952       }
6953     /* Fall through */
6954 
6955     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6956     exactly like PRUNE. Unset ignore SKIP-with-argument. */
6957 
6958     case MATCH_NOMATCH:
6959     case MATCH_PRUNE:
6960     case MATCH_THEN:
6961     md->ignore_skip_arg = 0;
6962     new_start_match = start_match + 1;
6963 #ifdef SUPPORT_UTF
6964     if (utf)
6965       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6966         new_start_match++);
6967 #endif
6968     break;
6969 
6970     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6971 
6972     case MATCH_COMMIT:
6973     rc = MATCH_NOMATCH;
6974     goto ENDLOOP;
6975 
6976     /* Any other return is either a match, or some kind of error. */
6977 
6978     default:
6979     goto ENDLOOP;
6980     }
6981 
6982   /* Control reaches here for the various types of "no match at this point"
6983   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6984 
6985   rc = MATCH_NOMATCH;
6986 
6987   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6988   newline in the subject (though it may continue over the newline). Therefore,
6989   if we have just failed to match, starting at a newline, do not continue. */
6990 
6991   if (firstline && IS_NEWLINE(start_match)) break;
6992 
6993   /* Advance to new matching position */
6994 
6995   start_match = new_start_match;
6996 
6997   /* Break the loop if the pattern is anchored or if we have passed the end of
6998   the subject. */
6999 
7000   if (anchored || start_match > end_subject) break;
7001 
7002   /* If we have just passed a CR and we are now at a LF, and the pattern does
7003   not contain any explicit matches for \r or \n, and the newline option is CRLF
7004   or ANY or ANYCRLF, advance the match position by one more character. In
7005   normal matching start_match will always be greater than the first position at
7006   this stage, but a failed *SKIP can cause a return at the same point, which is
7007   why the first test exists. */
7008 
7009   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
7010       start_match[-1] == CHAR_CR &&
7011       start_match < end_subject &&
7012       *start_match == CHAR_NL &&
7013       (re->flags & PCRE_HASCRORLF) == 0 &&
7014         (md->nltype == NLTYPE_ANY ||
7015          md->nltype == NLTYPE_ANYCRLF ||
7016          md->nllen == 2))
7017     start_match++;
7018 
7019   md->mark = NULL;   /* Reset for start of next match attempt */
7020   }                  /* End of for(;;) "bumpalong" loop */
7021 
7022 /* ==========================================================================*/
7023 
7024 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
7025 conditions is true:
7026 
7027 (1) The pattern is anchored or the match was failed by (*COMMIT);
7028 
7029 (2) We are past the end of the subject;
7030 
7031 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
7032     this option requests that a match occur at or before the first newline in
7033     the subject.
7034 
7035 When we have a match and the offset vector is big enough to deal with any
7036 backreferences, captured substring offsets will already be set up. In the case
7037 where we had to get some local store to hold offsets for backreference
7038 processing, copy those that we can. In this case there need not be overflow if
7039 certain parts of the pattern were not used, even though there are more
7040 capturing parentheses than vector slots. */
7041 
7042 ENDLOOP:
7043 
7044 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
7045   {
7046   if (using_temporary_offsets)
7047     {
7048     if (arg_offset_max >= 4)
7049       {
7050       memcpy(offsets + 2, md->offset_vector + 2,
7051         (arg_offset_max - 2) * sizeof(int));
7052       DPRINTF(("Copied offsets from temporary memory\n"));
7053       }
7054     if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
7055     DPRINTF(("Freeing temporary memory\n"));
7056     (PUBL(free))(md->offset_vector);
7057     }
7058 
7059   /* Set the return code to the number of captured strings, or 0 if there were
7060   too many to fit into the vector. */
7061 
7062   rc = ((md->capture_last & OVFLBIT) != 0 &&
7063          md->end_offset_top >= arg_offset_max)?
7064     0 : md->end_offset_top/2;
7065 
7066   /* If there is space in the offset vector, set any unused pairs at the end of
7067   the pattern to -1 for backwards compatibility. It is documented that this
7068   happens. In earlier versions, the whole set of potential capturing offsets
7069   was set to -1 each time round the loop, but this is handled differently now.
7070   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
7071   those at the end that need unsetting here. We can't just unset them all at
7072   the start of the whole thing because they may get set in one branch that is
7073   not the final matching branch. */
7074 
7075   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
7076     {
7077     register int *iptr, *iend;
7078     int resetcount = 2 + re->top_bracket * 2;
7079     if (resetcount > offsetcount) resetcount = offsetcount;
7080     iptr = offsets + md->end_offset_top;
7081     iend = offsets + resetcount;
7082     while (iptr < iend) *iptr++ = -1;
7083     }
7084 
7085   /* If there is space, set up the whole thing as substring 0. The value of
7086   md->start_match_ptr might be modified if \K was encountered on the successful
7087   matching path. */
7088 
7089   if (offsetcount < 2) rc = 0; else
7090     {
7091     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
7092     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
7093     }
7094 
7095   /* Return MARK data if requested */
7096 
7097   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7098     *(extra_data->mark) = (pcre_uchar *)md->mark;
7099   DPRINTF((">>>> returning %d\n", rc));
7100 #ifdef NO_RECURSE
7101   release_match_heapframes(&frame_zero);
7102 #endif
7103   return rc;
7104   }
7105 
7106 /* Control gets here if there has been an error, or if the overall match
7107 attempt has failed at all permitted starting positions. */
7108 
7109 if (using_temporary_offsets)
7110   {
7111   DPRINTF(("Freeing temporary memory\n"));
7112   (PUBL(free))(md->offset_vector);
7113   }
7114 
7115 /* For anything other than nomatch or partial match, just return the code. */
7116 
7117 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
7118   {
7119   DPRINTF((">>>> error: returning %d\n", rc));
7120 #ifdef NO_RECURSE
7121   release_match_heapframes(&frame_zero);
7122 #endif
7123   return rc;
7124   }
7125 
7126 /* Handle partial matches - disable any mark data */
7127 
7128 if (match_partial != NULL)
7129   {
7130   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7131   md->mark = NULL;
7132   if (offsetcount > 1)
7133     {
7134     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7135     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7136     if (offsetcount > 2)
7137       offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7138     }
7139   rc = PCRE_ERROR_PARTIAL;
7140   }
7141 
7142 /* This is the classic nomatch case */
7143 
7144 else
7145   {
7146   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7147   rc = PCRE_ERROR_NOMATCH;
7148   }
7149 
7150 /* Return the MARK data if it has been requested. */
7151 
7152 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7153   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7154 #ifdef NO_RECURSE
7155   release_match_heapframes(&frame_zero);
7156 #endif
7157 return rc;
7158 }
7159 
7160 /* End of pcre_exec.c */
7161