1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10          New API code Copyright (c) 2016 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 #include "pcre2_internal.h"
47 
/* Capacity of the pointer stack used while reprocessing nested replacement
text. Each nesting level pushes two pointers (the resume position and the
saved end of the enclosing text). */

#define PTR_STACK_SIZE 20

/* All the option bits that are interpreted by pcre2_substitute() itself. They
are separated from the caller's options and removed before the remaining bits
are passed on to pcre2_match(). */

#define SUBSTITUTE_OPTIONS \
  ( PCRE2_SUBSTITUTE_EXTENDED \
  | PCRE2_SUBSTITUTE_GLOBAL \
  | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH \
  | PCRE2_SUBSTITUTE_UNKNOWN_UNSET \
  | PCRE2_SUBSTITUTE_UNSET_EMPTY )
54 
55 
56 
57 /*************************************************
58 *           Find end of substitute text          *
59 *************************************************/
60 
61 /* In extended mode, we recognize ${name:+set text:unset text} and similar
62 constructions. This requires the identification of unescaped : and }
63 characters. This function scans for such. It must deal with nested ${
64 constructions. The pointer to the text is updated, either to the required end
65 character, or to where an error was detected.
66 
67 Arguments:
68   code      points to the compiled expression (for options)
69   ptrptr    points to the pointer to the start of the text (updated)
70   ptrend    end of the whole string
71   last      TRUE if the last expected string (only } recognized)
72 
73 Returns:    0 on success
74             negative error code on failure
75 */
76 
77 static int
find_text_end(const pcre2_code * code,PCRE2_SPTR * ptrptr,PCRE2_SPTR ptrend,BOOL last)78 find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
79   BOOL last)
80 {
81 int rc = 0;
82 uint32_t nestlevel = 0;
83 BOOL literal = FALSE;
84 PCRE2_SPTR ptr = *ptrptr;
85 
86 for (; ptr < ptrend; ptr++)
87   {
88   if (literal)
89     {
90     if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
91       {
92       literal = FALSE;
93       ptr += 1;
94       }
95     }
96 
97   else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
98     {
99     if (nestlevel == 0) goto EXIT;
100     nestlevel--;
101     }
102 
103   else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
104 
105   else if (*ptr == CHAR_DOLLAR_SIGN)
106     {
107     if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
108       {
109       nestlevel++;
110       ptr += 1;
111       }
112     }
113 
114   else if (*ptr == CHAR_BACKSLASH)
115     {
116     int erc;
117     int errorcode = 0;
118     uint32_t ch;
119 
120     if (ptr < ptrend - 1) switch (ptr[1])
121       {
122       case CHAR_L:
123       case CHAR_l:
124       case CHAR_U:
125       case CHAR_u:
126       ptr += 1;
127       continue;
128       }
129 
130     erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
131       code->overall_options, FALSE, NULL);
132     if (errorcode != 0)
133       {
134       rc = errorcode;
135       goto EXIT;
136       }
137 
138     switch(erc)
139       {
140       case 0:      /* Data character */
141       case ESC_E:  /* Isolated \E is ignored */
142       break;
143 
144       case ESC_Q:
145       literal = TRUE;
146       break;
147 
148       default:
149       rc = PCRE2_ERROR_BADREPESCAPE;
150       goto EXIT;
151       }
152     }
153   }
154 
155 rc = PCRE2_ERROR_REPMISSINGBRACE;   /* Terminator not found */
156 
157 EXIT:
158 *ptrptr = ptr;
159 return rc;
160 }
161 
162 
163 
164 /*************************************************
165 *              Match and substitute              *
166 *************************************************/
167 
168 /* This function applies a compiled re to a subject string and creates a new
169 string with substitutions. The first 7 arguments are the same as for
170 pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
171 
172 Arguments:
173   code            points to the compiled expression
174   subject         points to the subject string
175   length          length of subject string (may contain binary zeros)
176   start_offset    where to start in the subject string
177   options         option bits
178   match_data      points to a match_data block, or is NULL
  mcontext        points to a PCRE2 match context, or is NULL
180   replacement     points to the replacement string
181   rlength         length of replacement string
182   buffer          where to put the substituted string
183   blength         points to length of buffer; updated to length of string
184 
185 Returns:          >= 0 number of substitutions made
186                   < 0 an error code
187                   PCRE2_ERROR_BADREPLACEMENT means invalid use of $
188 */
189 
/* This macro checks for space in the buffer before copying into it. On
overflow, either give an error immediately (by jumping to NOROOM), or, when
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set, keep on, accumulating in extra_needed
the number of additional code units that would have been required.

The macro relies on these names being in scope at the point of use: buffer,
buff_offset, lengthleft, extra_needed, overflowed, suboptions, and the label
NOROOM. The length argument is evaluated more than once, so it must not have
side effects. The body is wrapped in do { } while (0) so that an invocation
followed by a semicolon behaves as a single statement, even as the unbraced
body of an if or else. */

#define CHECKMEMCPY(from,length) \
  do \
    { \
    if (!overflowed && lengthleft < (length)) \
      { \
      if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
      overflowed = TRUE; \
      extra_needed = (length) - lengthleft; \
      } \
    else if (overflowed) \
      { \
      extra_needed += (length); \
      } \
    else \
      { \
      memcpy(buffer + buff_offset, (from), CU2BYTES(length)); \
      buff_offset += (length); \
      lengthleft -= (length); \
      } \
    } \
  while (0)
211 
212 /* Here's the function */
213 
214 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substitute(const pcre2_code * code,PCRE2_SPTR subject,PCRE2_SIZE length,PCRE2_SIZE start_offset,uint32_t options,pcre2_match_data * match_data,pcre2_match_context * mcontext,PCRE2_SPTR replacement,PCRE2_SIZE rlength,PCRE2_UCHAR * buffer,PCRE2_SIZE * blength)215 pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
216   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
217   pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
218   PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
219 {
220 int rc;
221 int subs;
222 int forcecase = 0;
223 int forcecasereset = 0;
224 uint32_t ovector_count;
225 uint32_t goptions = 0;
226 uint32_t suboptions;
227 BOOL match_data_created = FALSE;
228 BOOL literal = FALSE;
229 BOOL overflowed = FALSE;
230 #ifdef SUPPORT_UNICODE
231 BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
232 #endif
233 PCRE2_UCHAR temp[6];
234 PCRE2_SPTR ptr;
235 PCRE2_SPTR repend;
236 PCRE2_SIZE extra_needed = 0;
237 PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
238 PCRE2_SIZE *ovector;
239 
240 buff_offset = 0;
241 lengthleft = buff_length = *blength;
242 *blength = PCRE2_UNSET;
243 
244 /* Partial matching is not valid. */
245 
246 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
247   return PCRE2_ERROR_BADOPTION;
248 
249 /* If no match data block is provided, create one. */
250 
251 if (match_data == NULL)
252   {
253   pcre2_general_context *gcontext = (mcontext == NULL)?
254     (pcre2_general_context *)code :
255     (pcre2_general_context *)mcontext;
256   match_data = pcre2_match_data_create_from_pattern(code, gcontext);
257   if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
258   match_data_created = TRUE;
259   }
260 ovector = pcre2_get_ovector_pointer(match_data);
261 ovector_count = pcre2_get_ovector_count(match_data);
262 
263 /* Find lengths of zero-terminated strings and the end of the replacement. */
264 
265 if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
266 if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
267 repend = replacement + rlength;
268 
269 /* Check UTF replacement string if necessary. */
270 
271 #ifdef SUPPORT_UNICODE
272 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
273   {
274   rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
275   if (rc != 0)
276     {
277     match_data->leftchar = 0;
278     goto EXIT;
279     }
280   }
281 #endif  /* SUPPORT_UNICODE */
282 
283 /* Save the substitute options and remove them from the match options. */
284 
285 suboptions = options & SUBSTITUTE_OPTIONS;
286 options &= ~SUBSTITUTE_OPTIONS;
287 
288 /* Copy up to the start offset */
289 
290 CHECKMEMCPY(subject, start_offset);
291 
292 /* Loop for global substituting. */
293 
294 subs = 0;
295 do
296   {
297   PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
298   uint32_t ptrstackptr = 0;
299 
300   rc = pcre2_match(code, subject, length, start_offset, options|goptions,
301     match_data, mcontext);
302 
303 #ifdef SUPPORT_UNICODE
304   if (utf) options |= PCRE2_NO_UTF_CHECK;  /* Only need to check once */
305 #endif
306 
307   /* Any error other than no match returns the error code. No match when not
308   doing the special after-empty-match global rematch, or when at the end of the
309   subject, breaks the global loop. Otherwise, advance the starting point by one
310   character, copying it to the output, and try again. */
311 
312   if (rc < 0)
313     {
314     PCRE2_SIZE save_start;
315 
316     if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
317     if (goptions == 0 || start_offset >= length) break;
318 
319     /* Advance by one code point. Then, if CRLF is a valid newline sequence and
320     we have advanced into the middle of it, advance one more code point. In
321     other words, do not start in the middle of CRLF, even if CR and LF on their
322     own are valid newlines. */
323 
324     save_start = start_offset++;
325     if (subject[start_offset-1] == CHAR_CR &&
326         code->newline_convention != PCRE2_NEWLINE_CR &&
327         code->newline_convention != PCRE2_NEWLINE_LF &&
328         start_offset < length &&
329         subject[start_offset] == CHAR_LF)
330       start_offset++;
331 
332     /* Otherwise, in UTF mode, advance past any secondary code points. */
333 
334     else if ((code->overall_options & PCRE2_UTF) != 0)
335       {
336 #if PCRE2_CODE_UNIT_WIDTH == 8
337       while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
338         start_offset++;
339 #elif PCRE2_CODE_UNIT_WIDTH == 16
340       while (start_offset < length &&
341             (subject[start_offset] & 0xfc00) == 0xdc00)
342         start_offset++;
343 #endif
344       }
345 
346     /* Copy what we have advanced past, reset the special global options, and
347     continue to the next match. */
348 
349     fraglength = start_offset - save_start;
350     CHECKMEMCPY(subject + save_start, fraglength);
351     goptions = 0;
352     continue;
353     }
354 
355   /* Handle a successful match. Matches that use \K to end before they start
356   are not supported. */
357 
358   if (ovector[1] < ovector[0])
359     {
360     rc = PCRE2_ERROR_BADSUBSPATTERN;
361     goto EXIT;
362     }
363 
364   /* Count substitutions with a paranoid check for integer overflow; surely no
365   real call to this function would ever hit this! */
366 
367   if (subs == INT_MAX)
368     {
369     rc = PCRE2_ERROR_TOOMANYREPLACE;
370     goto EXIT;
371     }
372   subs++;
373 
374   /* Copy the text leading up to the match. */
375 
376   if (rc == 0) rc = ovector_count;
377   fraglength = ovector[0] - start_offset;
378   CHECKMEMCPY(subject + start_offset, fraglength);
379 
380   /* Process the replacement string. Literal mode is set by \Q, but only in
381   extended mode when backslashes are being interpreted. In extended mode we
382   must handle nested substrings that are to be reprocessed. */
383 
384   ptr = replacement;
385   for (;;)
386     {
387     uint32_t ch;
388     unsigned int chlen;
389 
390     /* If at the end of a nested substring, pop the stack. */
391 
392     if (ptr >= repend)
393       {
394       if (ptrstackptr <= 0) break;       /* End of replacement string */
395       repend = ptrstack[--ptrstackptr];
396       ptr = ptrstack[--ptrstackptr];
397       continue;
398       }
399 
400     /* Handle the next character */
401 
402     if (literal)
403       {
404       if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
405         {
406         literal = FALSE;
407         ptr += 2;
408         continue;
409         }
410       goto LOADLITERAL;
411       }
412 
413     /* Not in literal mode. */
414 
415     if (*ptr == CHAR_DOLLAR_SIGN)
416       {
417       int group, n;
418       uint32_t special = 0;
419       BOOL inparens;
420       BOOL star;
421       PCRE2_SIZE sublength;
422       PCRE2_SPTR text1_start = NULL;
423       PCRE2_SPTR text1_end = NULL;
424       PCRE2_SPTR text2_start = NULL;
425       PCRE2_SPTR text2_end = NULL;
426       PCRE2_UCHAR next;
427       PCRE2_UCHAR name[33];
428 
429       if (++ptr >= repend) goto BAD;
430       if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
431 
432       group = -1;
433       n = 0;
434       inparens = FALSE;
435       star = FALSE;
436 
437       if (next == CHAR_LEFT_CURLY_BRACKET)
438         {
439         if (++ptr >= repend) goto BAD;
440         next = *ptr;
441         inparens = TRUE;
442         }
443 
444       if (next == CHAR_ASTERISK)
445         {
446         if (++ptr >= repend) goto BAD;
447         next = *ptr;
448         star = TRUE;
449         }
450 
451       if (!star && next >= CHAR_0 && next <= CHAR_9)
452         {
453         group = next - CHAR_0;
454         while (++ptr < repend)
455           {
456           next = *ptr;
457           if (next < CHAR_0 || next > CHAR_9) break;
458           group = group * 10 + next - CHAR_0;
459 
460           /* A check for a number greater than the hightest captured group
461           is sufficient here; no need for a separate overflow check. If unknown
462           groups are to be treated as unset, just skip over any remaining
463           digits and carry on. */
464 
465           if (group > code->top_bracket)
466             {
467             if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
468               {
469               while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
470               break;
471               }
472             else
473               {
474               rc = PCRE2_ERROR_NOSUBSTRING;
475               goto PTREXIT;
476               }
477             }
478           }
479         }
480       else
481         {
482         const uint8_t *ctypes = code->tables + ctypes_offset;
483         while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
484           {
485           name[n++] = next;
486           if (n > 32) goto BAD;
487           if (++ptr >= repend) break;
488           next = *ptr;
489           }
490         if (n == 0) goto BAD;
491         name[n] = 0;
492         }
493 
494       /* In extended mode we recognize ${name:+set text:unset text} and
495       ${name:-default text}. */
496 
497       if (inparens)
498         {
499         if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
500              !star && ptr < repend - 2 && next == CHAR_COLON)
501           {
502           special = *(++ptr);
503           if (special != CHAR_PLUS && special != CHAR_MINUS)
504             {
505             rc = PCRE2_ERROR_BADSUBSTITUTION;
506             goto PTREXIT;
507             }
508 
509           text1_start = ++ptr;
510           rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
511           if (rc != 0) goto PTREXIT;
512           text1_end = ptr;
513 
514           if (special == CHAR_PLUS && *ptr == CHAR_COLON)
515             {
516             text2_start = ++ptr;
517             rc = find_text_end(code, &ptr, repend, TRUE);
518             if (rc != 0) goto PTREXIT;
519             text2_end = ptr;
520             }
521           }
522 
523         else
524           {
525           if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
526             {
527             rc = PCRE2_ERROR_REPMISSINGBRACE;
528             goto PTREXIT;
529             }
530           }
531 
532         ptr++;
533         }
534 
535       /* Have found a syntactically correct group number or name, or *name.
536       Only *MARK is currently recognized. */
537 
538       if (star)
539         {
540         if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
541           {
542           PCRE2_SPTR mark = pcre2_get_mark(match_data);
543           if (mark != NULL)
544             {
545             PCRE2_SPTR mark_start = mark;
546             while (*mark != 0) mark++;
547             fraglength = mark - mark_start;
548             CHECKMEMCPY(mark_start, fraglength);
549             }
550           }
551         else goto BAD;
552         }
553 
554       /* Substitute the contents of a group. We don't use substring_copy
555       functions any more, in order to support case forcing. */
556 
557       else
558         {
559         PCRE2_SPTR subptr, subptrend;
560 
561         /* Find a number for a named group. In case there are duplicate names,
562         search for the first one that is set. If the name is not found when
563         PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a
564         non-existent group. */
565 
566         if (group < 0)
567           {
568           PCRE2_SPTR first, last, entry;
569           rc = pcre2_substring_nametable_scan(code, name, &first, &last);
570           if (rc == PCRE2_ERROR_NOSUBSTRING &&
571               (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
572             {
573             group = code->top_bracket + 1;
574             }
575           else
576             {
577             if (rc < 0) goto PTREXIT;
578             for (entry = first; entry <= last; entry += rc)
579               {
580               uint32_t ng = GET2(entry, 0);
581               if (ng < ovector_count)
582                 {
583                 if (group < 0) group = ng;          /* First in ovector */
584                 if (ovector[ng*2] != PCRE2_UNSET)
585                   {
586                   group = ng;                       /* First that is set */
587                   break;
588                   }
589                 }
590               }
591 
592             /* If group is still negative, it means we did not find a group
593             that is in the ovector. Just set the first group. */
594 
595             if (group < 0) group = GET2(first, 0);
596             }
597           }
598 
599         /* We now have a group that is identified by number. Find the length of
600         the captured string. If a group in a non-special substitution is unset
601         when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
602 
603         rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
604         if (rc < 0)
605           {
606           if (rc == PCRE2_ERROR_NOSUBSTRING &&
607               (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
608             {
609             rc = PCRE2_ERROR_UNSET;
610             }
611           if (rc != PCRE2_ERROR_UNSET) goto PTREXIT;  /* Non-unset errors */
612           if (special == 0)                           /* Plain substitution */
613             {
614             if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
615             goto PTREXIT;                             /* Else error */
616             }
617           }
618 
619         /* If special is '+' we have a 'set' and possibly an 'unset' text,
620         both of which are reprocessed when used. If special is '-' we have a
621         default text for when the group is unset; it must be reprocessed. */
622 
623         if (special != 0)
624           {
625           if (special == CHAR_MINUS)
626             {
627             if (rc == 0) goto LITERAL_SUBSTITUTE;
628             text2_start = text1_start;
629             text2_end = text1_end;
630             }
631 
632           if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
633           ptrstack[ptrstackptr++] = ptr;
634           ptrstack[ptrstackptr++] = repend;
635 
636           if (rc == 0)
637             {
638             ptr = text1_start;
639             repend = text1_end;
640             }
641           else
642             {
643             ptr = text2_start;
644             repend = text2_end;
645             }
646           continue;
647           }
648 
649         /* Otherwise we have a literal substitution of a group's contents. */
650 
651         LITERAL_SUBSTITUTE:
652         subptr = subject + ovector[group*2];
653         subptrend = subject + ovector[group*2 + 1];
654 
655         /* Substitute a literal string, possibly forcing alphabetic case. */
656 
657         while (subptr < subptrend)
658           {
659           GETCHARINCTEST(ch, subptr);
660           if (forcecase != 0)
661             {
662 #ifdef SUPPORT_UNICODE
663             if (utf)
664               {
665               uint32_t type = UCD_CHARTYPE(ch);
666               if (PRIV(ucp_gentype)[type] == ucp_L &&
667                   type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
668                 ch = UCD_OTHERCASE(ch);
669               }
670             else
671 #endif
672               {
673               if (((code->tables + cbits_offset +
674                   ((forcecase > 0)? cbit_upper:cbit_lower)
675                   )[ch/8] & (1 << (ch%8))) == 0)
676                 ch = (code->tables + fcc_offset)[ch];
677               }
678             forcecase = forcecasereset;
679             }
680 
681 #ifdef SUPPORT_UNICODE
682           if (utf) chlen = PRIV(ord2utf)(ch, temp); else
683 #endif
684             {
685             temp[0] = ch;
686             chlen = 1;
687             }
688           CHECKMEMCPY(temp, chlen);
689           }
690         }
691       }
692 
693     /* Handle an escape sequence in extended mode. We can use check_escape()
694     to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
695     the case-forcing escapes are not supported in pcre2_compile() so must be
696     recognized here. */
697 
698     else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
699               *ptr == CHAR_BACKSLASH)
700       {
701       int errorcode = 0;
702 
703       if (ptr < repend - 1) switch (ptr[1])
704         {
705         case CHAR_L:
706         forcecase = forcecasereset = -1;
707         ptr += 2;
708         continue;
709 
710         case CHAR_l:
711         forcecase = -1;
712         forcecasereset = 0;
713         ptr += 2;
714         continue;
715 
716         case CHAR_U:
717         forcecase = forcecasereset = 1;
718         ptr += 2;
719         continue;
720 
721         case CHAR_u:
722         forcecase = 1;
723         forcecasereset = 0;
724         ptr += 2;
725         continue;
726 
727         default:
728         break;
729         }
730 
731       rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
732         code->overall_options, FALSE, NULL);
733       if (errorcode != 0) goto BADESCAPE;
734       ptr++;
735 
736       switch(rc)
737         {
738         case ESC_E:
739         forcecase = forcecasereset = 0;
740         continue;
741 
742         case ESC_Q:
743         literal = TRUE;
744         continue;
745 
746         case 0:      /* Data character */
747         goto LITERAL;
748 
749         default:
750         goto BADESCAPE;
751         }
752       }
753 
754     /* Handle a literal code unit */
755 
756     else
757       {
758       LOADLITERAL:
759       GETCHARINCTEST(ch, ptr);    /* Get character value, increment pointer */
760 
761       LITERAL:
762       if (forcecase != 0)
763         {
764 #ifdef SUPPORT_UNICODE
765         if (utf)
766           {
767           uint32_t type = UCD_CHARTYPE(ch);
768           if (PRIV(ucp_gentype)[type] == ucp_L &&
769               type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
770             ch = UCD_OTHERCASE(ch);
771           }
772         else
773 #endif
774           {
775           if (((code->tables + cbits_offset +
776               ((forcecase > 0)? cbit_upper:cbit_lower)
777               )[ch/8] & (1 << (ch%8))) == 0)
778             ch = (code->tables + fcc_offset)[ch];
779           }
780         forcecase = forcecasereset;
781         }
782 
783 #ifdef SUPPORT_UNICODE
784       if (utf) chlen = PRIV(ord2utf)(ch, temp); else
785 #endif
786         {
787         temp[0] = ch;
788         chlen = 1;
789         }
790       CHECKMEMCPY(temp, chlen);
791       } /* End handling a literal code unit */
792     }   /* End of loop for scanning the replacement. */
793 
794   /* The replacement has been copied to the output. Update the start offset to
795   point to the rest of the subject string. If we matched an empty string,
796   do the magic for global matches. */
797 
798   start_offset = ovector[1];
799   goptions = (ovector[0] != ovector[1])? 0 :
800     PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
801   } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0);  /* Repeat "do" loop */
802 
803 /* Copy the rest of the subject. */
804 
805 fraglength = length - start_offset;
806 CHECKMEMCPY(subject + start_offset, fraglength);
807 temp[0] = 0;
808 CHECKMEMCPY(temp , 1);
809 
810 /* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
811 and matching has carried on after a full buffer, in order to compute the length
812 needed. Otherwise, an overflow generates an immediate error return. */
813 
814 if (overflowed)
815   {
816   rc = PCRE2_ERROR_NOMEMORY;
817   *blength = buff_length + extra_needed;
818   }
819 
820 /* After a successful execution, return the number of substitutions and set the
821 length of buffer used, excluding the trailing zero. */
822 
823 else
824   {
825   rc = subs;
826   *blength = buff_offset - 1;
827   }
828 
829 EXIT:
830 if (match_data_created) pcre2_match_data_free(match_data);
831   else match_data->rc = rc;
832 return rc;
833 
834 NOROOM:
835 rc = PCRE2_ERROR_NOMEMORY;
836 goto EXIT;
837 
838 BAD:
839 rc = PCRE2_ERROR_BADREPLACEMENT;
840 goto PTREXIT;
841 
842 BADESCAPE:
843 rc = PCRE2_ERROR_BADREPESCAPE;
844 
845 PTREXIT:
846 *blength = (PCRE2_SIZE)(ptr - replacement);
847 goto EXIT;
848 }
849 
850 /* End of pcre2_substitute.c */
851