1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45
46 #include "pcre2_internal.h"
47
/* Number of slots in the pointer stack used by pcre2_substitute() while it
reprocesses nested replacement text. Slots are consumed two at a time (a saved
scan pointer and a saved end pointer). */

#define PTR_STACK_SIZE 20

/* The set of option bits that belong to pcre2_substitute() itself. They are
split off from the caller's options so that the remaining bits can be used as
match options. */

#define SUBSTITUTE_OPTIONS \
  (PCRE2_SUBSTITUTE_GLOBAL|PCRE2_SUBSTITUTE_EXTENDED| \
   PCRE2_SUBSTITUTE_UNSET_EMPTY|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
   PCRE2_SUBSTITUTE_OVERFLOW_LENGTH)
54
55
56
57 /*************************************************
58 * Find end of substitute text *
59 *************************************************/
60
61 /* In extended mode, we recognize ${name:+set text:unset text} and similar
62 constructions. This requires the identification of unescaped : and }
63 characters. This function scans for such. It must deal with nested ${
64 constructions. The pointer to the text is updated, either to the required end
65 character, or to where an error was detected.
66
67 Arguments:
68 code points to the compiled expression (for options)
69 ptrptr points to the pointer to the start of the text (updated)
70 ptrend end of the whole string
71 last TRUE if the last expected string (only } recognized)
72
73 Returns: 0 on success
74 negative error code on failure
75 */
76
77 static int
find_text_end(const pcre2_code * code,PCRE2_SPTR * ptrptr,PCRE2_SPTR ptrend,BOOL last)78 find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
79 BOOL last)
80 {
81 int rc = 0;
82 uint32_t nestlevel = 0;
83 BOOL literal = FALSE;
84 PCRE2_SPTR ptr = *ptrptr;
85
86 for (; ptr < ptrend; ptr++)
87 {
88 if (literal)
89 {
90 if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
91 {
92 literal = FALSE;
93 ptr += 1;
94 }
95 }
96
97 else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
98 {
99 if (nestlevel == 0) goto EXIT;
100 nestlevel--;
101 }
102
103 else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
104
105 else if (*ptr == CHAR_DOLLAR_SIGN)
106 {
107 if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
108 {
109 nestlevel++;
110 ptr += 1;
111 }
112 }
113
114 else if (*ptr == CHAR_BACKSLASH)
115 {
116 int erc;
117 int errorcode = 0;
118 uint32_t ch;
119
120 if (ptr < ptrend - 1) switch (ptr[1])
121 {
122 case CHAR_L:
123 case CHAR_l:
124 case CHAR_U:
125 case CHAR_u:
126 ptr += 1;
127 continue;
128 }
129
130 erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
131 code->overall_options, FALSE, NULL);
132 if (errorcode != 0)
133 {
134 rc = errorcode;
135 goto EXIT;
136 }
137
138 switch(erc)
139 {
140 case 0: /* Data character */
141 case ESC_E: /* Isolated \E is ignored */
142 break;
143
144 case ESC_Q:
145 literal = TRUE;
146 break;
147
148 default:
149 rc = PCRE2_ERROR_BADREPESCAPE;
150 goto EXIT;
151 }
152 }
153 }
154
155 rc = PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */
156
157 EXIT:
158 *ptrptr = ptr;
159 return rc;
160 }
161
162
163
164 /*************************************************
165 * Match and substitute *
166 *************************************************/
167
/* This function applies a compiled re to a subject string and creates a new
string with substitutions. The first 7 arguments are the same as for
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.

Arguments:
  code          points to the compiled expression
  subject       points to the subject string
  length        length of subject string (may contain binary zeros)
  start_offset  where to start in the subject string
  options       option bits
  match_data    points to a match_data block, or is NULL
  context       points to a PCRE2 match context, or is NULL
  replacement   points to the replacement string
  rlength       length of replacement string
  buffer        where to put the substituted string
  blength       points to length of buffer; updated to length of string

Returns:        >= 0 number of substitutions made
                < 0 an error code
                PCRE2_ERROR_BADREPLACEMENT means invalid use of $
*/
189
/* This macro checks for space in the buffer before copying into it. On
overflow, either give an error immediately (by jumping to the NOROOM label), or
keep on, accumulating in extra_needed the number of additional code units that
would have been required. Once overflowed is set, nothing more is copied.

The macro relies on these names being in scope where it is used: buffer,
buff_offset, lengthleft, extra_needed, overflowed, suboptions, and the label
NOROOM. The length argument is parenthesized at each use, and the body is
wrapped in do { } while (0) so that an invocation followed by a semicolon
behaves as a single statement. */

#define CHECKMEMCPY(from,length) \
  do \
    { \
    if (!overflowed && lengthleft < (length)) \
      { \
      if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
      overflowed = TRUE; \
      extra_needed = (length) - lengthleft; \
      } \
    else if (overflowed) \
      { \
      extra_needed += (length); \
      } \
    else \
      {  \
      memcpy(buffer + buff_offset, (from), CU2BYTES(length)); \
      buff_offset += (length); \
      lengthleft -= (length); \
      } \
    } \
  while (0)
211
212 /* Here's the function */
213
214 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substitute(const pcre2_code * code,PCRE2_SPTR subject,PCRE2_SIZE length,PCRE2_SIZE start_offset,uint32_t options,pcre2_match_data * match_data,pcre2_match_context * mcontext,PCRE2_SPTR replacement,PCRE2_SIZE rlength,PCRE2_UCHAR * buffer,PCRE2_SIZE * blength)215 pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
216 PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
217 pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
218 PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
219 {
220 int rc;
221 int subs;
222 int forcecase = 0;
223 int forcecasereset = 0;
224 uint32_t ovector_count;
225 uint32_t goptions = 0;
226 uint32_t suboptions;
227 BOOL match_data_created = FALSE;
228 BOOL literal = FALSE;
229 BOOL overflowed = FALSE;
230 #ifdef SUPPORT_UNICODE
231 BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
232 #endif
233 PCRE2_UCHAR temp[6];
234 PCRE2_SPTR ptr;
235 PCRE2_SPTR repend;
236 PCRE2_SIZE extra_needed = 0;
237 PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
238 PCRE2_SIZE *ovector;
239
240 buff_offset = 0;
241 lengthleft = buff_length = *blength;
242 *blength = PCRE2_UNSET;
243
244 /* Partial matching is not valid. */
245
246 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
247 return PCRE2_ERROR_BADOPTION;
248
249 /* If no match data block is provided, create one. */
250
251 if (match_data == NULL)
252 {
253 pcre2_general_context *gcontext = (mcontext == NULL)?
254 (pcre2_general_context *)code :
255 (pcre2_general_context *)mcontext;
256 match_data = pcre2_match_data_create_from_pattern(code, gcontext);
257 if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
258 match_data_created = TRUE;
259 }
260 ovector = pcre2_get_ovector_pointer(match_data);
261 ovector_count = pcre2_get_ovector_count(match_data);
262
263 /* Find lengths of zero-terminated strings and the end of the replacement. */
264
265 if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
266 if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
267 repend = replacement + rlength;
268
269 /* Check UTF replacement string if necessary. */
270
271 #ifdef SUPPORT_UNICODE
272 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
273 {
274 rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
275 if (rc != 0)
276 {
277 match_data->leftchar = 0;
278 goto EXIT;
279 }
280 }
281 #endif /* SUPPORT_UNICODE */
282
283 /* Save the substitute options and remove them from the match options. */
284
285 suboptions = options & SUBSTITUTE_OPTIONS;
286 options &= ~SUBSTITUTE_OPTIONS;
287
288 /* Copy up to the start offset */
289
290 CHECKMEMCPY(subject, start_offset);
291
292 /* Loop for global substituting. */
293
294 subs = 0;
295 do
296 {
297 PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
298 uint32_t ptrstackptr = 0;
299
300 rc = pcre2_match(code, subject, length, start_offset, options|goptions,
301 match_data, mcontext);
302
303 #ifdef SUPPORT_UNICODE
304 if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */
305 #endif
306
307 /* Any error other than no match returns the error code. No match when not
308 doing the special after-empty-match global rematch, or when at the end of the
309 subject, breaks the global loop. Otherwise, advance the starting point by one
310 character, copying it to the output, and try again. */
311
312 if (rc < 0)
313 {
314 PCRE2_SIZE save_start;
315
316 if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
317 if (goptions == 0 || start_offset >= length) break;
318
319 /* Advance by one code point. Then, if CRLF is a valid newline sequence and
320 we have advanced into the middle of it, advance one more code point. In
321 other words, do not start in the middle of CRLF, even if CR and LF on their
322 own are valid newlines. */
323
324 save_start = start_offset++;
325 if (subject[start_offset-1] == CHAR_CR &&
326 code->newline_convention != PCRE2_NEWLINE_CR &&
327 code->newline_convention != PCRE2_NEWLINE_LF &&
328 start_offset < length &&
329 subject[start_offset] == CHAR_LF)
330 start_offset++;
331
332 /* Otherwise, in UTF mode, advance past any secondary code points. */
333
334 else if ((code->overall_options & PCRE2_UTF) != 0)
335 {
336 #if PCRE2_CODE_UNIT_WIDTH == 8
337 while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
338 start_offset++;
339 #elif PCRE2_CODE_UNIT_WIDTH == 16
340 while (start_offset < length &&
341 (subject[start_offset] & 0xfc00) == 0xdc00)
342 start_offset++;
343 #endif
344 }
345
346 /* Copy what we have advanced past, reset the special global options, and
347 continue to the next match. */
348
349 fraglength = start_offset - save_start;
350 CHECKMEMCPY(subject + save_start, fraglength);
351 goptions = 0;
352 continue;
353 }
354
355 /* Handle a successful match. Matches that use \K to end before they start
356 are not supported. */
357
358 if (ovector[1] < ovector[0])
359 {
360 rc = PCRE2_ERROR_BADSUBSPATTERN;
361 goto EXIT;
362 }
363
364 /* Count substitutions with a paranoid check for integer overflow; surely no
365 real call to this function would ever hit this! */
366
367 if (subs == INT_MAX)
368 {
369 rc = PCRE2_ERROR_TOOMANYREPLACE;
370 goto EXIT;
371 }
372 subs++;
373
374 /* Copy the text leading up to the match. */
375
376 if (rc == 0) rc = ovector_count;
377 fraglength = ovector[0] - start_offset;
378 CHECKMEMCPY(subject + start_offset, fraglength);
379
380 /* Process the replacement string. Literal mode is set by \Q, but only in
381 extended mode when backslashes are being interpreted. In extended mode we
382 must handle nested substrings that are to be reprocessed. */
383
384 ptr = replacement;
385 for (;;)
386 {
387 uint32_t ch;
388 unsigned int chlen;
389
390 /* If at the end of a nested substring, pop the stack. */
391
392 if (ptr >= repend)
393 {
394 if (ptrstackptr <= 0) break; /* End of replacement string */
395 repend = ptrstack[--ptrstackptr];
396 ptr = ptrstack[--ptrstackptr];
397 continue;
398 }
399
400 /* Handle the next character */
401
402 if (literal)
403 {
404 if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
405 {
406 literal = FALSE;
407 ptr += 2;
408 continue;
409 }
410 goto LOADLITERAL;
411 }
412
413 /* Not in literal mode. */
414
415 if (*ptr == CHAR_DOLLAR_SIGN)
416 {
417 int group, n;
418 uint32_t special = 0;
419 BOOL inparens;
420 BOOL star;
421 PCRE2_SIZE sublength;
422 PCRE2_SPTR text1_start = NULL;
423 PCRE2_SPTR text1_end = NULL;
424 PCRE2_SPTR text2_start = NULL;
425 PCRE2_SPTR text2_end = NULL;
426 PCRE2_UCHAR next;
427 PCRE2_UCHAR name[33];
428
429 if (++ptr >= repend) goto BAD;
430 if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
431
432 group = -1;
433 n = 0;
434 inparens = FALSE;
435 star = FALSE;
436
437 if (next == CHAR_LEFT_CURLY_BRACKET)
438 {
439 if (++ptr >= repend) goto BAD;
440 next = *ptr;
441 inparens = TRUE;
442 }
443
444 if (next == CHAR_ASTERISK)
445 {
446 if (++ptr >= repend) goto BAD;
447 next = *ptr;
448 star = TRUE;
449 }
450
451 if (!star && next >= CHAR_0 && next <= CHAR_9)
452 {
453 group = next - CHAR_0;
454 while (++ptr < repend)
455 {
456 next = *ptr;
457 if (next < CHAR_0 || next > CHAR_9) break;
458 group = group * 10 + next - CHAR_0;
459
460 /* A check for a number greater than the hightest captured group
461 is sufficient here; no need for a separate overflow check. If unknown
462 groups are to be treated as unset, just skip over any remaining
463 digits and carry on. */
464
465 if (group > code->top_bracket)
466 {
467 if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
468 {
469 while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
470 break;
471 }
472 else
473 {
474 rc = PCRE2_ERROR_NOSUBSTRING;
475 goto PTREXIT;
476 }
477 }
478 }
479 }
480 else
481 {
482 const uint8_t *ctypes = code->tables + ctypes_offset;
483 while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
484 {
485 name[n++] = next;
486 if (n > 32) goto BAD;
487 if (++ptr >= repend) break;
488 next = *ptr;
489 }
490 if (n == 0) goto BAD;
491 name[n] = 0;
492 }
493
494 /* In extended mode we recognize ${name:+set text:unset text} and
495 ${name:-default text}. */
496
497 if (inparens)
498 {
499 if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
500 !star && ptr < repend - 2 && next == CHAR_COLON)
501 {
502 special = *(++ptr);
503 if (special != CHAR_PLUS && special != CHAR_MINUS)
504 {
505 rc = PCRE2_ERROR_BADSUBSTITUTION;
506 goto PTREXIT;
507 }
508
509 text1_start = ++ptr;
510 rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
511 if (rc != 0) goto PTREXIT;
512 text1_end = ptr;
513
514 if (special == CHAR_PLUS && *ptr == CHAR_COLON)
515 {
516 text2_start = ++ptr;
517 rc = find_text_end(code, &ptr, repend, TRUE);
518 if (rc != 0) goto PTREXIT;
519 text2_end = ptr;
520 }
521 }
522
523 else
524 {
525 if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
526 {
527 rc = PCRE2_ERROR_REPMISSINGBRACE;
528 goto PTREXIT;
529 }
530 }
531
532 ptr++;
533 }
534
535 /* Have found a syntactically correct group number or name, or *name.
536 Only *MARK is currently recognized. */
537
538 if (star)
539 {
540 if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
541 {
542 PCRE2_SPTR mark = pcre2_get_mark(match_data);
543 if (mark != NULL)
544 {
545 PCRE2_SPTR mark_start = mark;
546 while (*mark != 0) mark++;
547 fraglength = mark - mark_start;
548 CHECKMEMCPY(mark_start, fraglength);
549 }
550 }
551 else goto BAD;
552 }
553
554 /* Substitute the contents of a group. We don't use substring_copy
555 functions any more, in order to support case forcing. */
556
557 else
558 {
559 PCRE2_SPTR subptr, subptrend;
560
561 /* Find a number for a named group. In case there are duplicate names,
562 search for the first one that is set. If the name is not found when
563 PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a
564 non-existent group. */
565
566 if (group < 0)
567 {
568 PCRE2_SPTR first, last, entry;
569 rc = pcre2_substring_nametable_scan(code, name, &first, &last);
570 if (rc == PCRE2_ERROR_NOSUBSTRING &&
571 (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
572 {
573 group = code->top_bracket + 1;
574 }
575 else
576 {
577 if (rc < 0) goto PTREXIT;
578 for (entry = first; entry <= last; entry += rc)
579 {
580 uint32_t ng = GET2(entry, 0);
581 if (ng < ovector_count)
582 {
583 if (group < 0) group = ng; /* First in ovector */
584 if (ovector[ng*2] != PCRE2_UNSET)
585 {
586 group = ng; /* First that is set */
587 break;
588 }
589 }
590 }
591
592 /* If group is still negative, it means we did not find a group
593 that is in the ovector. Just set the first group. */
594
595 if (group < 0) group = GET2(first, 0);
596 }
597 }
598
599 /* We now have a group that is identified by number. Find the length of
600 the captured string. If a group in a non-special substitution is unset
601 when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
602
603 rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
604 if (rc < 0)
605 {
606 if (rc == PCRE2_ERROR_NOSUBSTRING &&
607 (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
608 {
609 rc = PCRE2_ERROR_UNSET;
610 }
611 if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; /* Non-unset errors */
612 if (special == 0) /* Plain substitution */
613 {
614 if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
615 goto PTREXIT; /* Else error */
616 }
617 }
618
619 /* If special is '+' we have a 'set' and possibly an 'unset' text,
620 both of which are reprocessed when used. If special is '-' we have a
621 default text for when the group is unset; it must be reprocessed. */
622
623 if (special != 0)
624 {
625 if (special == CHAR_MINUS)
626 {
627 if (rc == 0) goto LITERAL_SUBSTITUTE;
628 text2_start = text1_start;
629 text2_end = text1_end;
630 }
631
632 if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
633 ptrstack[ptrstackptr++] = ptr;
634 ptrstack[ptrstackptr++] = repend;
635
636 if (rc == 0)
637 {
638 ptr = text1_start;
639 repend = text1_end;
640 }
641 else
642 {
643 ptr = text2_start;
644 repend = text2_end;
645 }
646 continue;
647 }
648
649 /* Otherwise we have a literal substitution of a group's contents. */
650
651 LITERAL_SUBSTITUTE:
652 subptr = subject + ovector[group*2];
653 subptrend = subject + ovector[group*2 + 1];
654
655 /* Substitute a literal string, possibly forcing alphabetic case. */
656
657 while (subptr < subptrend)
658 {
659 GETCHARINCTEST(ch, subptr);
660 if (forcecase != 0)
661 {
662 #ifdef SUPPORT_UNICODE
663 if (utf)
664 {
665 uint32_t type = UCD_CHARTYPE(ch);
666 if (PRIV(ucp_gentype)[type] == ucp_L &&
667 type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
668 ch = UCD_OTHERCASE(ch);
669 }
670 else
671 #endif
672 {
673 if (((code->tables + cbits_offset +
674 ((forcecase > 0)? cbit_upper:cbit_lower)
675 )[ch/8] & (1 << (ch%8))) == 0)
676 ch = (code->tables + fcc_offset)[ch];
677 }
678 forcecase = forcecasereset;
679 }
680
681 #ifdef SUPPORT_UNICODE
682 if (utf) chlen = PRIV(ord2utf)(ch, temp); else
683 #endif
684 {
685 temp[0] = ch;
686 chlen = 1;
687 }
688 CHECKMEMCPY(temp, chlen);
689 }
690 }
691 }
692
693 /* Handle an escape sequence in extended mode. We can use check_escape()
694 to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
695 the case-forcing escapes are not supported in pcre2_compile() so must be
696 recognized here. */
697
698 else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
699 *ptr == CHAR_BACKSLASH)
700 {
701 int errorcode = 0;
702
703 if (ptr < repend - 1) switch (ptr[1])
704 {
705 case CHAR_L:
706 forcecase = forcecasereset = -1;
707 ptr += 2;
708 continue;
709
710 case CHAR_l:
711 forcecase = -1;
712 forcecasereset = 0;
713 ptr += 2;
714 continue;
715
716 case CHAR_U:
717 forcecase = forcecasereset = 1;
718 ptr += 2;
719 continue;
720
721 case CHAR_u:
722 forcecase = 1;
723 forcecasereset = 0;
724 ptr += 2;
725 continue;
726
727 default:
728 break;
729 }
730
731 rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
732 code->overall_options, FALSE, NULL);
733 if (errorcode != 0) goto BADESCAPE;
734 ptr++;
735
736 switch(rc)
737 {
738 case ESC_E:
739 forcecase = forcecasereset = 0;
740 continue;
741
742 case ESC_Q:
743 literal = TRUE;
744 continue;
745
746 case 0: /* Data character */
747 goto LITERAL;
748
749 default:
750 goto BADESCAPE;
751 }
752 }
753
754 /* Handle a literal code unit */
755
756 else
757 {
758 LOADLITERAL:
759 GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */
760
761 LITERAL:
762 if (forcecase != 0)
763 {
764 #ifdef SUPPORT_UNICODE
765 if (utf)
766 {
767 uint32_t type = UCD_CHARTYPE(ch);
768 if (PRIV(ucp_gentype)[type] == ucp_L &&
769 type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
770 ch = UCD_OTHERCASE(ch);
771 }
772 else
773 #endif
774 {
775 if (((code->tables + cbits_offset +
776 ((forcecase > 0)? cbit_upper:cbit_lower)
777 )[ch/8] & (1 << (ch%8))) == 0)
778 ch = (code->tables + fcc_offset)[ch];
779 }
780 forcecase = forcecasereset;
781 }
782
783 #ifdef SUPPORT_UNICODE
784 if (utf) chlen = PRIV(ord2utf)(ch, temp); else
785 #endif
786 {
787 temp[0] = ch;
788 chlen = 1;
789 }
790 CHECKMEMCPY(temp, chlen);
791 } /* End handling a literal code unit */
792 } /* End of loop for scanning the replacement. */
793
794 /* The replacement has been copied to the output. Update the start offset to
795 point to the rest of the subject string. If we matched an empty string,
796 do the magic for global matches. */
797
798 start_offset = ovector[1];
799 goptions = (ovector[0] != ovector[1])? 0 :
800 PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
801 } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
802
803 /* Copy the rest of the subject. */
804
805 fraglength = length - start_offset;
806 CHECKMEMCPY(subject + start_offset, fraglength);
807 temp[0] = 0;
808 CHECKMEMCPY(temp , 1);
809
810 /* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
811 and matching has carried on after a full buffer, in order to compute the length
812 needed. Otherwise, an overflow generates an immediate error return. */
813
814 if (overflowed)
815 {
816 rc = PCRE2_ERROR_NOMEMORY;
817 *blength = buff_length + extra_needed;
818 }
819
820 /* After a successful execution, return the number of substitutions and set the
821 length of buffer used, excluding the trailing zero. */
822
823 else
824 {
825 rc = subs;
826 *blength = buff_offset - 1;
827 }
828
829 EXIT:
830 if (match_data_created) pcre2_match_data_free(match_data);
831 else match_data->rc = rc;
832 return rc;
833
834 NOROOM:
835 rc = PCRE2_ERROR_NOMEMORY;
836 goto EXIT;
837
838 BAD:
839 rc = PCRE2_ERROR_BADREPLACEMENT;
840 goto PTREXIT;
841
842 BADESCAPE:
843 rc = PCRE2_ERROR_BADREPESCAPE;
844
845 PTREXIT:
846 *blength = (PCRE2_SIZE)(ptr - replacement);
847 goto EXIT;
848 }
849
850 /* End of pcre2_substitute.c */
851