1 /*************************************************
2 *             PCRE2 testing program              *
3 *************************************************/
4 
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11 
12                        Written by Philip Hazel
13      Original code Copyright (c) 1997-2012 University of Cambridge
14          Rewritten code Copyright (c) 2016 University of Cambridge
15 
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19 
20     * Redistributions of source code must retain the above copyright notice,
21       this list of conditions and the following disclaimer.
22 
23     * Redistributions in binary form must reproduce the above copyright
24       notice, this list of conditions and the following disclaimer in the
25       documentation and/or other materials provided with the distribution.
26 
27     * Neither the name of the University of Cambridge nor the names of its
28       contributors may be used to endorse or promote products derived from
29       this software without specific prior written permission.
30 
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44 
45 
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56 
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60 
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68 
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76 
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 
81 /* Both libreadline and libedit are optionally supported. The user-supplied
82 original patch uses readline/readline.h for libedit, but in at least one system
83 it is installed as editline/readline.h, so the configuration code now looks for
84 that first, falling back to readline/readline.h. */
85 
86 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
87 #if defined(SUPPORT_LIBREADLINE)
88 #include <readline/readline.h>
89 #include <readline/history.h>
90 #else
91 #if defined(HAVE_EDITLINE_READLINE_H)
92 #include <editline/readline.h>
93 #else
94 #include <readline/readline.h>
95 #endif
96 #endif
97 #endif
98 
/* Put the test for interactive input into a macro so that it can be changed if
required for different environments. INTERACTIVE(f) takes a FILE pointer and
yields the result of isatty() applied to the descriptor obtained from fileno(),
that is, non-zero when the stream is attached to a terminal. On Windows both
function names may be remapped by the system-specific definitions that follow
in this file. */

#define INTERACTIVE(f) isatty(fileno(f))
103 
104 
105 /* ---------------------- System-specific definitions ---------------------- */
106 
107 /* A number of things vary for Windows builds. Originally, pcretest opened its
108 input and output without "b"; then I was told that "b" was needed in some
109 environments, so it was added for release 5.0 to both the input and output. (It
110 makes no difference on Unix-like systems.) Later I was told that it is wrong
111 for the input on Windows. I've now abstracted the modes into macros that are
112 set here, to make it easier to fiddle with them, and removed "b" from the input
113 mode under Windows. The BINARY versions are used when saving/restoring compiled
114 patterns. */
115 
/* Each branch below defines the same four fopen() mode strings: INPUT_MODE and
OUTPUT_MODE for ordinary test input and output, and BINARY_INPUT_MODE and
BINARY_OUTPUT_MODE for saved compiled patterns. Only the non-binary pair
differs between environments. */

#if defined(_WIN32) || defined(WIN32)
#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE          "r"    /* No "b": it is wrong for input here */
#define OUTPUT_MODE         "wb"
#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"

#ifndef isatty
#define isatty _isatty         /* This is what Windows calls them, I'm told, */
#endif                         /* though in some environments they seem to   */
                               /* be already defined, hence the #ifndefs.    */
#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. It maps the
_setmode() name used in this program onto Borland's setmode(). */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

/* Not Windows */

#else
#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */
#if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"
#else                          /* Everything else: binary for all four */
#define INPUT_MODE          "rb"
#define OUTPUT_MODE         "wb"
#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"
#endif
#endif

/* VMS only: a system header plus a prototype for vms_setsymbol(), whose
definition is not in this part of the file. */

#ifdef __VMS
#include <ssdef.h>
void vms_setsymbol( char *, char *, int );
#endif
160 
161 /* ------------------End of system-specific definitions -------------------- */
162 
/* Glueing macros that are used in several places below. glue() pastes its two
arguments together exactly as written. G() adds one level of indirection so
that arguments which are themselves macros are expanded before being pasted,
which is what is needed when building width-suffixed names. */

#define glue(a,b) a##b
#define G(a,b) glue(a,b)
167 
/* Miscellaneous parameters and manifests */

/* Supply CLOCKS_PER_SEC if the system headers have not: use CLK_TCK when that
is available, and fall back to 100 otherwise. */

#if !defined(CLOCKS_PER_SEC) && defined(CLK_TCK)
#define CLOCKS_PER_SEC CLK_TCK
#elif !defined(CLOCKS_PER_SEC)
#define CLOCKS_PER_SEC 100
#endif

#define CFAIL_UNSET        UINT32_MAX   /* A cfail field that has not been set */
#define DFA_WS_DIMENSION   1000         /* DFA workspace size */
#define DEFAULT_OVECCOUNT  15           /* Ovector count when none is given */
#define JUNK_OFFSET        0xdeadbeef   /* Fill value for a fresh ovector */
#define LOCALESIZE         32           /* Room for a locale name */
#define LOOPREPEAT         500000       /* Timing loop count when none is given */
#define PATSTACKSIZE       20           /* Depth of the save/restore pattern stack */
#define REPLACE_MODSIZE    100          /* Field for an 8-bit replacement string */
#define VERSION_SIZE       64           /* Buffer size for version strings */

/* The buffer that replacement strings are copied into must be big enough to
hold them as 32-bit code units. The size is expressed in bytes. */

#define REPLACE_BUFFSIZE   1024

/* Execution modes, named after the code unit width */

#define PCRE8_MODE    8
#define PCRE16_MODE  16
#define PCRE32_MODE  32
198 
/* Return codes from the processing functions */

enum {
  PR_OK,      /* 0 */
  PR_SKIP,    /* 1 */
  PR_ABEND    /* 2 */
};
202 
/* The macro PRINTABLE determines whether to print an output character as-is or
as a hex value when showing compiled patterns. We use it in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale. In
an EBCDIC environment the accepted range is 64 to 254 inclusive; otherwise it
is 32 to 126 inclusive. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK uses isprint() when locale_tables is not NULL, and the fixed
PRINTABLE range otherwise. */

#define PRINTOK(c) ((locale_tables != NULL)? isprint(c) : PRINTABLE(c))
215 
216 /* We have to include some of the library source files because we need
217 to use some of the macros, internal structure definitions, and other internal
218 values - pcre2test has "inside information" compared to an application program
219 that strictly follows the PCRE2 API.
220 
221 Before including pcre2_internal.h we define PRIV so that it does not get
222 defined therein. This ensures that PRIV names in the included files do not
223 clash with those in the libraries. Also, although pcre2_internal.h does itself
224 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
225 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
226 for building the library. */
227 
228 #define PRIV(name) name
229 #define PCRE2_CODE_UNIT_WIDTH 0
230 #include "pcre2.h"
231 #include "pcre2posix.h"
232 #include "pcre2_internal.h"
233 
/* We need access to some of the data tables that PCRE2 uses. Defining
PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
of PRIV avoids name clashes. */
237 
238 #define PCRE2_PCRE2TEST
239 #include "pcre2_tables.c"
240 #include "pcre2_ucd.c"
241 
/* 32-bit integer values in the input are read by strtoul() or strtol(). The
check needed for overflow depends on whether long ints are in fact longer than
ints. They are defined not to be shorter. When long is wider than 32 bits, an
out-of-range value is detected by comparing against the 32-bit limits. When it
is not, strtoul() and strtol() return the limit value on overflow, so equality
with a limit is treated as overflow.

The macro argument is parenthesized so that an expression such as (a | b) can
be passed safely. Note that S32OVERFLOW evaluates its argument twice. */

#if ULONG_MAX > UINT32_MAX
#define U32OVERFLOW(x) ((x) > UINT32_MAX)
#else
#define U32OVERFLOW(x) ((x) == UINT32_MAX)
#endif

#if LONG_MAX > INT32_MAX
#define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
#else
#define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
#endif
257 
258 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
259 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
260 defined. We can now include it for each supported code unit width. Because
261 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
262 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
263 while including these files, and then restore it to a no-op. Because LINK_SIZE
264 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
265 these inclusions should not be changed. */
266 
267 #undef PCRE2_SUFFIX
268 #undef PCRE2_CODE_UNIT_WIDTH
269 
270 #ifdef   SUPPORT_PCRE2_8
271 #define  PCRE2_CODE_UNIT_WIDTH 8
272 #define  PCRE2_SUFFIX(a) G(a,8)
273 #include "pcre2_intmodedep.h"
274 #include "pcre2_printint.c"
275 #undef   PCRE2_CODE_UNIT_WIDTH
276 #undef   PCRE2_SUFFIX
277 #endif   /* SUPPORT_PCRE2_8 */
278 
279 #ifdef   SUPPORT_PCRE2_16
280 #define  PCRE2_CODE_UNIT_WIDTH 16
281 #define  PCRE2_SUFFIX(a) G(a,16)
282 #include "pcre2_intmodedep.h"
283 #include "pcre2_printint.c"
284 #undef   PCRE2_CODE_UNIT_WIDTH
285 #undef   PCRE2_SUFFIX
286 #endif   /* SUPPORT_PCRE2_16 */
287 
288 #ifdef   SUPPORT_PCRE2_32
289 #define  PCRE2_CODE_UNIT_WIDTH 32
290 #define  PCRE2_SUFFIX(a) G(a,32)
291 #include "pcre2_intmodedep.h"
292 #include "pcre2_printint.c"
293 #undef   PCRE2_CODE_UNIT_WIDTH
294 #undef   PCRE2_SUFFIX
295 #endif   /* SUPPORT_PCRE2_32 */
296 
297 #define PCRE2_SUFFIX(a) a
298 
299 /* We need to be able to check input text for UTF-8 validity, whatever code
300 widths are actually available, because the input to pcre2test is always in
301 8-bit code units. So we include the UTF validity checking function for 8-bit
302 code units. */
303 
304 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
305 
306 #define  PCRE2_CODE_UNIT_WIDTH 8
307 #undef   PCRE2_SPTR
308 #define  PCRE2_SPTR PCRE2_SPTR8
309 #include "pcre2_valid_utf.c"
310 #undef   PCRE2_CODE_UNIT_WIDTH
311 #undef   PCRE2_SPTR
312 
/* If we have 8-bit support, default to it; if there is also 16- or 32-bit
support, it can be selected by a command-line option. If there is no 8-bit
support, there must be 16- or 32-bit support, so default to one of them. The
config function, JIT stack, contexts, and version string are the same in all
modes, so use the form of the first that is available.

Each branch defines the same seven names exactly once: DEFAULT_TEST_MODE,
VERSION_TYPE, PCRE2_CONFIG, PCRE2_JIT_STACK, and the three PCRE2_REAL_xxx
context structure names. */

#if defined SUPPORT_PCRE2_8
#define DEFAULT_TEST_MODE PCRE8_MODE
#define VERSION_TYPE PCRE2_UCHAR8
#define PCRE2_CONFIG pcre2_config_8
#define PCRE2_JIT_STACK pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8

#elif defined SUPPORT_PCRE2_16
#define DEFAULT_TEST_MODE PCRE16_MODE
#define VERSION_TYPE PCRE2_UCHAR16
#define PCRE2_CONFIG pcre2_config_16
#define PCRE2_JIT_STACK pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16

#elif defined SUPPORT_PCRE2_32
#define DEFAULT_TEST_MODE PCRE32_MODE
#define VERSION_TYPE PCRE2_UCHAR32
#define PCRE2_CONFIG pcre2_config_32
#define PCRE2_JIT_STACK pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32
#endif
347 
348 /* ------------- Structure and table for handling #-commands ------------- */
349 
/* One entry in the #-command table: a command name and its identifier. */

typedef struct cmdstruct {
  const char *name;    /* Command name, without the leading '#' */
  int  value;          /* One of the CMD_xxx values below */
} cmdstruct;

/* Identifiers for the #-commands. CMD_UNKNOWN is the only value that has no
entry in cmdlist. */

enum { CMD_FORBID_UTF, CMD_LOAD, CMD_NEWLINE_DEFAULT, CMD_PATTERN,
  CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT, CMD_UNKNOWN };

static cmdstruct cmdlist[] = {
  { "forbid_utf",      CMD_FORBID_UTF },
  { "load",            CMD_LOAD },
  { "newline_default", CMD_NEWLINE_DEFAULT },
  { "pattern",         CMD_PATTERN },
  { "perltest",        CMD_PERLTEST },
  { "pop",             CMD_POP },
  { "popcopy",         CMD_POPCOPY },
  { "save",            CMD_SAVE },
  { "subject",         CMD_SUBJECT }};

/* Number of entries in cmdlist. The expansion is parenthesized so that the
macro behaves as a single value inside a larger expression; without the
parentheses, x / cmdlistcount would divide twice. */

#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdstruct))
370 
371 /* ------------- Structures and tables for handling modifiers -------------- */
372 
/* Names of the newline types. The order of this table has to stay in step
with the PCRE2_NEWLINE_xx definitions in pcre2.h. */

static const char *newlines[] = {
  "DEFAULT",
  "CR",
  "LF",
  "CRLF",
  "ANY",
  "ANYCRLF"
};
378 
/* Modifier types and applicability. The first group of values says where a
modifier may be used; the second group says what kind of value it carries. */

enum {
  /* Where the modifier applies */
  MOD_CTC,     /* Compile context */
  MOD_CTM,     /* Match context */
  MOD_PAT,     /* Pattern */
  MOD_PATP,    /* Pattern, and OK for Perl test */
  MOD_DAT,     /* Data line */
  MOD_PD,      /* Pattern or data line */
  MOD_PDP,     /* As MOD_PD, and OK for Perl test */
  MOD_PND,     /* As MOD_PD, but not for a default pattern */
  MOD_PNDP,    /* As MOD_PND, and OK for Perl test */

  /* What kind of value the modifier has */
  MOD_CTL,     /* Control bit */
  MOD_BSR,     /* BSR value */
  MOD_IN2,     /* One or two unsigned integers */
  MOD_INS,     /* Signed integer */
  MOD_INT,     /* Unsigned integer */
  MOD_IND,     /* Unsigned integer, but no value => default */
  MOD_NL,      /* Newline value */
  MOD_NN,      /* Number or name; more than one may occur */
  MOD_OPT,     /* Option bit */
  MOD_SIZ,     /* PCRE2_SIZE value */
  MOD_STR      /* String */
};
401 
/* Control bits. Some apply to compiling, some to matching, but some can be set
either on a pattern or a data line, so they must all be distinct. There are now
so many of them that they are split into two fields. Every control is a single
bit, written here as a shift count. */

/* First control word */

#define CTL_AFTERTEXT                    (1u <<  0)
#define CTL_ALLAFTERTEXT                 (1u <<  1)
#define CTL_ALLCAPTURES                  (1u <<  2)
#define CTL_ALLUSEDTEXT                  (1u <<  3)
#define CTL_ALTGLOBAL                    (1u <<  4)
#define CTL_BINCODE                      (1u <<  5)
#define CTL_CALLOUT_CAPTURE              (1u <<  6)
#define CTL_CALLOUT_INFO                 (1u <<  7)
#define CTL_CALLOUT_NONE                 (1u <<  8)
#define CTL_DFA                          (1u <<  9)
#define CTL_EXPAND                       (1u << 10)
#define CTL_FINDLIMITS                   (1u << 11)
#define CTL_FULLBINCODE                  (1u << 12)
#define CTL_GETALL                       (1u << 13)
#define CTL_GLOBAL                       (1u << 14)
#define CTL_HEXPAT                       (1u << 15)
#define CTL_INFO                         (1u << 16)
#define CTL_JITFAST                      (1u << 17)
#define CTL_JITVERIFY                    (1u << 18)
#define CTL_MARK                         (1u << 19)
#define CTL_MEMORY                       (1u << 20)
#define CTL_NULLCONTEXT                  (1u << 21)
#define CTL_POSIX                        (1u << 22)
#define CTL_POSIX_NOSUB                  (1u << 23)
#define CTL_PUSH                         (1u << 24)
#define CTL_PUSHCOPY                     (1u << 25)
#define CTL_STARTCHAR                    (1u << 26)
#define CTL_ZERO_TERMINATE               (1u << 27)
/* Bits 28 and 29 are spare */
#define CTL_NL_SET                       (1u << 30)  /* Informational */
#define CTL_BSR_SET                      (1u << 31)  /* Informational */

/* Second control word */

#define CTL2_SUBSTITUTE_EXTENDED         (1u << 0)
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH  (1u << 1)
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET    (1u << 2)
#define CTL2_SUBSTITUTE_UNSET_EMPTY      (1u << 3)

/* Combinations */

#define CTL_DEBUG    (CTL_FULLBINCODE | CTL_INFO)  /* For setting */
#define CTL_ANYINFO  (CTL_DEBUG | CTL_BINCODE | CTL_CALLOUT_INFO)
#define CTL_ANYGLOB  (CTL_ALTGLOBAL | CTL_GLOBAL)

/* All the controls that may be set either on a pattern or on a data line,
one mask for each control word. */

#define CTL_ALLPD \
  (CTL_AFTERTEXT   | CTL_ALLAFTERTEXT | CTL_ALLCAPTURES | \
   CTL_ALLUSEDTEXT | CTL_ALTGLOBAL    | CTL_GLOBAL      | \
   CTL_MARK        | CTL_MEMORY       | CTL_STARTCHAR)

#define CTL2_ALLPD \
  (CTL2_SUBSTITUTE_EXTENDED      | CTL2_SUBSTITUTE_OVERFLOW_LENGTH | \
   CTL2_SUBSTITUTE_UNKNOWN_UNSET | CTL2_SUBSTITUTE_UNSET_EMPTY)
469 
470 /* Structures for holding modifier information for patterns and subject strings
471 (data). Fields containing modifiers that can be set either for a pattern or a
472 subject must be at the start and in the same order in both cases so that the
473 same offset in the big table below works for both. */
474 
475 typedef struct patctl {    /* Structure for pattern modifiers. */
476   uint32_t  options;       /* Must be in same position as datctl */
477   uint32_t  control;       /* Must be in same position as datctl */
478   uint32_t  control2;      /* Must be in same position as datctl */
479    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
480   uint32_t  jit;
481   uint32_t  stackguard_test;
482   uint32_t  tables_id;
483   uint32_t  regerror_buffsize;
484    uint8_t  locale[LOCALESIZE];
485 } patctl;
486 
487 #define MAXCPYGET 10
488 #define LENCPYGET 64
489 
490 typedef struct datctl {    /* Structure for data line modifiers. */
491   uint32_t  options;       /* Must be in same position as patctl */
492   uint32_t  control;       /* Must be in same position as patctl */
493   uint32_t  control2;      /* Must be in same position as patctl */
494    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
495   uint32_t  cfail[2];
496    int32_t  callout_data;
497    int32_t  copy_numbers[MAXCPYGET];
498    int32_t  get_numbers[MAXCPYGET];
499   uint32_t  jitstack;
500   uint32_t  oveccount;
501   uint32_t  offset;
502   uint8_t   copy_names[LENCPYGET];
503   uint8_t   get_names[LENCPYGET];
504 } datctl;
505 
/* Identifiers that select which context is to be modified */

enum {
  CTX_PAT,       /* The active pattern context */
  CTX_POPPAT,    /* The same, when the pattern has been popped */
  CTX_DEFPAT,    /* The default pattern context */
  CTX_DAT,       /* The active data (match) context */
  CTX_DEFDAT     /* The default data (match) context */
};
513 
/* Macros to simplify the big table below. Each one yields the byte offset of a
named field: CO within the real compile context structure, MO within the real
match context structure, PO within patctl, and DO within datctl. PD is a
synonym for PO; the table uses it for some modifiers that may be set on either
a pattern or a data line, where the field is at the same offset in both
structures. */

#define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)
#define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)
#define PO(name) offsetof(patctl, name)
#define PD(name) PO(name)
#define DO(name) offsetof(datctl, name)
521 
522 /* Table of all long-form modifiers. Must be in collating sequence of modifier
523 name because it is searched by binary chop. */
524 
525 typedef struct modstruct {
526   const char   *name;
527   uint16_t      which;
528   uint16_t      type;
529   uint32_t      value;
530   PCRE2_SIZE    offset;
531 } modstruct;
532 
/* Columns: the modifier name; where it may appear (first group of MOD_xxx
values); the kind of value it takes (second group of MOD_xxx values); a
type-dependent value (the control or option bit(s), the default for MOD_IND,
the field size for MOD_STR, or the offset of the numbers vector for MOD_NN);
and the offset of the field that is updated, built with CO, MO, PO, PD or DO
according to the structure concerned. Entries must stay sorted by name. */

static modstruct modlist[] = {
  { "aftertext",                  MOD_PNDP, MOD_CTL, CTL_AFTERTEXT,              PO(control) },
  { "allaftertext",               MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT,           PO(control) },
  { "allcaptures",                MOD_PND,  MOD_CTL, CTL_ALLCAPTURES,            PO(control) },
  { "allow_empty_class",          MOD_PAT,  MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS,    PO(options) },
  { "allusedtext",                MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT,            PO(control) },
  { "alt_bsux",                   MOD_PAT,  MOD_OPT, PCRE2_ALT_BSUX,             PO(options) },
  { "alt_circumflex",             MOD_PAT,  MOD_OPT, PCRE2_ALT_CIRCUMFLEX,       PO(options) },
  { "alt_verbnames",              MOD_PAT,  MOD_OPT, PCRE2_ALT_VERBNAMES,        PO(options) },
  { "altglobal",                  MOD_PND,  MOD_CTL, CTL_ALTGLOBAL,              PO(control) },
  { "anchored",                   MOD_PD,   MOD_OPT, PCRE2_ANCHORED,             PD(options) },
  { "auto_callout",               MOD_PAT,  MOD_OPT, PCRE2_AUTO_CALLOUT,         PO(options) },
  { "bincode",                    MOD_PAT,  MOD_CTL, CTL_BINCODE,                PO(control) },
  { "bsr",                        MOD_CTC,  MOD_BSR, 0,                          CO(bsr_convention) },
  { "callout_capture",            MOD_DAT,  MOD_CTL, CTL_CALLOUT_CAPTURE,        DO(control) },
  { "callout_data",               MOD_DAT,  MOD_INS, 0,                          DO(callout_data) },
  { "callout_fail",               MOD_DAT,  MOD_IN2, 0,                          DO(cfail) },
  { "callout_info",               MOD_PAT,  MOD_CTL, CTL_CALLOUT_INFO,           PO(control) },
  { "callout_none",               MOD_DAT,  MOD_CTL, CTL_CALLOUT_NONE,           DO(control) },
  { "caseless",                   MOD_PATP, MOD_OPT, PCRE2_CASELESS,             PO(options) },
  { "copy",                       MOD_DAT,  MOD_NN,  DO(copy_numbers),           DO(copy_names) },
  { "debug",                      MOD_PAT,  MOD_CTL, CTL_DEBUG,                  PO(control) },
  { "dfa",                        MOD_DAT,  MOD_CTL, CTL_DFA,                    DO(control) },
  { "dfa_restart",                MOD_DAT,  MOD_OPT, PCRE2_DFA_RESTART,          DO(options) },
  { "dfa_shortest",               MOD_DAT,  MOD_OPT, PCRE2_DFA_SHORTEST,         DO(options) },
  { "dollar_endonly",             MOD_PAT,  MOD_OPT, PCRE2_DOLLAR_ENDONLY,       PO(options) },
  { "dotall",                     MOD_PATP, MOD_OPT, PCRE2_DOTALL,               PO(options) },
  { "dupnames",                   MOD_PATP, MOD_OPT, PCRE2_DUPNAMES,             PO(options) },
  { "expand",                     MOD_PAT,  MOD_CTL, CTL_EXPAND,                 PO(control) },
  { "extended",                   MOD_PATP, MOD_OPT, PCRE2_EXTENDED,             PO(options) },
  { "find_limits",                MOD_DAT,  MOD_CTL, CTL_FINDLIMITS,             DO(control) },
  { "firstline",                  MOD_PAT,  MOD_OPT, PCRE2_FIRSTLINE,            PO(options) },
  { "fullbincode",                MOD_PAT,  MOD_CTL, CTL_FULLBINCODE,            PO(control) },
  { "get",                        MOD_DAT,  MOD_NN,  DO(get_numbers),            DO(get_names) },
  { "getall",                     MOD_DAT,  MOD_CTL, CTL_GETALL,                 DO(control) },
  { "global",                     MOD_PNDP, MOD_CTL, CTL_GLOBAL,                 PO(control) },
  { "hex",                        MOD_PAT,  MOD_CTL, CTL_HEXPAT,                 PO(control) },
  { "info",                       MOD_PAT,  MOD_CTL, CTL_INFO,                   PO(control) },
  { "jit",                        MOD_PAT,  MOD_IND, 7,                          PO(jit) },
  { "jitfast",                    MOD_PAT,  MOD_CTL, CTL_JITFAST,                PO(control) },
  { "jitstack",                   MOD_DAT,  MOD_INT, 0,                          DO(jitstack) },
  { "jitverify",                  MOD_PAT,  MOD_CTL, CTL_JITVERIFY,              PO(control) },
  { "locale",                     MOD_PAT,  MOD_STR, LOCALESIZE,                 PO(locale) },
  { "mark",                       MOD_PNDP, MOD_CTL, CTL_MARK,                   PO(control) },
  { "match_limit",                MOD_CTM,  MOD_INT, 0,                          MO(match_limit) },
  { "match_unset_backref",        MOD_PAT,  MOD_OPT, PCRE2_MATCH_UNSET_BACKREF,  PO(options) },
  { "max_pattern_length",         MOD_CTC,  MOD_SIZ, 0,                          CO(max_pattern_length) },
  { "memory",                     MOD_PD,   MOD_CTL, CTL_MEMORY,                 PD(control) },
  { "multiline",                  MOD_PATP, MOD_OPT, PCRE2_MULTILINE,            PO(options) },
  { "never_backslash_c",          MOD_PAT,  MOD_OPT, PCRE2_NEVER_BACKSLASH_C,    PO(options) },
  { "never_ucp",                  MOD_PAT,  MOD_OPT, PCRE2_NEVER_UCP,            PO(options) },
  { "never_utf",                  MOD_PAT,  MOD_OPT, PCRE2_NEVER_UTF,            PO(options) },
  { "newline",                    MOD_CTC,  MOD_NL,  0,                          CO(newline_convention) },
  { "no_auto_capture",            MOD_PAT,  MOD_OPT, PCRE2_NO_AUTO_CAPTURE,      PO(options) },
  { "no_auto_possess",            MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS,      PO(options) },
  { "no_dotstar_anchor",          MOD_PAT,  MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR,    PO(options) },
  { "no_jit",                     MOD_DAT,  MOD_OPT, PCRE2_NO_JIT,               DO(options) },
  { "no_start_optimize",          MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE,    PO(options) },
  { "no_utf_check",               MOD_PD,   MOD_OPT, PCRE2_NO_UTF_CHECK,         PD(options) },
  { "notbol",                     MOD_DAT,  MOD_OPT, PCRE2_NOTBOL,               DO(options) },
  { "notempty",                   MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY,             DO(options) },
  { "notempty_atstart",           MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY_ATSTART,     DO(options) },
  { "noteol",                     MOD_DAT,  MOD_OPT, PCRE2_NOTEOL,               DO(options) },
  { "null_context",               MOD_PD,   MOD_CTL, CTL_NULLCONTEXT,            PO(control) },
  { "offset",                     MOD_DAT,  MOD_INT, 0,                          DO(offset) },
  { "offset_limit",               MOD_CTM,  MOD_SIZ, 0,                          MO(offset_limit)},
  { "ovector",                    MOD_DAT,  MOD_INT, 0,                          DO(oveccount) },
  { "parens_nest_limit",          MOD_CTC,  MOD_INT, 0,                          CO(parens_nest_limit) },
  { "partial_hard",               MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
  { "partial_soft",               MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
  { "ph",                         MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
  { "posix",                      MOD_PAT,  MOD_CTL, CTL_POSIX,                  PO(control) },
  { "posix_nosub",                MOD_PAT,  MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB,  PO(control) },
  { "ps",                         MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
  { "push",                       MOD_PAT,  MOD_CTL, CTL_PUSH,                   PO(control) },
  { "pushcopy",                   MOD_PAT,  MOD_CTL, CTL_PUSHCOPY,              PO(control) },
  { "recursion_limit",            MOD_CTM,  MOD_INT, 0,                          MO(recursion_limit) },
  { "regerror_buffsize",          MOD_PAT,  MOD_INT, 0,                          PO(regerror_buffsize) },
  { "replace",                    MOD_PND,  MOD_STR, REPLACE_MODSIZE,            PO(replacement) },
  { "stackguard",                 MOD_PAT,  MOD_INT, 0,                          PO(stackguard_test) },
  { "startchar",                  MOD_PND,  MOD_CTL, CTL_STARTCHAR,              PO(control) },
  { "startoffset",                MOD_DAT,  MOD_INT, 0,                          DO(offset) },
  { "substitute_extended",        MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_EXTENDED,   PO(control2) },
  { "substitute_overflow_length", MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
  { "substitute_unknown_unset",   MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
  { "substitute_unset_empty",     MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
  { "tables",                     MOD_PAT,  MOD_INT, 0,                          PO(tables_id) },
  { "ucp",                        MOD_PATP, MOD_OPT, PCRE2_UCP,                  PO(options) },
  { "ungreedy",                   MOD_PAT,  MOD_OPT, PCRE2_UNGREEDY,             PO(options) },
  { "use_offset_limit",           MOD_PAT,  MOD_OPT, PCRE2_USE_OFFSET_LIMIT,     PO(options) },
  { "utf",                        MOD_PATP, MOD_OPT, PCRE2_UTF,                  PO(options) },
  { "zero_terminate",             MOD_DAT,  MOD_CTL, CTL_ZERO_TERMINATE,         DO(control) }
};
626 
/* Number of entries in modlist. The expansion is parenthesized so that the
macro behaves as a single value inside a larger expression; without the
parentheses, x / MODLISTCOUNT would divide twice. */

#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
628 
629 /* Controls and options that are supported for use with the POSIX interface. */
630 
/* Compile-time side: PCRE2 option bits, bits of the first control word, and
bits of the second control word (none at present). */

#define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
  PCRE2_CASELESS  | \
  PCRE2_DOTALL    | \
  PCRE2_MULTILINE | \
  PCRE2_UCP       | \
  PCRE2_UNGREEDY  | \
  PCRE2_UTF)

#define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_AFTERTEXT    | \
  CTL_ALLAFTERTEXT | \
  CTL_EXPAND       | \
  CTL_POSIX        | \
  CTL_POSIX_NOSUB)

#define POSIX_SUPPORTED_COMPILE_CONTROLS2  (0)
639 
/* Match-time side: option bits, then the two control words. */

#define POSIX_SUPPORTED_MATCH_OPTIONS ( \
  PCRE2_NOTBOL   | \
  PCRE2_NOTEMPTY | \
  PCRE2_NOTEOL)

#define POSIX_SUPPORTED_MATCH_CONTROLS ( \
  CTL_AFTERTEXT | \
  CTL_ALLAFTERTEXT)

#define POSIX_SUPPORTED_MATCH_CONTROLS2  (0)
645 
646 /* Control bits that are not ignored with 'push'. */
647 
/* First control word, one bit per line; the second control word contributes
nothing at present. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_BINCODE      | \
  CTL_BSR_SET      | \
  CTL_CALLOUT_INFO | \
  CTL_FULLBINCODE  | \
  CTL_HEXPAT       | \
  CTL_INFO         | \
  CTL_JITVERIFY    | \
  CTL_MEMORY       | \
  CTL_NL_SET       | \
  CTL_PUSH         | \
  CTL_PUSHCOPY)

#define PUSH_SUPPORTED_COMPILE_CONTROLS2  (0)
653 
654 /* Controls that apply only at compile time with 'push'. */
655 
/* Only CTL_JITVERIFY in the first control word; nothing in the second. */

#define PUSH_COMPILE_ONLY_CONTROLS  (CTL_JITVERIFY)
#define PUSH_COMPILE_ONLY_CONTROLS2 (0)
658 
659 /* Controls that are forbidden with #pop or #popcopy. */
660 
/* All of these are bits in the first control word. */

#define NOTPOP_CONTROLS ( \
  CTL_HEXPAT      | \
  CTL_POSIX       | \
  CTL_POSIX_NOSUB | \
  CTL_PUSH        | \
  CTL_PUSHCOPY)
663 
664 /* Pattern controls that are mutually exclusive. At present these are all in
665 the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
666 CTL_POSIX, so it doesn't need its own entries. */
667 
668 static uint32_t exclusive_pat_controls[] = {
669   CTL_POSIX  | CTL_HEXPAT,
670   CTL_POSIX  | CTL_PUSH,
671   CTL_POSIX  | CTL_PUSHCOPY,
672   CTL_EXPAND | CTL_HEXPAT };
673 
674 /* Data controls that are mutually exclusive. At present these are all in the
675 first control word. */
676 static uint32_t exclusive_dat_controls[] = {
677   CTL_ALLUSEDTEXT | CTL_STARTCHAR,
678   CTL_FINDLIMITS  | CTL_NULLCONTEXT };
679 
680 /* Table of single-character abbreviated modifiers. The index field is
681 initialized to -1, but the first time the modifier is encountered, it is filled
682 in with the index of the full entry in modlist, to save repeated searching when
683 processing multiple test items. This short list is searched serially, so its
684 order does not matter. */
685 
struct c1modstruct {
  const char *fullname;   /* Name of the corresponding full modifier */
  uint32_t    onechar;    /* The single-character abbreviation */
  int         index;      /* Index of the full entry in modlist, or -1 */
};

typedef struct c1modstruct c1modstruct;
691 
692 static c1modstruct c1modlist[] = {
693   { "bincode",      'B',           -1 },
694   { "info",         'I',           -1 },
695   { "global",       'g',           -1 },
696   { "caseless",     'i',           -1 },
697   { "multiline",    'm',           -1 },
698   { "dotall",       's',           -1 },
699   { "extended",     'x',           -1 }
700 };
701 
/* Number of entries in c1modlist. The whole expansion is parenthesized so that
it acts as a single operand wherever it is used. */

#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modlist[0]))
703 
704 /* Table of arguments for the -C command line option. Use macros to make the
705 table itself easier to read. */
706 
/* Give SUPPORT_8, SUPPORT_16 and SUPPORT_32 the value 1 when the matching
SUPPORT_PCRE2_n is defined; otherwise give them the value 0 unless they already
have a definition. */

#if defined SUPPORT_PCRE2_8
#define SUPPORT_8 1
#elif !defined SUPPORT_8
#define SUPPORT_8 0
#endif

#if defined SUPPORT_PCRE2_16
#define SUPPORT_16 1
#elif !defined SUPPORT_16
#define SUPPORT_16 0
#endif

#if defined SUPPORT_PCRE2_32
#define SUPPORT_32 1
#elif !defined SUPPORT_32
#define SUPPORT_32 0
#endif
726 
/* Without EBCDIC both values are zero; with it, the newline value is whatever
CHAR_LF is defined to be. */

#ifndef EBCDIC
#define SUPPORT_EBCDIC 0
#define EBCDIC_NL 0
#else
#define SUPPORT_EBCDIC 1
#define EBCDIC_NL CHAR_LF
#endif
734 
/* BACKSLASH_C is 1 unless NEVER_BACKSLASH_C is defined. */

#ifndef NEVER_BACKSLASH_C
#define BACKSLASH_C 1
#else
#define BACKSLASH_C 0
#endif
740 
struct coptstruct {
  const char *name;    /* Argument name, as matched against -C */
  uint32_t    type;    /* One of the CONF_xxx values */
  uint32_t    value;   /* A fixed value or a PCRE2_CONFIG_xxx code */
};

typedef struct coptstruct coptstruct;
746 
/* Values for the type field of a coptstruct. */

enum {
  CONF_BSR = 0,
  CONF_FIX = 1,
  CONF_FIZ = 2,
  CONF_INT = 3,
  CONF_NL  = 4
};
753 
754 static coptstruct coptlist[] = {
755   { "backslash-C", CONF_FIX, BACKSLASH_C },
756   { "bsr",         CONF_BSR, PCRE2_CONFIG_BSR },
757   { "ebcdic",      CONF_FIX, SUPPORT_EBCDIC },
758   { "ebcdic-nl",   CONF_FIZ, EBCDIC_NL },
759   { "jit",         CONF_INT, PCRE2_CONFIG_JIT },
760   { "linksize",    CONF_INT, PCRE2_CONFIG_LINKSIZE },
761   { "newline",     CONF_NL,  PCRE2_CONFIG_NEWLINE },
762   { "pcre2-16",    CONF_FIX, SUPPORT_16 },
763   { "pcre2-32",    CONF_FIX, SUPPORT_32 },
764   { "pcre2-8",     CONF_FIX, SUPPORT_8 },
765   { "unicode",     CONF_INT, PCRE2_CONFIG_UNICODE }
766 };
767 
/* Number of entries in coptlist. The whole expansion is parenthesized so that
it acts as a single operand wherever it is used. */

#define COPTLISTCOUNT (sizeof(coptlist)/sizeof(coptlist[0]))
769 
/* The helper macros are needed only to build the table above. */

#undef SUPPORT_EBCDIC
#undef SUPPORT_32
#undef SUPPORT_16
#undef SUPPORT_8
774 
775 
776 /* ----------------------- Static variables ------------------------ */
777 
/* Input and output streams; neither is given a value here. */

static FILE *infile, *outfile;
780 
781 static const void *last_callout_mark;
782 static PCRE2_JIT_STACK *jit_stack = NULL;
783 static size_t jit_stack_size = 0;
784 
785 static BOOL first_callout;
786 static BOOL jit_was_used;
787 static BOOL restrict_for_perl_test = FALSE;
788 static BOOL show_memory = FALSE;
789 
790 static int code_unit_size;                    /* Bytes */
791 static int jitrc;                             /* Return from JIT compile */
792 static int test_mode = DEFAULT_TEST_MODE;
793 static int timeit = 0;
794 static int timeitm = 0;
795 
/* Timing totals, all starting at zero. They are file-local like every other
variable in this section, so they are declared static rather than being given
external linkage. */

static clock_t total_compile_time = 0;
static clock_t total_jit_compile_time = 0;
static clock_t total_match_time = 0;
799 
/* Unsigned values; only forbid_utf and local_newline_default have explicit
(zero) initializers. */

static uint32_t forbid_utf = 0;
static uint32_t callout_count;
static uint32_t dfa_matched;
static uint32_t maxlookbehind;
static uint32_t max_oveccount;

static uint16_t local_newline_default = 0;
807 
808 static VERSION_TYPE jittarget[VERSION_SIZE];
809 static VERSION_TYPE version[VERSION_SIZE];
810 static VERSION_TYPE uversion[VERSION_SIZE];
811 
812 static patctl def_patctl;
813 static patctl pat_patctl;
814 static datctl def_datctl;
815 static datctl dat_datctl;
816 
817 static void *patstack[PATSTACKSIZE];
818 static int patstacknext = 0;
819 
/* A regex_t block, present only when 8-bit support is compiled in. */

#ifdef SUPPORT_PCRE2_8
static regex_t preg = {
  NULL,
  NULL,
  0,
  0,
  0
};
#endif
823 
/* Both pointers start out NULL; the locale name buffer holds 32 bytes. */

static uint8_t locale_name[32];
static const uint8_t *locale_tables = NULL;
static int *dfa_workspace = NULL;
827 
828 /* We need buffers for building 16/32-bit strings; 8-bit strings don't need
829 rebuilding, but set up the same naming scheme for use in macros. The "buffer"
830 buffer is where all input lines are read. Its size is the same as pbuffer8.
831 Pattern lines are always copied to pbuffer8 for use in callouts, even if they
832 are actually compiled from pbuffer16 or pbuffer32. */
833 
static uint8_t *buffer = NULL;
static uint8_t *pbuffer8 = NULL;
static size_t pbuffer8_size = 50000;            /* Initial size, bytes */
837 
838 /* The dbuffer is where all processed data lines are put. In non-8-bit modes it
839 is cast as needed. For long data lines it grows as necessary. */
840 
static uint8_t *dbuffer = NULL;
static size_t dbuffer_size = 1u << 14;          /* Initial size, bytes */
843 
844 
845 /* ---------------- Mode-dependent variables -------------------*/
846 
/* 8-bit versions of the per-mode variables; the trailing "8" is what the
G(name,8) constructions in the macros below refer to. */

#ifdef SUPPORT_PCRE2_8
static pcre2_code_8             *compiled_code8;
static pcre2_general_context_8  *general_context8;
static pcre2_general_context_8  *general_context_copy8;
static pcre2_compile_context_8  *pat_context8;
static pcre2_compile_context_8  *default_pat_context8;
static pcre2_match_context_8    *dat_context8;
static pcre2_match_context_8    *default_dat_context8;
static pcre2_match_data_8       *match_data8;
#endif
854 
/* 16-bit versions of the per-mode variables, plus the 16-bit pattern buffer
and its size. */

#ifdef SUPPORT_PCRE2_16
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static uint16_t *pbuffer16 = NULL;
static PCRE2_SIZE pbuffer16_size = 0;   /* Set only when needed */
#endif
864 
/* 32-bit versions of the per-mode variables, plus the 32-bit pattern buffer
and its size. */

#ifdef SUPPORT_PCRE2_32
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static uint32_t *pbuffer32 = NULL;
static PCRE2_SIZE pbuffer32_size = 0;   /* Set only when needed */
#endif
874 
875 
876 /* ---------------- Macros that work in all modes ----------------- */
877 
/* Thin wrappers: SET and SETPLUS are SETOP with "=" and "+=" respectively;
CAST8VAR is CASTVAR with the type fixed as uint8_t *. */

#define SET(x,y)      SETOP(x,y,=)
#define SETPLUS(x,y)  SETOP(x,y,+=)
#define CAST8VAR(x)   CASTVAR(uint8_t *, x)
/* strlen() for a string that is not declared as char. The argument is
parenthesized before the cast so that an expression such as p + 1 is cast as a
whole instead of having the cast bind to p alone. */

#define strlen8(x) strlen((char *)(x))
882 
883 
884 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
885 
886 /* Define macros for variables and functions that must be selected dynamically
887 depending on the mode setting (8, 16, 32). These are dependent on which modes
888 are supported. */
889 
890 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
891      defined (SUPPORT_PCRE2_32)) >= 2
892 
893 /* ----- All three modes supported ----- */
894 
895 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
896 
/* Field b of the mode-specific structure a, cast to type t. Any mode other
than 8 or 16 selects the 32-bit structure. */

#define CASTFLD(t,a,b) ( \
  (test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : \
  (test_mode == PCRE8_MODE)?  (t)(G(a,8)->b) : \
  (t)(G(a,32)->b))
899 
/* The mode-specific variable x, cast to type t. */

#define CASTVAR(t,x) ( \
  (test_mode == PCRE16_MODE)? (t)G(x,16) : \
  (test_mode == PCRE8_MODE)?  (t)G(x,8) : \
  (t)G(x,32))
903 
/* Code unit number b of the string at a, read with the width of the current
mode and widened to uint32_t. */

#define CODE_UNIT(a,b) ( \
  (test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \
  (test_mode == PCRE8_MODE)?  (uint32_t)(((PCRE2_SPTR8)(a))[b]) : \
  (uint32_t)(((PCRE2_SPTR32)(a))[b]))
908 
/* Copy one match context over another (b to a) with memcpy. */

#define DATCTXCPY(a,b) \
  if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \
  else if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \
  else \
    memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
915 
/* Field b of the mode-specific structure a, without any cast. */

#define FLD(a,b) ( \
  (test_mode == PCRE8_MODE)?  G(a,8)->b : \
  (test_mode == PCRE16_MODE)? G(a,16)->b : \
  G(a,32)->b)
918 
/* Copy one compile context over another (b to a) with memcpy. */

#define PATCTXCPY(a,b) \
  if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \
  else if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \
  else \
    memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
925 
/* Call the pchars function for the current mode on the string at p, starting
offset code units in, and store its result in lv. Any mode other than 16 or 32
selects the 8-bit function. */

#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == PCRE16_MODE) \
    lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
  else if (test_mode == PCRE32_MODE) \
    lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
  else \
    lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
933 
/* As PCHARS, but the value returned by the pchars function is discarded. */

#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == PCRE16_MODE) \
    (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
  else if (test_mode == PCRE32_MODE) \
    (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
  else \
    (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
941 
/* Enumerate the callouts of the current compiled pattern: b is cast to the
callback type for the current mode, c is passed through, and the result is
stored in a. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == PCRE16_MODE) \
    a = pcre2_callout_enumerate_16(compiled_code16, \
      (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \
  else if (test_mode == PCRE8_MODE) \
    a = pcre2_callout_enumerate_8(compiled_code8, \
      (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \
  else \
    a = pcre2_callout_enumerate_32(compiled_code32, \
      (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
952 
/* Copy the compiled code at b with pcre2_code_copy and store the copy in the
mode-specific variable a. */

#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  if (test_mode == PCRE16_MODE) G(a,16) = pcre2_code_copy_16(b); \
  else if (test_mode == PCRE8_MODE) G(a,8) = pcre2_code_copy_8(b); \
  else G(a,32) = pcre2_code_copy_32(b)
960 
/* Copy the mode-specific compiled code b with pcre2_code_copy and store the
copy, as a void pointer, in a. */

#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  if (test_mode == PCRE16_MODE) a = (void *)pcre2_code_copy_16(G(b,16)); \
  else if (test_mode == PCRE8_MODE) a = (void *)pcre2_code_copy_8(G(b,8)); \
  else a = (void *)pcre2_code_copy_32(G(b,32))
968 
/* Call pcre2_compile for the current mode on the mode-specific pattern b and
store the result in the mode-specific variable a. */

#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g); \
  else if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \
  else \
    G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
976 
/* Call pcre2_dfa_match for the current mode; b and g are mode-specific
variables, c is cast to the subject pointer type, and the result goes in a. */

#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  if (test_mode == PCRE16_MODE) \
    a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \
  else if (test_mode == PCRE8_MODE) \
    a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \
  else \
    a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
984 
/* Fetch the text for error code a into the mode-specific buffer b, whose size
is taken from the variable named b<width>_size; the result goes in r. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  if (test_mode == PCRE16_MODE) \
    r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size)); \
  else if (test_mode == PCRE8_MODE) \
    r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \
  else \
    r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size))
992 
/* Store in a the result of pcre2_get_ovector_count for the mode-specific
variable b. */

#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  if (test_mode == PCRE16_MODE) a = pcre2_get_ovector_count_16(G(b,16)); \
  else if (test_mode == PCRE8_MODE) a = pcre2_get_ovector_count_8(G(b,8)); \
  else a = pcre2_get_ovector_count_32(G(b,32))
1000 
/* Store in a the result of pcre2_get_startchar for the mode-specific
variable b. */

#define PCRE2_GET_STARTCHAR(a,b) \
  if (test_mode == PCRE16_MODE) a = pcre2_get_startchar_16(G(b,16)); \
  else if (test_mode == PCRE8_MODE) a = pcre2_get_startchar_8(G(b,8)); \
  else a = pcre2_get_startchar_32(G(b,32))
1008 
/* Call pcre2_jit_compile on the mode-specific code a with argument b; the
result goes in r. */

#define PCRE2_JIT_COMPILE(r,a,b) \
  if (test_mode == PCRE16_MODE) \
    r = pcre2_jit_compile_16(G(a,16),b); \
  else if (test_mode == PCRE8_MODE) \
    r = pcre2_jit_compile_8(G(a,8),b); \
  else \
    r = pcre2_jit_compile_32(G(a,32),b)
1013 
/* Call pcre2_jit_free_unused_memory with the mode-specific variable a. */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  if (test_mode == PCRE16_MODE) \
    pcre2_jit_free_unused_memory_16(G(a,16)); \
  else if (test_mode == PCRE8_MODE) \
    pcre2_jit_free_unused_memory_8(G(a,8)); \
  else \
    pcre2_jit_free_unused_memory_32(G(a,32))
1018 
/* Call pcre2_jit_match for the current mode; b and g are mode-specific
variables, c is cast to the subject pointer type, and the result goes in a. */

#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == PCRE16_MODE) \
    a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
  else if (test_mode == PCRE8_MODE) \
    a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
  else \
    a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1026 
/* Create a JIT stack with pcre2_jit_stack_create for the current mode and
store it in a as a PCRE2_JIT_STACK pointer. Like the other statement macros
here, the definition does not end with a semicolon: the caller supplies it, so
that "if (x) PCRE2_JIT_STACK_CREATE(...); else ..." remains valid. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)
1034 
/* Call pcre2_jit_stack_assign on the mode-specific context a, with b cast to
the JIT callback type for the current mode and c passed through. The
definition does not end with a semicolon: the caller supplies it, so that the
macro can be followed by "else". */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
  else \
    pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)
1042 
/* Free the JIT stack a with pcre2_jit_stack_free for the current mode, after
casting a to that mode's stack type. The definition does not end with a
semicolon: the caller supplies it, so that the macro can be followed by
"else". */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
1050 
/* Store in a the result of pcre2_maketables(NULL) for the current mode. */

#define PCRE2_MAKETABLES(a) \
  if (test_mode == PCRE16_MODE) \
    a = pcre2_maketables_16(NULL); \
  else if (test_mode == PCRE8_MODE) \
    a = pcre2_maketables_8(NULL); \
  else \
    a = pcre2_maketables_32(NULL)
1055 
/* Call pcre2_match for the current mode; b and g are mode-specific variables,
c is cast to the subject pointer type, and the result goes in a. */

#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == PCRE16_MODE) \
    a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
  else if (test_mode == PCRE8_MODE) \
    a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
  else \
    a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1063 
/* Store in the mode-specific variable a the result of
pcre2_match_data_create(b,c). */

#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  if (test_mode == PCRE16_MODE) G(a,16) = pcre2_match_data_create_16(b,c); \
  else if (test_mode == PCRE8_MODE) G(a,8) = pcre2_match_data_create_8(b,c); \
  else G(a,32) = pcre2_match_data_create_32(b,c)
1071 
/* Store in the mode-specific variable a the result of
pcre2_match_data_create_from_pattern for the mode-specific code b. */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c); \
  else if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c); \
  else \
    G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
1079 
/* Call pcre2_match_data_free on the mode-specific variable a. */

#define PCRE2_MATCH_DATA_FREE(a) \
  if (test_mode == PCRE16_MODE) pcre2_match_data_free_16(G(a,16)); \
  else if (test_mode == PCRE8_MODE) pcre2_match_data_free_8(G(a,8)); \
  else pcre2_match_data_free_32(G(a,32))
1087 
/* Store in a the result of pcre2_pattern_info for the mode-specific code b,
with c and d passed through. */

#define PCRE2_PATTERN_INFO(a,b,c,d) \
  if (test_mode == PCRE16_MODE) a = pcre2_pattern_info_16(G(b,16),c,d); \
  else if (test_mode == PCRE8_MODE) a = pcre2_pattern_info_8(G(b,8),c,d); \
  else a = pcre2_pattern_info_32(G(b,32),c,d)
1095 
/* Call pcre2_printint for the current mode on the current compiled code, with
outfile as the stream and a as the final argument. */

#define PCRE2_PRINTINT(a) \
  if (test_mode == PCRE16_MODE) pcre2_printint_16(compiled_code16,outfile,a); \
  else if (test_mode == PCRE8_MODE) pcre2_printint_8(compiled_code8,outfile,a); \
  else pcre2_printint_32(compiled_code32,outfile,a)
1103 
/* Call pcre2_serialize_decode for the current mode; a is cast to a pointer to
code pointers, d is a mode-specific variable, and the result goes in r. */

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \
  else if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \
  else \
    r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
1111 
/* Call pcre2_serialize_encode for the current mode; a is cast to a pointer to
const code pointers, e is a mode-specific variable, and the result goes in
r. */

#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \
  else if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \
  else \
    r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
1119 
/* Call pcre2_serialize_free(a) for the current mode. */

#define PCRE2_SERIALIZE_FREE(a) \
  if (test_mode == PCRE16_MODE) pcre2_serialize_free_16(a); \
  else if (test_mode == PCRE8_MODE) pcre2_serialize_free_8(a); \
  else pcre2_serialize_free_32(a)
1127 
/* Store in r the result of pcre2_serialize_get_number_of_codes(a) for the
current mode. The definition ends without a semicolon and without a line
continuation: the caller supplies the semicolon, and nothing that follows the
macro can be pulled into its definition. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_get_number_of_codes_8(a); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_get_number_of_codes_16(a); \
  else \
    r = pcre2_serialize_get_number_of_codes_32(a)
1135 
/* Call pcre2_set_callout on the mode-specific context a, with b cast to the
callout function type for the current mode and c passed through. The
definition does not end with a semicolon: the caller supplies it, so that the
macro can be followed by "else". */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
1143 
/* Call pcre2_set_character_tables on the mode-specific context a. */

#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  if (test_mode == PCRE16_MODE) pcre2_set_character_tables_16(G(a,16),b); \
  else if (test_mode == PCRE8_MODE) pcre2_set_character_tables_8(G(a,8),b); \
  else pcre2_set_character_tables_32(G(a,32),b)
1151 
/* Call pcre2_set_compile_recursion_guard on the mode-specific context a. */

#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  if (test_mode == PCRE16_MODE) \
    pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \
  else if (test_mode == PCRE8_MODE) \
    pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \
  else \
    pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
1159 
/* Call pcre2_set_match_limit on the mode-specific context a. */

#define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == PCRE16_MODE) pcre2_set_match_limit_16(G(a,16),b); \
  else if (test_mode == PCRE8_MODE) pcre2_set_match_limit_8(G(a,8),b); \
  else pcre2_set_match_limit_32(G(a,32),b)
1167 
/* Call pcre2_set_max_pattern_length on the mode-specific context a. */

#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  if (test_mode == PCRE16_MODE) pcre2_set_max_pattern_length_16(G(a,16),b); \
  else if (test_mode == PCRE8_MODE) pcre2_set_max_pattern_length_8(G(a,8),b); \
  else pcre2_set_max_pattern_length_32(G(a,32),b)
1175 
/* Call pcre2_set_offset_limit on the mode-specific context a. */

#define PCRE2_SET_OFFSET_LIMIT(a,b) \
  if (test_mode == PCRE16_MODE) pcre2_set_offset_limit_16(G(a,16),b); \
  else if (test_mode == PCRE8_MODE) pcre2_set_offset_limit_8(G(a,8),b); \
  else pcre2_set_offset_limit_32(G(a,32),b)
1183 
/* Call pcre2_set_parens_nest_limit on the mode-specific context a. */

#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  if (test_mode == PCRE16_MODE) pcre2_set_parens_nest_limit_16(G(a,16),b); \
  else if (test_mode == PCRE8_MODE) pcre2_set_parens_nest_limit_8(G(a,8),b); \
  else pcre2_set_parens_nest_limit_32(G(a,32),b)
1191 
/* Call pcre2_set_recursion_limit on the mode-specific context a. */

#define PCRE2_SET_RECURSION_LIMIT(a,b) \
  if (test_mode == PCRE16_MODE) pcre2_set_recursion_limit_16(G(a,16),b); \
  else if (test_mode == PCRE8_MODE) pcre2_set_recursion_limit_8(G(a,8),b); \
  else pcre2_set_recursion_limit_32(G(a,32),b)
1199 
/* Call pcre2_substitute for the current mode. b, g and h are mode-specific
variables; c and i are cast to the string pointer type and k to the buffer
pointer type of that mode. The result goes in a. */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  if (test_mode == PCRE16_MODE) \
    a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
      (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
  else if (test_mode == PCRE8_MODE) \
    a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
      (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
  else \
    a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
      (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
1210 
/* Call pcre2_substring_copy_byname; b and c are mode-specific variables, d is
cast to the buffer pointer type, and the result goes in a. */

#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \
  else if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
  else \
    a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
1218 
/* Call pcre2_substring_copy_bynumber; b is a mode-specific variable, d is cast
to the buffer pointer type, and the result goes in a. */

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); \
  else if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e); \
  else \
    a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
1226 
/* Call pcre2_substring_free on a, cast to the buffer pointer type of the
current mode. */

#define PCRE2_SUBSTRING_FREE(a) \
  if (test_mode == PCRE16_MODE) \
    pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \
  else if (test_mode == PCRE8_MODE) \
    pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \
  else \
    pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
1232 
/* Call pcre2_substring_get_byname; b and c are mode-specific variables, d is
cast to a pointer to a buffer pointer, and the result goes in a. */

#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \
  else if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \
  else \
    a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
1240 
/* Call pcre2_substring_get_bynumber; b is a mode-specific variable, d is cast
to a pointer to a buffer pointer, and the result goes in a. */

#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e); \
  else if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e); \
  else \
    a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
1248 
/* Call pcre2_substring_length_byname; b and c are mode-specific variables and
the result goes in a. */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d); \
  else if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d); \
  else \
    a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
1256 
/* Call pcre2_substring_length_bynumber; b is a mode-specific variable and the
result goes in a. */

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_length_bynumber_16(G(b,16),c,d); \
  else if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_length_bynumber_8(G(b,8),c,d); \
  else \
    a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
1264 
/* Call pcre2_substring_list_get; b is a mode-specific variable, c is cast to
the list pointer type of the current mode, and the result goes in a. */

#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d); \
  else if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d); \
  else \
    a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
1272 
/* Call pcre2_substring_list_free on a, cast to a pointer to string pointers of
the current mode. */

#define PCRE2_SUBSTRING_LIST_FREE(a) \
  if (test_mode == PCRE16_MODE) pcre2_substring_list_free_16((PCRE2_SPTR16 *)a); \
  else if (test_mode == PCRE8_MODE) pcre2_substring_list_free_8((PCRE2_SPTR8 *)a); \
  else pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
1280 
/* Call pcre2_substring_number_from_name; b and c are mode-specific variables
and the result goes in a. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); \
  else if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); \
  else \
    a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
1288 
/* The mode-specific variable x as a void pointer. */

#define PTR(x) ( \
  (test_mode == PCRE16_MODE)? (void *)G(x,16) : \
  (test_mode == PCRE8_MODE)?  (void *)G(x,8) : \
  (void *)G(x,32))
1293 
/* Assign z to field y of the mode-specific structure x. */

#define SETFLD(x,y,z) \
  if (test_mode == PCRE16_MODE) \
    G(x,16)->y = z; \
  else if (test_mode == PCRE8_MODE) \
    G(x,8)->y = z; \
  else \
    G(x,32)->y = z
1298 
/* Assign z to element v of the vector field y of the mode-specific
structure x. */

#define SETFLDVEC(x,y,v,z) \
  if (test_mode == PCRE16_MODE) \
    G(x,16)->y[v] = z; \
  else if (test_mode == PCRE8_MODE) \
    G(x,8)->y[v] = z; \
  else \
    G(x,32)->y[v] = z
1303 
/* Apply the assignment operator z, with right-hand side y, to the
mode-specific variable x. */

#define SETOP(x,y,z) \
  if (test_mode == PCRE16_MODE) \
    G(x,16) z y; \
  else if (test_mode == PCRE8_MODE) \
    G(x,8) z y; \
  else \
    G(x,32) z y
1308 
/* Assign y to the mode-specific variable x after casting it to a pointer to
the unsigned integer type of the current width. */

#define SETCASTPTR(x,y) \
  if (test_mode == PCRE16_MODE) G(x,16) = (uint16_t *)(y); \
  else if (test_mode == PCRE8_MODE) G(x,8) = (uint8_t *)(y); \
  else G(x,32) = (uint32_t *)(y)
1316 
/* Length, as an int, of the zero-terminated string at p, measured by strlen,
strlen16 or strlen32 according to the current mode. The argument is
parenthesized before each cast so that an expression such as p + 1 is cast as a
whole instead of having the cast bind to p alone. */

#define STRLEN(p) ((test_mode == PCRE8_MODE)? ((int)strlen((char *)(p))) : \
  (test_mode == PCRE16_MODE)? ((int)strlen16((PCRE2_SPTR16)(p))) : \
  ((int)strlen32((PCRE2_SPTR32)(p))))
1320 
/* Call the mode-specific function a with the mode-specific argument b. */

#define SUB1(a,b) \
  if (test_mode == PCRE16_MODE) \
    G(a,16)(G(b,16)); \
  else if (test_mode == PCRE8_MODE) \
    G(a,8)(G(b,8)); \
  else \
    G(a,32)(G(b,32))
1325 
/* Call the mode-specific function a with the mode-specific arguments b
and c. */

#define SUB2(a,b,c) \
  if (test_mode == PCRE16_MODE) \
    G(a,16)(G(b,16),G(c,16)); \
  else if (test_mode == PCRE8_MODE) \
    G(a,8)(G(b,8),G(c,8)); \
  else \
    G(a,32)(G(b,32),G(c,32))
1330 
/* True when the mode-specific variable x stands in relation r to y. The result
is false when test_mode is none of the three modes. */

#define TEST(x,r,y) ( \
  (test_mode == PCRE16_MODE && G(x,16) r (y)) || \
  (test_mode == PCRE8_MODE  && G(x,8) r (y)) || \
  (test_mode == PCRE32_MODE && G(x,32) r (y)))
1335 
/* True when field f of the mode-specific structure x stands in relation r to
y. The result is false when test_mode is none of the three modes. */

#define TESTFLD(x,f,r,y) ( \
  (test_mode == PCRE16_MODE && G(x,16)->f r (y)) || \
  (test_mode == PCRE8_MODE  && G(x,8)->f r (y)) || \
  (test_mode == PCRE32_MODE && G(x,32)->f r (y)))
1340 
1341 
1342 
1343 /* ----- Two out of three modes are supported ----- */
1344 
1345 #else
1346 
1347 /* We can use some macro trickery to make a single set of definitions work in
1348 the three different cases. */
1349 
1350 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
1351 
/* BITONE is the wider of the two supported widths and BITTWO the narrower:
32 and 16, 32 and 8, or 16 and 8. */

#if defined(SUPPORT_PCRE2_32) && \
    (defined(SUPPORT_PCRE2_16) || defined(SUPPORT_PCRE2_8))
#define BITONE 32
#if defined(SUPPORT_PCRE2_16)
#define BITTWO 16      /* 32-bit and 16-bit but not 8-bit supported */
#else
#define BITTWO 8       /* 32-bit and 8-bit but not 16-bit supported */
#endif

#else                  /* 16-bit and 8-bit but not 32-bit supported */
#define BITONE 16
#define BITTWO 8
#endif
1368 
1369 
1370 /* ----- Common macros for two-mode cases ----- */
1371 
/* Field b of the mode-specific structure a, cast to type t. Any mode other
than the BITONE mode selects the BITTWO structure. */

#define CASTFLD(t,a,b) ( \
  (test_mode != G(G(PCRE,BITONE),_MODE))? (t)(G(a,BITTWO)->b) : \
    (t)(G(a,BITONE)->b))
1375 
/* The mode-specific variable x, cast to type t. */

#define CASTVAR(t,x) ( \
  (test_mode != G(G(PCRE,BITONE),_MODE))? (t)G(x,BITTWO) : (t)G(x,BITONE))
1379 
/* Code unit number b of the string at a, read with the width of the current
mode and widened to uint32_t. */

#define CODE_UNIT(a,b) ( \
  (test_mode != G(G(PCRE,BITONE),_MODE))? \
    (uint32_t)(((G(PCRE2_SPTR,BITTWO))(a))[b]) : \
    (uint32_t)(((G(PCRE2_SPTR,BITONE))(a))[b]))
1384 
/* Copy one match context over another (b to a) with memcpy. */

#define DATCTXCPY(a,b) \
  if (test_mode != G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_match_context_,BITTWO))); \
  else \
    memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_match_context_,BITONE)))
1390 
/* Field b of the mode-specific structure a, without any cast. */

#define FLD(a,b) ( \
  (test_mode != G(G(PCRE,BITONE),_MODE))? G(a,BITTWO)->b : \
    G(a,BITONE)->b)
1393 
/* Copy one compile context over another (b to a) with memcpy. */

#define PATCTXCPY(a,b) \
  if (test_mode != G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_compile_context_,BITTWO))); \
  else \
    memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_compile_context_,BITONE)))
1399 
/* Call the pchars function for the current mode on the string at p, starting
offset code units in, and store its result in lv. */

#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode != G(G(PCRE,BITONE),_MODE)) \
    lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f); \
  else \
    lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f)
1405 
/* As PCHARS, but the value returned by the pchars function is discarded. */

#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode != G(G(PCRE,BITONE),_MODE)) \
    (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f); \
  else \
    (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f)
1411 
/* Enumerate the callouts of the current compiled pattern: b is cast to the
callback type for the current mode, c is passed through, and the result is
stored in a. The function stem carries its trailing underscore, like every
other stem in this group, so that the pasted name is
pcre2_callout_enumerate_<width> without depending on pcre2_callout_enumerate
itself being a macro that supplies the underscore. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
     a = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
       (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
  else \
     a = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
       (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
1419 
/* Copy the compiled code at b with pcre2_code_copy and store the copy in the
mode-specific variable a. */

#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  if (test_mode != G(G(PCRE,BITONE),_MODE)) \
    G(a,BITTWO) = G(pcre2_code_copy_,BITTWO)(b); \
  else \
    G(a,BITONE) = G(pcre2_code_copy_,BITONE)(b)
1425 
/* Copy the mode-specific compiled code b with pcre2_code_copy and store the
copy, as a void pointer, in a. */

#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  if (test_mode != G(G(PCRE,BITONE),_MODE)) \
    a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO)); \
  else \
    a = (void *)G(pcre2_code_copy_,BITONE)(G(b,BITONE))
1431 
/* Call pcre2_compile for the current mode on the mode-specific pattern b and
store the result in the mode-specific variable a. */

#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  if (test_mode != G(G(PCRE,BITONE),_MODE)) \
    G(a,BITTWO) = G(pcre2_compile_,BITTWO)(G(b,BITTWO),c,d,e,f,g); \
  else \
    G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,g)
1437 
/* Call pcre2_dfa_match for the current mode; b and g are mode-specific
variables, c is cast to the subject pointer type, and the result goes in a. */

#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  if (test_mode != G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_dfa_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
      G(g,BITTWO),h,i,j); \
  else \
    a = G(pcre2_dfa_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
      G(g,BITONE),h,i,j)
1445 
/* Fetch the text for error code a into the mode-specific buffer b, whose size
is taken from the variable named b<width>_size; the result goes in r. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  if (test_mode != G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_get_error_message_,BITTWO)(a,G(b,BITTWO),G(G(b,BITTWO),_size)); \
  else \
    r = G(pcre2_get_error_message_,BITONE)(a,G(b,BITONE),G(G(b,BITONE),_size))
1451 
1452 #define PCRE2_GET_OVECTOR_COUNT(a,b) \
1453   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1454     a = G(pcre2_get_ovector_count_,BITONE)(G(b,BITONE)); \
1455   else \
1456     a = G(pcre2_get_ovector_count_,BITTWO)(G(b,BITTWO))
1457 
1458 #define PCRE2_GET_STARTCHAR(a,b) \
1459   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1460     a = G(pcre2_get_startchar_,BITONE)(G(b,BITONE)); \
1461   else \
1462     a = G(pcre2_get_startchar_,BITTWO)(G(b,BITTWO))
1463 
/* JIT-compile the width-suffixed compiled pattern, leaving the return code in
the first argument. */

#define PCRE2_JIT_COMPILE(rc,code,options) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_compile_,BITONE)(G(code,BITONE),options); \
  else \
    rc = G(pcre2_jit_compile_,BITTWO)(G(code,BITTWO),options)

/* Call the JIT free-unused-memory function of the selected width on the
width-suffixed context. */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_free_unused_memory_,BITONE)(G(ctx,BITONE)); \
  else \
    G(pcre2_jit_free_unused_memory_,BITTWO)(G(ctx,BITTWO))

/* Call the JIT fast-path matching function of the selected width. The code
and match data names receive the width suffix and the subject is cast to the
PCRE2_SPTR type of that width. */

#define PCRE2_JIT_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject,length,start,options, \
      G(mdata,BITONE),mctx); \
  else \
    rc = G(pcre2_jit_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject,length,start,options, \
      G(mdata,BITTWO),mctx)
1483 
/* JIT stack handling. None of these macros ends with a semicolon or a line
continuation: the call site supplies the terminating semicolon, exactly as for
the other statement macros in this group, so each can be used as the body of
an if that has an else. */

/* Create a JIT stack with the function of the selected width and store it,
cast to PCRE2_JIT_STACK *, in the first argument. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d)

/* Call the JIT stack assign function of the selected width on the
width-suffixed context, casting the second argument to the JIT callback type
of that width and forwarding the third. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE), \
      (G(pcre2_jit_callback_,BITONE))b,c); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO), \
      (G(pcre2_jit_callback_,BITTWO))b,c)

/* Free a JIT stack, casting the pointer to the stack type of the selected
width. */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \
  else \
    G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a)
1501 
/* Build character tables with the maketables function of the selected width,
passing NULL as its argument, and store the result in the macro argument. */

#define PCRE2_MAKETABLES(tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    tables = G(pcre2_maketables_,BITONE)(NULL); \
  else \
    tables = G(pcre2_maketables_,BITTWO)(NULL)

/* Call the matching function of the selected width. The code and match data
names receive the width suffix and the subject is cast to the PCRE2_SPTR type
of that width; the other arguments are forwarded unchanged. */

#define PCRE2_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject,length,start,options, \
      G(mdata,BITONE),mctx); \
  else \
    rc = G(pcre2_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject,length,start,options, \
      G(mdata,BITTWO),mctx)
1515 
/* Create a match data block and store it in the width-suffixed variable whose
base name is the first argument. */

#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = G(pcre2_match_data_create_,BITONE)(ovecsize,gctx); \
  else \
    G(mdata,BITTWO) = G(pcre2_match_data_create_,BITTWO)(ovecsize,gctx)

/* As above, but sized from the width-suffixed compiled pattern named by the
second argument. */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = \
      G(pcre2_match_data_create_from_pattern_,BITONE)(G(code,BITONE),gctx); \
  else \
    G(mdata,BITTWO) = \
      G(pcre2_match_data_create_from_pattern_,BITTWO)(G(code,BITTWO),gctx)

/* Free the width-suffixed match data block. */

#define PCRE2_MATCH_DATA_FREE(mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_match_data_free_,BITONE)(G(mdata,BITONE)); \
  else \
    G(pcre2_match_data_free_,BITTWO)(G(mdata,BITTWO))
1533 
/* Query the width-suffixed compiled pattern; the last two arguments are
forwarded unchanged and the return code is left in the first. */

#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_info_,BITONE)(G(code,BITONE),what,where); \
  else \
    rc = G(pcre2_pattern_info_,BITTWO)(G(code,BITTWO),what,where)

/* Call the printint function of the selected width on compiled_code<width>,
writing to outfile and forwarding the macro argument as the last parameter. */

#define PCRE2_PRINTINT(arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,arg); \
  else \
    G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,arg)
1545 
/* Serialization wrappers. In each case the return code is left in the first
argument. The list of codes is cast to a pointer-to-pointer of the code type
for the selected width, and the trailing context name receives the width
suffix. */

#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_decode_,BITONE)( \
      (G(pcre2_code_,BITONE) **)codes,count,bytes,G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_decode_,BITTWO)( \
      (G(pcre2_code_,BITTWO) **)codes,count,bytes,G(gctx,BITTWO))

#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytes,size,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_encode_,BITONE)( \
      (const G(pcre2_code_,BITONE) **)codes,count,bytes,size, \
      G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_encode_,BITTWO)( \
      (const G(pcre2_code_,BITTWO) **)codes,count,bytes,size, \
      G(gctx,BITTWO))

/* Release a serialized byte stream. */

#define PCRE2_SERIALIZE_FREE(bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_serialize_free_,BITONE)(bytes); \
  else \
    G(pcre2_serialize_free_,BITTWO)(bytes)

/* Ask how many codes a serialized byte stream holds. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_get_number_of_codes_,BITONE)(bytes); \
  else \
    rc = G(pcre2_serialize_get_number_of_codes_,BITTWO)(bytes)
1569 
/* Install a callout function in the width-suffixed context. The second
argument is cast to the callout function type of the selected width and the
third is forwarded unchanged. The definition deliberately does not end with a
semicolon: the call site provides it, which keeps this form interchangeable
with the single-width definitions and usable before an else. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c)
1577 
/* Context setters. The first argument of each is the base name of a context
variable, which receives the width suffix; the remaining arguments are
forwarded unchanged to the setter of the selected width. */

#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_character_tables_,BITONE)(G(ctx,BITONE),tables); \
  else \
    G(pcre2_set_character_tables_,BITTWO)(G(ctx,BITTWO),tables)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_compile_recursion_guard_,BITONE)(G(ctx,BITONE),guard,data); \
  else \
    G(pcre2_set_compile_recursion_guard_,BITTWO)(G(ctx,BITTWO),guard,data)

#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_match_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_match_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_max_pattern_length_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_max_pattern_length_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_offset_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_offset_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_parens_nest_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_parens_nest_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_RECURSION_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_recursion_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_recursion_limit_,BITTWO)(G(ctx,BITTWO),limit)
1619 
/* Call the substitute function of the selected width. The code, match data
and match context names receive the width suffix; the subject and replacement
are cast to the PCRE2_SPTR type and the output buffer to a PCRE2_UCHAR pointer
of that width. The return code is left in the first argument. */

#define PCRE2_SUBSTITUTE(rc,code,subject,length,start,options,mdata,mctx, \
  repl,rlength,outbuf,outlenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substitute_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject,length,start,options, \
      G(mdata,BITONE),G(mctx,BITONE), \
      (G(PCRE2_SPTR,BITONE))repl,rlength, \
      (G(PCRE2_UCHAR,BITONE) *)outbuf,outlenptr); \
  else \
    rc = G(pcre2_substitute_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject,length,start,options, \
      G(mdata,BITTWO),G(mctx,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))repl,rlength, \
      (G(PCRE2_UCHAR,BITTWO) *)outbuf,outlenptr)
1629 
/* Copy a captured substring, selected by name or by number, into a caller
buffer that is cast to a PCRE2_UCHAR pointer of the selected width. The match
data name (and, for the by-name form, the name variable) receives the width
suffix. */

#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),(G(PCRE2_UCHAR,BITONE) *)buf,lenptr); \
  else \
    rc = G(pcre2_substring_copy_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),(G(PCRE2_UCHAR,BITTWO) *)buf,lenptr)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,number,buf,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_bynumber_,BITONE)(G(mdata,BITONE), \
      number,(G(PCRE2_UCHAR,BITONE) *)buf,lenptr); \
  else \
    rc = G(pcre2_substring_copy_bynumber_,BITTWO)(G(mdata,BITTWO), \
      number,(G(PCRE2_UCHAR,BITTWO) *)buf,lenptr)

/* Free a substring, casting the pointer to the PCRE2_UCHAR type of the
selected width. */

#define PCRE2_SUBSTRING_FREE(buf) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)buf); \
  else \
    G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)buf)
1650 
/* Get a captured substring, selected by name or by number. The address that
receives the result is cast to a pointer-to-pointer of the PCRE2_UCHAR type for
the selected width. */

#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),(G(PCRE2_UCHAR,BITONE) **)bufptr,lenptr); \
  else \
    rc = G(pcre2_substring_get_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),(G(PCRE2_UCHAR,BITTWO) **)bufptr,lenptr)

#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,number,bufptr,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_bynumber_,BITONE)(G(mdata,BITONE), \
      number,(G(PCRE2_UCHAR,BITONE) **)bufptr,lenptr); \
  else \
    rc = G(pcre2_substring_get_bynumber_,BITTWO)(G(mdata,BITTWO), \
      number,(G(PCRE2_UCHAR,BITTWO) **)bufptr,lenptr)

/* Find the length of a captured substring, selected by name or by number. */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),lenptr); \
  else \
    rc = G(pcre2_substring_length_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),lenptr)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,number,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_bynumber_,BITONE)(G(mdata,BITONE), \
      number,lenptr); \
  else \
    rc = G(pcre2_substring_length_bynumber_,BITTWO)(G(mdata,BITTWO), \
      number,lenptr)
1678 
/* Get the list of all captured substrings from the width-suffixed match data.
The address that receives the list is cast to a triple PCRE2_UCHAR pointer of
the selected width. */

#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lengthsptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_list_get_,BITONE)(G(mdata,BITONE), \
      (G(PCRE2_UCHAR,BITONE) ***)listptr,lengthsptr); \
  else \
    rc = G(pcre2_substring_list_get_,BITTWO)(G(mdata,BITTWO), \
      (G(PCRE2_UCHAR,BITTWO) ***)listptr,lengthsptr)

/* Free a substring list, casting it to a pointer to the PCRE2_SPTR type of
the selected width. */

#define PCRE2_SUBSTRING_LIST_FREE(list) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)list); \
  else \
    G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)list)

/* Convert a group name to its number; both the code and the name are base
names that receive the width suffix. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(rc,code,name) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_number_from_name_,BITONE)(G(code,BITONE), \
      G(name,BITONE)); \
  else \
    rc = G(pcre2_substring_number_from_name_,BITTWO)(G(code,BITTWO), \
      G(name,BITTWO))
1698 
/* Generic helpers that pick the width-suffixed variant of a variable according
to test_mode. */

/* The selected variable as a void pointer. */

#define PTR(var) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (void *)G(var,BITONE) : \
    (void *)G(var,BITTWO))

/* Assign to a field, or to one element of a vector field, of the structure
the selected variable points to. */

#define SETFLD(base,field,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(base,BITONE)->field = value; \
  else \
    G(base,BITTWO)->field = value

#define SETFLDVEC(base,field,idx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(base,BITONE)->field[idx] = value; \
  else \
    G(base,BITTWO)->field[idx] = value

/* Apply an operator (third argument) with the given right-hand operand
(second argument) to the selected variable. */

#define SETOP(var,operand,op) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE) op operand; \
  else \
    G(var,BITTWO) op operand

/* Point the selected variable at the given address, cast to a pointer to the
unsigned integer type of the matching width. */

#define SETCASTPTR(var,ptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE) = (G(G(uint,BITONE),_t) *)(ptr); \
  else \
    G(var,BITTWO) = (G(G(uint,BITTWO),_t) *)(ptr)

/* Length of a string, using the strlen<width> routine of the selected mode. */

#define STRLEN(str) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))str) : \
    G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))str))

/* Call the width-suffixed function on the width-suffixed argument. */

#define SUB1(fn,arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(fn,BITONE)(G(arg,BITONE)); \
  else \
    G(fn,BITTWO)(G(arg,BITTWO))
1730 
/* Call the width-suffixed function named by the first argument on the two
width-suffixed variables named by the second and third arguments. The function
name is followed directly by its argument list; a stray closing parenthesis
after the name previously left the expansion unbalanced, so any use of this
macro failed to compile. */

#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
1736 
/* Compare the width-suffixed variable selected by test_mode with a value,
using the relational operator given as the second argument. The result is
false when test_mode matches neither of the two supported modes. */

#define TEST(var,op,value) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && G(var,BITONE) op (value)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && G(var,BITTWO) op (value)))

/* As TEST, but the comparison is applied to a field of the structure that the
selected variable points to. */

#define TESTFLD(base,field,op,value) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && \
    G(base,BITONE)->field op (value)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && \
    G(base,BITTWO)->field op (value)))
1744 
1745 
1746 #endif  /* Two out of three modes */
1747 
1748 /* ----- End of cases where more than one mode is supported ----- */
1749 
1750 
1751 /* ----- Only 8-bit mode is supported ----- */
1752 
1753 #elif defined SUPPORT_PCRE2_8
/* Generic accessors for the 8-bit-only build. A base name given as an
argument is completed with the 8 suffix by G(). */

#define CASTFLD(type,base,field) \
  (type)(G(base,8)->field)
#define CASTVAR(type,var) \
  (type)G(var,8)
#define CODE_UNIT(ptr,idx) \
  (uint32_t)(((PCRE2_SPTR8)(ptr))[idx])
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_match_context_8))
#define FLD(base,field) \
  G(base,8)->field
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_compile_context_8))

/* Character output through pchars8(): PCHARS stores the return value in its
first argument, PCHARSV discards it. */

#define PCHARS(count,ptr,start,length,utf_flag,out) \
  count = pchars8((PCRE2_SPTR8)(ptr)+start, length, utf_flag, out)
#define PCHARSV(ptr,start,length,utf_flag,out) \
  (void)pchars8((PCRE2_SPTR8)(ptr)+start, length, utf_flag, out)
/* Library-call wrappers for the 8-bit-only build: callout enumeration, code
copying, compiling, DFA matching, simple getters and JIT compile/match. Where a
macro stores a result, the destination is its first argument. */

#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  rc = pcre2_callout_enumerate_8(compiled_code8, \
    (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))fn,data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,8) = pcre2_code_copy_8(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_8(G(src,8))
#define PCRE2_COMPILE(code,pattern,length,options,errcode,erroffset,ctx) \
  G(code,8) = pcre2_compile_8(G(pattern,8),length,options,errcode, \
    erroffset,ctx)
#define PCRE2_DFA_MATCH(rc,code,subject,length,start,options,mdata,mctx, \
  wspace,wscount) \
  rc = pcre2_dfa_match_8(G(code,8),(PCRE2_SPTR8)subject,length,start, \
    options,G(mdata,8),mctx,wspace,wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  rc = pcre2_get_error_message_8(errcode,G(buf,8),G(G(buf,8),_size))
#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  count = pcre2_get_ovector_count_8(G(mdata,8))
#define PCRE2_GET_STARTCHAR(pos,mdata) \
  pos = pcre2_get_startchar_8(G(mdata,8))
#define PCRE2_JIT_COMPILE(rc,code,options) \
  rc = pcre2_jit_compile_8(G(code,8),options)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  pcre2_jit_free_unused_memory_8(G(ctx,8))
#define PCRE2_JIT_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_jit_match_8(G(code,8),(PCRE2_SPTR8)subject,length,start, \
    options,G(mdata,8),mctx)
/* JIT stack handling for the 8-bit-only build. The definitions do not end
with a semicolon; the call site supplies it, as for the other statement macros
in this table, so each can be used as the body of an if that has an else.
CREATE stores the new stack, cast to PCRE2_JIT_STACK *, in its first argument;
ASSIGN casts its second argument to the 8-bit JIT callback type; FREE casts
its argument to the 8-bit JIT stack type. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a)
/* Library-call wrappers for the 8-bit-only build: table building, matching,
match data handling, pattern information, debug printing and serialization. */

#define PCRE2_MAKETABLES(tables) \
  tables = pcre2_maketables_8(NULL)
#define PCRE2_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_match_8(G(code,8),(PCRE2_SPTR8)subject,length,start,options, \
    G(mdata,8),mctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  G(mdata,8) = pcre2_match_data_create_8(ovecsize,gctx)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  G(mdata,8) = pcre2_match_data_create_from_pattern_8(G(code,8),gctx)
#define PCRE2_MATCH_DATA_FREE(mdata) \
  pcre2_match_data_free_8(G(mdata,8))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_8(G(code,8),what,where)
#define PCRE2_PRINTINT(arg) \
  pcre2_printint_8(compiled_code8,outfile,arg)
#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  rc = pcre2_serialize_decode_8((pcre2_code_8 **)codes,count,bytes, \
    G(gctx,8))
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytes,size,gctx) \
  rc = pcre2_serialize_encode_8((const pcre2_code_8 **)codes,count,bytes, \
    size,G(gctx,8))
#define PCRE2_SERIALIZE_FREE(bytes) \
  pcre2_serialize_free_8(bytes)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  rc = pcre2_serialize_get_number_of_codes_8(bytes)
/* Context setters for the 8-bit-only build. The first argument is the base
name of a context variable, completed with the 8 suffix; the rest is forwarded
(the callout function is first cast to the 8-bit callout function type). */

#define PCRE2_SET_CALLOUT(ctx,fn,data) \
  pcre2_set_callout_8(G(ctx,8), \
    (int (*)(pcre2_callout_block_8 *, void *))fn,data)
#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  pcre2_set_character_tables_8(G(ctx,8),tables)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  pcre2_set_compile_recursion_guard_8(G(ctx,8),guard,data)
#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  pcre2_set_match_limit_8(G(ctx,8),limit)
#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,limit) \
  pcre2_set_max_pattern_length_8(G(ctx,8),limit)
#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  pcre2_set_offset_limit_8(G(ctx,8),limit)
#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  pcre2_set_parens_nest_limit_8(G(ctx,8),limit)
#define PCRE2_SET_RECURSION_LIMIT(ctx,limit) \
  pcre2_set_recursion_limit_8(G(ctx,8),limit)
/* Substitution and substring extraction wrappers for the 8-bit-only build.
The return code is left in the first argument of every macro that has one;
buffers are cast to the 8-bit PCRE2_UCHAR8 / PCRE2_SPTR8 pointer types. */

#define PCRE2_SUBSTITUTE(rc,code,subject,length,start,options,mdata,mctx, \
  repl,rlength,outbuf,outlenptr) \
  rc = pcre2_substitute_8(G(code,8),(PCRE2_SPTR8)subject,length,start, \
    options,G(mdata,8),G(mctx,8),(PCRE2_SPTR8)repl,rlength, \
    (PCRE2_UCHAR8 *)outbuf,outlenptr)
#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  rc = pcre2_substring_copy_byname_8(G(mdata,8),G(name,8), \
    (PCRE2_UCHAR8 *)buf,lenptr)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,number,buf,lenptr) \
  rc = pcre2_substring_copy_bynumber_8(G(mdata,8),number, \
    (PCRE2_UCHAR8 *)buf,lenptr)
#define PCRE2_SUBSTRING_FREE(buf) \
  pcre2_substring_free_8((PCRE2_UCHAR8 *)buf)
#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  rc = pcre2_substring_get_byname_8(G(mdata,8),G(name,8), \
    (PCRE2_UCHAR8 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,number,bufptr,lenptr) \
  rc = pcre2_substring_get_bynumber_8(G(mdata,8),number, \
    (PCRE2_UCHAR8 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  rc = pcre2_substring_length_byname_8(G(mdata,8),G(name,8),lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,number,lenptr) \
  rc = pcre2_substring_length_bynumber_8(G(mdata,8),number,lenptr)
#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lengthsptr) \
  rc = pcre2_substring_list_get_8(G(mdata,8),(PCRE2_UCHAR8 ***)listptr, \
    lengthsptr)
#define PCRE2_SUBSTRING_LIST_FREE(list) \
  pcre2_substring_list_free_8((PCRE2_SPTR8 *)list)
/* Convert a group name to its number in the 8-bit-only build; both the code
and the name are base names completed with the 8 suffix. No trailing semicolon:
the call site supplies it, as it must for the multi-width definition of this
macro. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_8(G(b,8),G(c,8))
/* Generic helpers for the 8-bit-only build. Base names are completed with the
8 suffix by G(). SETOP takes the right-hand operand second and the operator
third; TEST and TESTFLD take a relational operator between the variable (or
field) and the parenthesized value. */

#define PTR(var) \
  (void *)G(var,8)
#define SETFLD(base,field,value) \
  G(base,8)->field = value
#define SETFLDVEC(base,field,idx,value) \
  G(base,8)->field[idx] = value
#define SETOP(var,operand,op) \
  G(var,8) op operand
#define SETCASTPTR(var,ptr) \
  G(var,8) = (uint8_t *)(ptr)
#define STRLEN(str) \
  (int)strlen((char *)str)
#define SUB1(fn,arg) \
  G(fn,8)(G(arg,8))
#define SUB2(fn,arg1,arg2) \
  G(fn,8)(G(arg1,8),G(arg2,8))
#define TEST(var,op,value) \
  (G(var,8) op (value))
#define TESTFLD(base,field,op,value) \
  (G(base,8)->field op (value))
1844 
1845 
1846 /* ----- Only 16-bit mode is supported ----- */
1847 
1848 #elif defined SUPPORT_PCRE2_16
/* Generic accessors for the 16-bit-only build. A base name given as an
argument is completed with the 16 suffix by G(). */

#define CASTFLD(type,base,field) \
  (type)(G(base,16)->field)
#define CASTVAR(type,var) \
  (type)G(var,16)
#define CODE_UNIT(ptr,idx) \
  (uint32_t)(((PCRE2_SPTR16)(ptr))[idx])
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,16),G(src,16),sizeof(pcre2_match_context_16))
#define FLD(base,field) \
  G(base,16)->field
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,16),G(src,16),sizeof(pcre2_compile_context_16))

/* Character output through pchars16(): PCHARS stores the return value in its
first argument, PCHARSV discards it. */

#define PCHARS(count,ptr,start,length,utf_flag,out) \
  count = pchars16((PCRE2_SPTR16)(ptr)+start, length, utf_flag, out)
#define PCHARSV(ptr,start,length,utf_flag,out) \
  (void)pchars16((PCRE2_SPTR16)(ptr)+start, length, utf_flag, out)
/* Library-call wrappers for the 16-bit-only build: callout enumeration, code
copying, compiling, DFA matching, simple getters and JIT compile/match. Where a
macro stores a result, the destination is its first argument. */

#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  rc = pcre2_callout_enumerate_16(compiled_code16, \
    (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))fn,data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,16) = pcre2_code_copy_16(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_16(G(src,16))
#define PCRE2_COMPILE(code,pattern,length,options,errcode,erroffset,ctx) \
  G(code,16) = pcre2_compile_16(G(pattern,16),length,options,errcode, \
    erroffset,ctx)
#define PCRE2_DFA_MATCH(rc,code,subject,length,start,options,mdata,mctx, \
  wspace,wscount) \
  rc = pcre2_dfa_match_16(G(code,16),(PCRE2_SPTR16)subject,length,start, \
    options,G(mdata,16),mctx,wspace,wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  rc = pcre2_get_error_message_16(errcode,G(buf,16),G(G(buf,16),_size))
#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  count = pcre2_get_ovector_count_16(G(mdata,16))
#define PCRE2_GET_STARTCHAR(pos,mdata) \
  pos = pcre2_get_startchar_16(G(mdata,16))
#define PCRE2_JIT_COMPILE(rc,code,options) \
  rc = pcre2_jit_compile_16(G(code,16),options)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  pcre2_jit_free_unused_memory_16(G(ctx,16))
#define PCRE2_JIT_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_jit_match_16(G(code,16),(PCRE2_SPTR16)subject,length,start, \
    options,G(mdata,16),mctx)
/* JIT stack handling for the 16-bit-only build. The definitions do not end
with a semicolon; the call site supplies it, as for the other statement macros
in this table, so each can be used as the body of an if that has an else.
CREATE stores the new stack, cast to PCRE2_JIT_STACK *, in its first argument;
ASSIGN casts its second argument to the 16-bit JIT callback type; FREE casts
its argument to the 16-bit JIT stack type. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a)
/* Library-call wrappers for the 16-bit-only build: table building, matching,
match data handling, pattern information, debug printing and serialization. */

#define PCRE2_MAKETABLES(tables) \
  tables = pcre2_maketables_16(NULL)
#define PCRE2_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_match_16(G(code,16),(PCRE2_SPTR16)subject,length,start,options, \
    G(mdata,16),mctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  G(mdata,16) = pcre2_match_data_create_16(ovecsize,gctx)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  G(mdata,16) = pcre2_match_data_create_from_pattern_16(G(code,16),gctx)
#define PCRE2_MATCH_DATA_FREE(mdata) \
  pcre2_match_data_free_16(G(mdata,16))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_16(G(code,16),what,where)
#define PCRE2_PRINTINT(arg) \
  pcre2_printint_16(compiled_code16,outfile,arg)
#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  rc = pcre2_serialize_decode_16((pcre2_code_16 **)codes,count,bytes, \
    G(gctx,16))
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytes,size,gctx) \
  rc = pcre2_serialize_encode_16((const pcre2_code_16 **)codes,count,bytes, \
    size,G(gctx,16))
#define PCRE2_SERIALIZE_FREE(bytes) \
  pcre2_serialize_free_16(bytes)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  rc = pcre2_serialize_get_number_of_codes_16(bytes)
/* Install a callout function in the 16-bit context whose base name is the
first argument; the function is cast to the 16-bit callout function type and
the third argument is forwarded unchanged. No trailing semicolon: the call site
supplies it, as it must for the 8-bit definition of this macro. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c)
/* Context setters for the 16-bit-only build. The first argument is the base
name of a context variable, completed with the 16 suffix; the rest is
forwarded unchanged. */

#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  pcre2_set_character_tables_16(G(ctx,16),tables)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  pcre2_set_compile_recursion_guard_16(G(ctx,16),guard,data)
#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  pcre2_set_match_limit_16(G(ctx,16),limit)
#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,limit) \
  pcre2_set_max_pattern_length_16(G(ctx,16),limit)
#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  pcre2_set_offset_limit_16(G(ctx,16),limit)
#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  pcre2_set_parens_nest_limit_16(G(ctx,16),limit)
#define PCRE2_SET_RECURSION_LIMIT(ctx,limit) \
  pcre2_set_recursion_limit_16(G(ctx,16),limit)
/* Substitution and substring extraction wrappers for the 16-bit-only build.
The return code is left in the first argument of every macro that has one;
buffers are cast to the 16-bit PCRE2_UCHAR16 / PCRE2_SPTR16 pointer types. */

#define PCRE2_SUBSTITUTE(rc,code,subject,length,start,options,mdata,mctx, \
  repl,rlength,outbuf,outlenptr) \
  rc = pcre2_substitute_16(G(code,16),(PCRE2_SPTR16)subject,length,start, \
    options,G(mdata,16),G(mctx,16),(PCRE2_SPTR16)repl,rlength, \
    (PCRE2_UCHAR16 *)outbuf,outlenptr)
#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  rc = pcre2_substring_copy_byname_16(G(mdata,16),G(name,16), \
    (PCRE2_UCHAR16 *)buf,lenptr)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,number,buf,lenptr) \
  rc = pcre2_substring_copy_bynumber_16(G(mdata,16),number, \
    (PCRE2_UCHAR16 *)buf,lenptr)
#define PCRE2_SUBSTRING_FREE(buf) \
  pcre2_substring_free_16((PCRE2_UCHAR16 *)buf)
#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  rc = pcre2_substring_get_byname_16(G(mdata,16),G(name,16), \
    (PCRE2_UCHAR16 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,number,bufptr,lenptr) \
  rc = pcre2_substring_get_bynumber_16(G(mdata,16),number, \
    (PCRE2_UCHAR16 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  rc = pcre2_substring_length_byname_16(G(mdata,16),G(name,16),lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,number,lenptr) \
  rc = pcre2_substring_length_bynumber_16(G(mdata,16),number,lenptr)
#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lengthsptr) \
  rc = pcre2_substring_list_get_16(G(mdata,16),(PCRE2_UCHAR16 ***)listptr, \
    lengthsptr)
#define PCRE2_SUBSTRING_LIST_FREE(list) \
  pcre2_substring_list_free_16((PCRE2_SPTR16 *)list)
/* Convert a group name to its number in the 16-bit-only build; both the code
and the name are base names completed with the 16 suffix. No trailing
semicolon: the call site supplies it, as it must for the multi-width
definition of this macro. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_16(G(b,16),G(c,16))
/* Generic helpers for the 16-bit-only build. Base names are completed with
the 16 suffix by G(). SETOP takes the right-hand operand second and the
operator third; TEST and TESTFLD take a relational operator between the
variable (or field) and the parenthesized value. */

#define PTR(var) \
  (void *)G(var,16)
#define SETFLD(base,field,value) \
  G(base,16)->field = value
#define SETFLDVEC(base,field,idx,value) \
  G(base,16)->field[idx] = value
#define SETOP(var,operand,op) \
  G(var,16) op operand
#define SETCASTPTR(var,ptr) \
  G(var,16) = (uint16_t *)(ptr)
#define STRLEN(str) \
  (int)strlen16((PCRE2_SPTR16)str)
#define SUB1(fn,arg) \
  G(fn,16)(G(arg,16))
#define SUB2(fn,arg1,arg2) \
  G(fn,16)(G(arg1,16),G(arg2,16))
#define TEST(var,op,value) \
  (G(var,16) op (value))
#define TESTFLD(base,field,op,value) \
  (G(base,16)->field op (value))
1939 
1940 
1941 /* ----- Only 32-bit mode is supported ----- */
1942 
1943 #elif defined SUPPORT_PCRE2_32
/* Generic accessors for the 32-bit-only build. A base name given as an
argument is completed with the 32 suffix by G(). */

#define CASTFLD(type,base,field) \
  (type)(G(base,32)->field)
#define CASTVAR(type,var) \
  (type)G(var,32)
#define CODE_UNIT(ptr,idx) \
  (uint32_t)(((PCRE2_SPTR32)(ptr))[idx])
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,32),G(src,32),sizeof(pcre2_match_context_32))
#define FLD(base,field) \
  G(base,32)->field
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,32),G(src,32),sizeof(pcre2_compile_context_32))

/* Character output through pchars32(): PCHARS stores the return value in its
first argument, PCHARSV discards it. */

#define PCHARS(count,ptr,start,length,utf_flag,out) \
  count = pchars32((PCRE2_SPTR32)(ptr)+start, length, utf_flag, out)
#define PCHARSV(ptr,start,length,utf_flag,out) \
  (void)pchars32((PCRE2_SPTR32)(ptr)+start, length, utf_flag, out)
/* Library-call wrappers for the 32-bit-only build: callout enumeration, code
copying, compiling, DFA matching, simple getters and JIT compile/match. Where a
macro stores a result, the destination is its first argument. */

#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  rc = pcre2_callout_enumerate_32(compiled_code32, \
    (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))fn,data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,32) = pcre2_code_copy_32(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_32(G(src,32))
#define PCRE2_COMPILE(code,pattern,length,options,errcode,erroffset,ctx) \
  G(code,32) = pcre2_compile_32(G(pattern,32),length,options,errcode, \
    erroffset,ctx)
#define PCRE2_DFA_MATCH(rc,code,subject,length,start,options,mdata,mctx, \
  wspace,wscount) \
  rc = pcre2_dfa_match_32(G(code,32),(PCRE2_SPTR32)subject,length,start, \
    options,G(mdata,32),mctx,wspace,wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  rc = pcre2_get_error_message_32(errcode,G(buf,32),G(G(buf,32),_size))
#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  count = pcre2_get_ovector_count_32(G(mdata,32))
#define PCRE2_GET_STARTCHAR(pos,mdata) \
  pos = pcre2_get_startchar_32(G(mdata,32))
#define PCRE2_JIT_COMPILE(rc,code,options) \
  rc = pcre2_jit_compile_32(G(code,32),options)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  pcre2_jit_free_unused_memory_32(G(ctx,32))
#define PCRE2_JIT_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_jit_match_32(G(code,32),(PCRE2_SPTR32)subject,length,start, \
    options,G(mdata,32),mctx)
/* JIT stack handling for the 32-bit-only build. The definitions do not end
with a semicolon; the call site supplies it, as for the other statement macros
in this table, so each can be used as the body of an if that has an else.
CREATE stores the new stack, cast to PCRE2_JIT_STACK *, in its first argument;
ASSIGN casts its second argument to the 32-bit JIT callback type; FREE casts
its argument to the 32-bit JIT stack type. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
/* Library-call wrappers for the 32-bit-only build: table building, matching,
match data handling, pattern information, debug printing and serialization. */

#define PCRE2_MAKETABLES(tables) \
  tables = pcre2_maketables_32(NULL)
#define PCRE2_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_match_32(G(code,32),(PCRE2_SPTR32)subject,length,start,options, \
    G(mdata,32),mctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  G(mdata,32) = pcre2_match_data_create_32(ovecsize,gctx)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  G(mdata,32) = pcre2_match_data_create_from_pattern_32(G(code,32),gctx)
#define PCRE2_MATCH_DATA_FREE(mdata) \
  pcre2_match_data_free_32(G(mdata,32))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_32(G(code,32),what,where)
#define PCRE2_PRINTINT(arg) \
  pcre2_printint_32(compiled_code32,outfile,arg)
#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  rc = pcre2_serialize_decode_32((pcre2_code_32 **)codes,count,bytes, \
    G(gctx,32))
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytes,size,gctx) \
  rc = pcre2_serialize_encode_32((const pcre2_code_32 **)codes,count,bytes, \
    size,G(gctx,32))
#define PCRE2_SERIALIZE_FREE(bytes) \
  pcre2_serialize_free_32(bytes)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  rc = pcre2_serialize_get_number_of_codes_32(bytes)
/* Install a callout function in the 32-bit context whose base name is the
first argument; the function is cast to the 32-bit callout function type and
the third argument is forwarded unchanged. No trailing semicolon: the call site
supplies it, as it must for the 8-bit definition of this macro. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
/* Context setters for the 32-bit-only build. The first argument is the base
name of a context variable, completed with the 32 suffix; the rest is
forwarded unchanged. */

#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  pcre2_set_character_tables_32(G(ctx,32),tables)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  pcre2_set_compile_recursion_guard_32(G(ctx,32),guard,data)
#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  pcre2_set_match_limit_32(G(ctx,32),limit)
#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,limit) \
  pcre2_set_max_pattern_length_32(G(ctx,32),limit)
#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  pcre2_set_offset_limit_32(G(ctx,32),limit)
#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  pcre2_set_parens_nest_limit_32(G(ctx,32),limit)
#define PCRE2_SET_RECURSION_LIMIT(ctx,limit) \
  pcre2_set_recursion_limit_32(G(ctx,32),limit)
/* Substitution and copy-by-name wrappers for the 32-bit-only build. The
return code is left in the first argument; buffers are cast to the 32-bit
PCRE2_UCHAR32 / PCRE2_SPTR32 pointer types. */

#define PCRE2_SUBSTITUTE(rc,code,subject,length,start,options,mdata,mctx, \
  repl,rlength,outbuf,outlenptr) \
  rc = pcre2_substitute_32(G(code,32),(PCRE2_SPTR32)subject,length,start, \
    options,G(mdata,32),G(mctx,32),(PCRE2_SPTR32)repl,rlength, \
    (PCRE2_UCHAR32 *)outbuf,outlenptr)
#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  rc = pcre2_substring_copy_byname_32(G(mdata,32),G(name,32), \
    (PCRE2_UCHAR32 *)buf,lenptr)
/* Copy a captured substring, selected by number, from the 32-bit match data
into a caller buffer cast to PCRE2_UCHAR32 *; the return code is left in the
first argument. No trailing semicolon: the call site supplies it, as it must
for the 8-bit and 16-bit definitions of this macro. */

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
/* Substring extraction wrappers for the 32-bit-only build. The return code is
left in the first argument of every macro that has one; buffers are cast to
the 32-bit PCRE2_UCHAR32 / PCRE2_SPTR32 pointer types. */

#define PCRE2_SUBSTRING_FREE(buf) \
  pcre2_substring_free_32((PCRE2_UCHAR32 *)buf)
#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  rc = pcre2_substring_get_byname_32(G(mdata,32),G(name,32), \
    (PCRE2_UCHAR32 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,number,bufptr,lenptr) \
  rc = pcre2_substring_get_bynumber_32(G(mdata,32),number, \
    (PCRE2_UCHAR32 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  rc = pcre2_substring_length_byname_32(G(mdata,32),G(name,32),lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,number,lenptr) \
  rc = pcre2_substring_length_bynumber_32(G(mdata,32),number,lenptr)
#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lengthsptr) \
  rc = pcre2_substring_list_get_32(G(mdata,32),(PCRE2_UCHAR32 ***)listptr, \
    lengthsptr)
#define PCRE2_SUBSTRING_LIST_FREE(list) \
  pcre2_substring_list_free_32((PCRE2_SPTR32 *)list)
/* Convert a group name to its number in the 32-bit-only build; both the code
and the name are base names completed with the 32 suffix. No trailing
semicolon: the call site supplies it, as it must for the multi-width
definition of this macro. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
/* Generic helpers for the 32-bit-only build. Base names are completed with
the 32 suffix by G(). SETOP takes the right-hand operand second and the
operator third; TEST and TESTFLD take a relational operator between the
variable (or field) and the parenthesized value. */

#define PTR(var) \
  (void *)G(var,32)
#define SETFLD(base,field,value) \
  G(base,32)->field = value
#define SETFLDVEC(base,field,idx,value) \
  G(base,32)->field[idx] = value
#define SETOP(var,operand,op) \
  G(var,32) op operand
#define SETCASTPTR(var,ptr) \
  G(var,32) = (uint32_t *)(ptr)
#define STRLEN(str) \
  (int)strlen32((PCRE2_SPTR32)str)
#define SUB1(fn,arg) \
  G(fn,32)(G(arg,32))
#define SUB2(fn,arg1,arg2) \
  G(fn,32)(G(arg1,32),G(arg2,32))
#define TEST(var,op,value) \
  (G(var,32) op (value))
#define TESTFLD(base,field,op,value) \
  (G(base,32)->field op (value))
2034 
2035 #endif
2036 
2037 /* ----- End of mode-specific function call macros ----- */
2038 
2039 
2040 
2041 
2042 /*************************************************
2043 *         Alternate character tables             *
2044 *************************************************/
2045 
2046 /* By default, the "tables" pointer in the compile context when calling
2047 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2048 library. However, the tables modifier can be used to select alternate sets of
2049 tables, for different kinds of testing. Note that the locale modifier also
2050 adjusts the tables. */
2051 
2052 /* This is the set of tables distributed as default with PCRE2. It recognizes
2053 only ASCII characters. */
2054 
static const uint8_t tables1[] = {

/* Layout: four tables are stored back to back in this one array, 1088 bytes in
all: a 256-byte lower casing table, a 256-byte case flipping table, ten 32-byte
character class bit maps (320 bytes), and a 256-byte character type table. */

/* This table is a lower casing table. It is indexed by character value: the
entries for 'A'-'Z' (65-90) hold 'a'-'z' (97-122), and every other entry holds
its own index. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. The entries for 'A'-'Z' hold the
corresponding lower case letters, the entries for 'a'-'z' hold the
corresponding upper case letters, and every other entry holds its own index. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations.

Within a map, character c is represented by bit (c & 7) of byte (c >> 3). The
maps below appear in the order just listed; all bits for characters 128-255 are
zero in this set. */

/* space: characters 9-13 and 32 */

  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* xdigit: '0'-'9', 'A'-'F', 'a'-'f' */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* digit: '0'-'9' */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* upper: 'A'-'Z' */

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* lower: 'a'-'z' */

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* word: digits, letters, and '_' */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* graph: characters 33-126 */

  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* print: characters 32-126 */

  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* punct: characters 33-47, 58-64, 91-96, and 123-126 */

  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* cntrl: characters 0-31 and 127 */

  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero

The table is indexed by character value, and an entry may combine several
bits: for example, the digits carry 0x1c (0x10|0x08|0x04) and the letters
A-F and a-f carry 0x1a (0x10|0x08|0x02).
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2223 
2224 /* This is a set of tables that came originally from a Windows user. It seems
2225 to be at least an approximation of ISO 8859. In particular, there are
2226 characters greater than 128 that are marked as spaces, letters, etc. */
2227 
static const uint8_t tables2[] = {

/* The entry counts below (256 + 256 + 320 + 256) match the four-part layout
of tables1, so the sections are labelled on that basis: lower casing table,
case flipping table, ten 32-byte class bit maps, character type table. */

/* Lower casing table (256 entries). As well as 'A'-'Z', the entries for
192-214 and 216-222 hold 224-246 and 248-254; 215 and 223 hold themselves. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table (256 entries). In addition to the ASCII letters, the
ranges 192-214/216-222 and 224-246/248-254 are swapped with each other; 215,
223, 247 and 255 hold themselves. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bit maps (ten maps of 32 bytes, four lines each). Presumably
in the same order as in tables1: space, xdigit, digit, upper, lower, word,
graph, print, punct, cntrl. Unlike tables1, several maps have bits set for
characters above 127. */

0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Character type table (256 entries, eight characters per line). The values
use the same bits as the corresponding table in tables1 (1 = white space,
2 = letter, 4 = decimal digit, 8 = hexadecimal digit, 16 = alphanumeric or
'_', 128 = metacharacter or binary zero), written here in decimal. */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
2366 
2367 
2368 
2369 /*************************************************
2370 *            Local memory functions              *
2371 *************************************************/
2372 
2373 /* Alternative memory functions, to test functionality. */
2374 
my_malloc(size_t size,void * data)2375 static void *my_malloc(size_t size, void *data)
2376 {
2377 void *block = malloc(size);
2378 (void)data;
2379 if (show_memory)
2380   fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
2381 return block;
2382 }
2383 
my_free(void * block,void * data)2384 static void my_free(void *block, void *data)
2385 {
2386 (void)data;
2387 if (show_memory)
2388   fprintf(outfile, "free             %p\n", block);
2389 free(block);
2390 }
2391 
2392 /* For recursion malloc/free, to test stacking calls */
2393 
2394 #ifdef HEAP_MATCH_RECURSE
my_stack_malloc(size_t size,void * data)2395 static void *my_stack_malloc(size_t size, void *data)
2396 {
2397 void *block = malloc(size);
2398 (void)data;
2399 if (show_memory)
2400   fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2401 return block;
2402 }
2403 
my_stack_free(void * block,void * data)2404 static void my_stack_free(void *block, void *data)
2405 {
2406 (void)data;
2407 if (show_memory)
2408   fprintf(outfile, "stack_free       %p\n", block);
2409 free(block);
2410 }
2411 #endif  /* HEAP_MATCH_RECURSE */
2412 
2413 
2414 /*************************************************
2415 *       Callback function for stack guard        *
2416 *************************************************/
2417 
2418 /* This is set up to be called from pcre2_compile() when the stackguard=n
2419 modifier sets a value greater than zero. The test we do is whether the
2420 parenthesis nesting depth is greater than the value set by the modifier.
2421 
2422 Argument:  the current parenthesis nesting depth
2423 Returns:   non-zero to kill the compilation
2424 */
2425 
2426 static int
stack_guard(uint32_t depth,void * user_data)2427 stack_guard(uint32_t depth, void *user_data)
2428 {
2429 (void)user_data;
2430 return depth > pat_patctl.stackguard_test;
2431 }
2432 
2433 
2434 /*************************************************
2435 *         JIT memory callback                    *
2436 *************************************************/
2437 
2438 static PCRE2_JIT_STACK*
jit_callback(void * arg)2439 jit_callback(void *arg)
2440 {
2441 jit_was_used = TRUE;
2442 return (PCRE2_JIT_STACK *)arg;
2443 }
2444 
2445 
2446 /*************************************************
2447 *      Convert UTF-8 character to code point     *
2448 *************************************************/
2449 
2450 /* This function reads one or more bytes that represent a UTF-8 character,
2451 and returns the codepoint of that character. Note that the function supports
2452 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2453 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2454 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2455 checking, and also for generating 32-bit non-UTF data values above the UTF
2456 limit.
2457 
2458 Argument:
2459   utf8bytes   a pointer to the byte vector
2460   vptr        a pointer to an int to receive the value
2461 
2462 Returns:      >  0 => the number of bytes consumed
2463               -6 to 0 => malformed UTF-8 character at offset = (-return)
2464 */
2465 
2466 static int
utf82ord(PCRE2_SPTR8 utf8bytes,uint32_t * vptr)2467 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2468 {
2469 uint32_t c = *utf8bytes++;
2470 uint32_t d = c;
2471 int i, j, s;
2472 
2473 for (i = -1; i < 6; i++)               /* i is number of additional bytes */
2474   {
2475   if ((d & 0x80) == 0) break;
2476   d <<= 1;
2477   }
2478 
2479 if (i == -1) { *vptr = c; return 1; }  /* ascii character */
2480 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
2481 
2482 /* i now has a value in the range 1-5 */
2483 
2484 s = 6*i;
2485 d = (c & utf8_table3[i]) << s;
2486 
2487 for (j = 0; j < i; j++)
2488   {
2489   c = *utf8bytes++;
2490   if ((c & 0xc0) != 0x80) return -(j+1);
2491   s -= 6;
2492   d |= (c & 0x3f) << s;
2493   }
2494 
2495 /* Check that encoding was the correct unique one */
2496 
2497 for (j = 0; j < utf8_table1_size; j++)
2498   if (d <= (uint32_t)utf8_table1[j]) break;
2499 if (j != i) return -(i+1);
2500 
2501 /* Valid value */
2502 
2503 *vptr = d;
2504 return i+1;
2505 }
2506 
2507 
2508 
2509 /*************************************************
2510 *             Print one character                *
2511 *************************************************/
2512 
2513 /* Print a single character either literally, or as a hex escape, and count how
2514 many printed characters are used.
2515 
2516 Arguments:
2517   c            the character
2518   utf          TRUE in UTF mode
2519   f            the FILE to print to, or NULL just to count characters
2520 
2521 Returns:       number of characters written
2522 */
2523 
2524 static int
pchar(uint32_t c,BOOL utf,FILE * f)2525 pchar(uint32_t c, BOOL utf, FILE *f)
2526 {
2527 int n = 0;
2528 if (PRINTOK(c))
2529   {
2530   if (f != NULL) fprintf(f, "%c", c);
2531   return 1;
2532   }
2533 
2534 if (c < 0x100)
2535   {
2536   if (utf)
2537     {
2538     if (f != NULL) fprintf(f, "\\x{%02x}", c);
2539     return 6;
2540     }
2541   else
2542     {
2543     if (f != NULL) fprintf(f, "\\x%02x", c);
2544     return 4;
2545     }
2546   }
2547 
2548 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2549 return n >= 0 ? n : 0;
2550 }
2551 
2552 
2553 
2554 #ifdef SUPPORT_PCRE2_16
2555 /*************************************************
2556 *    Find length of 0-terminated 16-bit string   *
2557 *************************************************/
2558 
/* Returns the number of 16-bit code units that precede the terminating zero
unit in the string at p. The pointer difference is converted directly to the
size_t return type instead of being narrowed through int first. */

static size_t strlen16(PCRE2_SPTR16 p)
{
PCRE2_SPTR16 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
2565 #endif  /* SUPPORT_PCRE2_16 */
2566 
2567 
2568 
2569 #ifdef SUPPORT_PCRE2_32
2570 /*************************************************
2571 *    Find length of 0-terminated 32-bit string   *
2572 *************************************************/
2573 
/* Returns the number of 32-bit code units that precede the terminating zero
unit in the string at p. The pointer difference is converted directly to the
size_t return type instead of being narrowed through int first. */

static size_t strlen32(PCRE2_SPTR32 p)
{
PCRE2_SPTR32 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
2580 #endif  /* SUPPORT_PCRE2_32 */
2581 
2582 
2583 #ifdef SUPPORT_PCRE2_8
2584 /*************************************************
2585 *         Print 8-bit character string           *
2586 *************************************************/
2587 
2588 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2589 For printing *MARK strings, a negative length is given. If handed a NULL file,
2590 just counts chars without printing (because pchar() does that). */
2591 
pchars8(PCRE2_SPTR8 p,int length,BOOL utf,FILE * f)2592 static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
2593 {
2594 uint32_t c = 0;
2595 int yield = 0;
2596 
2597 if (length < 0) length = p[-1];
2598 while (length-- > 0)
2599   {
2600   if (utf)
2601     {
2602     int rc = utf82ord(p, &c);
2603     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
2604       {
2605       length -= rc - 1;
2606       p += rc;
2607       yield += pchar(c, utf, f);
2608       continue;
2609       }
2610     }
2611   c = *p++;
2612   yield += pchar(c, utf, f);
2613   }
2614 
2615 return yield;
2616 }
2617 #endif
2618 
2619 
2620 #ifdef SUPPORT_PCRE2_16
2621 /*************************************************
2622 *           Print 16-bit character string        *
2623 *************************************************/
2624 
2625 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2626 For printing *MARK strings, a negative length is given. If handed a NULL file,
2627 just counts chars without printing. */
2628 
pchars16(PCRE2_SPTR16 p,int length,BOOL utf,FILE * f)2629 static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
2630 {
2631 int yield = 0;
2632 if (length < 0) length = p[-1];
2633 while (length-- > 0)
2634   {
2635   uint32_t c = *p++ & 0xffff;
2636   if (utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2637     {
2638     int d = *p & 0xffff;
2639     if (d >= 0xDC00 && d <= 0xDFFF)
2640       {
2641       c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2642       length--;
2643       p++;
2644       }
2645     }
2646   yield += pchar(c, utf, f);
2647   }
2648 return yield;
2649 }
2650 #endif  /* SUPPORT_PCRE2_16 */
2651 
2652 
2653 
2654 #ifdef SUPPORT_PCRE2_32
2655 /*************************************************
2656 *           Print 32-bit character string        *
2657 *************************************************/
2658 
2659 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2660 For printing *MARK strings, a negative length is given.If handed a NULL file,
2661 just counts chars without printing. */
2662 
pchars32(PCRE2_SPTR32 p,int length,BOOL utf,FILE * f)2663 static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
2664 {
2665 int yield = 0;
2666 (void)(utf);  /* Avoid compiler warning */
2667 if (length < 0) length = p[-1];
2668 while (length-- > 0)
2669   {
2670   uint32_t c = *p++;
2671   yield += pchar(c, utf, f);
2672   }
2673 return yield;
2674 }
2675 #endif  /* SUPPORT_PCRE2_32 */
2676 
2677 
2678 
2679 
2680 #ifdef SUPPORT_PCRE2_8
2681 /*************************************************
2682 *       Convert character value to UTF-8         *
2683 *************************************************/
2684 
2685 /* This function takes an integer value in the range 0 - 0x7fffffff
2686 and encodes it as a UTF-8 character in 0 to 6 bytes.
2687 
2688 Arguments:
2689   cvalue     the character value
2690   utf8bytes  pointer to buffer for result - at least 6 bytes long
2691 
2692 Returns:     number of characters placed in the buffer
2693 */
2694 
2695 static int
ord2utf8(uint32_t cvalue,uint8_t * utf8bytes)2696 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
2697 {
2698 register int i, j;
2699 if (cvalue > 0x7fffffffu)
2700   return -1;
2701 for (i = 0; i < utf8_table1_size; i++)
2702   if (cvalue <= (uint32_t)utf8_table1[i]) break;
2703 utf8bytes += i;
2704 for (j = i; j > 0; j--)
2705  {
2706  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
2707  cvalue >>= 6;
2708  }
2709 *utf8bytes = utf8_table2[i] | cvalue;
2710 return i + 1;
2711 }
2712 #endif  /* SUPPORT_PCRE2_8 */
2713 
2714 
2715 
2716 #ifdef SUPPORT_PCRE2_16
2717 /*************************************************
2718 *          Convert pattern to 16-bit             *
2719 *************************************************/
2720 
2721 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
2722 all the input bytes are ASCII, the space needed for a 16-bit string is exactly
2723 double the 8-bit size. Otherwise, the size needed for a 16-bit string is no
2724 more than double, because up to 0xffff uses no more than 3 bytes in UTF-8 but
2725 possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in
2726 UTF-16. The result is always left in pbuffer16. Impose a minimum size to save
2727 repeated re-sizing.
2728 
2729 Note that this function does not object to surrogate values. This is
2730 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
2731 for the purpose of testing that they are correctly faulted.
2732 
2733 Arguments:
2734   p          points to a byte string
2735   utf        non-zero if converting to UTF-16
2736   lenptr     points to number of bytes in the string (excluding trailing zero)
2737 
2738 Returns:     0 on success, with the length updated to the number of 16-bit
2739                data items used (excluding the trailing zero)
2740              OR -1 if a UTF-8 string is malformed
2741              OR -2 if a value > 0x10ffff is encountered in UTF mode
2742              OR -3 if a value > 0xffff is encountered when not in UTF mode
2743 */
2744 
2745 static PCRE2_SIZE
to16(uint8_t * p,int utf,PCRE2_SIZE * lenptr)2746 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
2747 {
2748 uint16_t *pp;
2749 PCRE2_SIZE len = *lenptr;
2750 
2751 if (pbuffer16_size < 2*len + 2)
2752   {
2753   if (pbuffer16 != NULL) free(pbuffer16);
2754   pbuffer16_size = 2*len + 2;
2755   if (pbuffer16_size < 256) pbuffer16_size = 256;
2756   pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
2757   if (pbuffer16 == NULL)
2758     {
2759     fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer16\n",
2760       (unsigned long int)pbuffer16_size);
2761     exit(1);
2762     }
2763   }
2764 
2765 pp = pbuffer16;
2766 if (!utf)
2767   {
2768   for (; len > 0; len--) *pp++ = *p++;
2769   }
2770 else while (len > 0)
2771   {
2772   uint32_t c;
2773   int chlen = utf82ord(p, &c);
2774   if (chlen <= 0) return -1;
2775   if (c > 0x10ffff) return -2;
2776   p += chlen;
2777   len -= chlen;
2778   if (c < 0x10000) *pp++ = c; else
2779     {
2780     if (!utf) return -3;
2781     c -= 0x10000;
2782     *pp++ = 0xD800 | (c >> 10);
2783     *pp++ = 0xDC00 | (c & 0x3ff);
2784     }
2785   }
2786 
2787 *pp = 0;
2788 *lenptr = pp - pbuffer16;
2789 return 0;
2790 }
2791 #endif
2792 
2793 
2794 
2795 #ifdef SUPPORT_PCRE2_32
2796 /*************************************************
2797 *          Convert pattern to 32-bit             *
2798 *************************************************/
2799 
2800 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
2801 all the input bytes are ASCII, the space needed for a 32-bit string is exactly
2802 four times the 8-bit size. Otherwise, the size needed for a 32-bit string is no
2803 more than four times, because the number of characters must be less than the
2804 number of bytes. The result is always left in pbuffer32. Impose a minimum size
2805 to save repeated re-sizing.
2806 
2807 Note that this function does not object to surrogate values. This is
2808 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
2809 for the purpose of testing that they are correctly faulted.
2810 
2811 Arguments:
2812   p          points to a byte string
2813   utf        true if UTF-8 (to be converted to UTF-32)
2814   lenptr     points to number of bytes in the string (excluding trailing zero)
2815 
2816 Returns:     0 on success, with the length updated to the number of 32-bit
2817                data items used (excluding the trailing zero)
2818              OR -1 if a UTF-8 string is malformed
2819              OR -2 if a value > 0x10ffff is encountered in UTF mode
2820 */
2821 
2822 static PCRE2_SIZE
to32(uint8_t * p,int utf,PCRE2_SIZE * lenptr)2823 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
2824 {
2825 uint32_t *pp;
2826 PCRE2_SIZE len = *lenptr;
2827 
2828 if (pbuffer32_size < 4*len + 4)
2829   {
2830   if (pbuffer32 != NULL) free(pbuffer32);
2831   pbuffer32_size = 4*len + 4;
2832   if (pbuffer32_size < 256) pbuffer32_size = 256;
2833   pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
2834   if (pbuffer32 == NULL)
2835     {
2836     fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer32\n",
2837       (unsigned long int)pbuffer32_size);
2838     exit(1);
2839     }
2840   }
2841 
2842 pp = pbuffer32;
2843 if (!utf)
2844   {
2845   for (; len > 0; len--) *pp++ = *p++;
2846   }
2847 else while (len > 0)
2848   {
2849   uint32_t c;
2850   int chlen = utf82ord(p, &c);
2851   if (chlen <= 0) return -1;
2852   if (utf && c > 0x10ffff) return -2;
2853   p += chlen;
2854   len -= chlen;
2855   *pp++ = c;
2856   }
2857 
2858 *pp = 0;
2859 *lenptr = pp - pbuffer32;
2860 return 0;
2861 }
2862 #endif /* SUPPORT_PCRE2_32 */
2863 
2864 
2865 
2866 /*************************************************
2867 *         Move back by so many characters        *
2868 *************************************************/
2869 
2870 /* Given a code unit offset in a subject string, move backwards by a number of
2871 characters, and return the resulting offset.
2872 
2873 Arguments:
2874   subject   pointer to the string
2875   offset    start offset
2876   count     count to move back by
2877   utf       TRUE if in UTF mode
2878 
2879 Returns:   a possibly changed offset
2880 */
2881 
2882 static PCRE2_SIZE
backchars(uint8_t * subject,PCRE2_SIZE offset,uint32_t count,BOOL utf)2883 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
2884 {
2885 if (!utf || test_mode == PCRE32_MODE)
2886   return (count >= offset)? 0 : (offset - count);
2887 
2888 else if (test_mode == PCRE8_MODE)
2889   {
2890   PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
2891   for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
2892     {
2893     pp--;
2894     while ((*pp & 0xc0) == 0x80) pp--;
2895     }
2896   return pp - (PCRE2_SPTR8)subject;
2897   }
2898 
2899 else  /* 16-bit mode */
2900   {
2901   PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
2902   for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
2903     {
2904     pp--;
2905     if ((*pp & 0xfc00) == 0xdc00) pp--;
2906     }
2907   return pp - (PCRE2_SPTR16)subject;
2908   }
2909 }
2910 
2911 
2912 
2913 /*************************************************
2914 *           Expand input buffers                 *
2915 *************************************************/
2916 
2917 /* This function doubles the size of the input buffer and the buffer for
2918 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
2919 the new ones.
2920 
2921 Arguments: none
2922 Returns:   nothing (aborts if malloc() fails)
2923 */
2924 
2925 static void
expand_input_buffers(void)2926 expand_input_buffers(void)
2927 {
2928 int new_pbuffer8_size = 2*pbuffer8_size;
2929 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
2930 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
2931 
2932 if (new_buffer == NULL || new_pbuffer8 == NULL)
2933   {
2934   fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
2935   exit(1);
2936   }
2937 
2938 memcpy(new_buffer, buffer, pbuffer8_size);
2939 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
2940 
2941 pbuffer8_size = new_pbuffer8_size;
2942 
2943 free(buffer);
2944 free(pbuffer8);
2945 
2946 buffer = new_buffer;
2947 pbuffer8 = new_pbuffer8;
2948 }
2949 
2950 
2951 
2952 /*************************************************
2953 *        Read or extend an input line            *
2954 *************************************************/
2955 
2956 /* Input lines are read into buffer, but both patterns and data lines can be
2957 continued over multiple input lines. In addition, if the buffer fills up, we
2958 want to automatically expand it so as to be able to handle extremely large
2959 lines that are needed for certain stress tests, although this is less likely
2960 now that there are repetition features for both patterns and data. When the
2961 input buffer is expanded, the other two buffers must also be expanded likewise,
2962 and the contents of pbuffer, which are a copy of the input for callouts, must
2963 be preserved (for when expansion happens for a data line). This is not the most
2964 optimal way of handling this, but hey, this is just a test program!
2965 
2966 Arguments:
2967   f            the file to read
2968   start        where in buffer to start (this *must* be within buffer)
2969   prompt       for stdin or readline()
2970 
2971 Returns:       pointer to the start of new data
2972                could be a copy of start, or could be moved
2973                NULL if no data read and EOF reached
2974 */
2975 
2976 static uint8_t *
extend_inputline(FILE * f,uint8_t * start,const char * prompt)2977 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
2978 {
2979 uint8_t *here = start;
2980 
2981 for (;;)
2982   {
2983   size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
2984 
2985   if (rlen > 1000)
2986     {
2987     size_t dlen;
2988 
2989     /* If libreadline or libedit support is required, use readline() to read a
2990     line if the input is a terminal. Note that readline() removes the trailing
2991     newline, so we must put it back again, to be compatible with fgets(). */
2992 
2993 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2994     if (INTERACTIVE(f))
2995       {
2996       size_t len;
2997       char *s = readline(prompt);
2998       if (s == NULL) return (here == start)? NULL : start;
2999       len = strlen(s);
3000       if (len > 0) add_history(s);
3001       if (len > rlen - 1) len = rlen - 1;
3002       memcpy(here, s, len);
3003       here[len] = '\n';
3004       here[len+1] = 0;
3005       free(s);
3006       }
3007     else
3008 #endif
3009 
3010     /* Read the next line by normal means, prompting if the file is a tty. */
3011 
3012       {
3013       if (INTERACTIVE(f)) printf("%s", prompt);
3014       if (fgets((char *)here, rlen,  f) == NULL)
3015         return (here == start)? NULL : start;
3016       }
3017 
3018     dlen = strlen((char *)here);
3019     here += dlen;
3020 
3021     /* Check for end of line reached. Take care not to read data from before
3022     start (dlen will be zero for a file starting with a binary zero). */
3023 
3024     if (here > start && here[-1] == '\n') return start;
3025 
3026     /* If we have not read a newline when reading a file, we have either filled
3027     the buffer or reached the end of the file. We can detect the former by
3028     checking that the string fills the buffer, and the latter by feof(). If
3029     neither of these is true, it means we read a binary zero which has caused
3030     strlen() to give a short length. This is a hard error because pcre2test
3031     expects to work with C strings. */
3032 
3033     if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3034       {
3035       fprintf(outfile, "** Binary zero encountered in input\n");
3036       fprintf(outfile, "** pcre2test run abandoned\n");
3037       exit(1);
3038       }
3039     }
3040 
3041   else
3042     {
3043     size_t start_offset = start - buffer;
3044     size_t here_offset = here - buffer;
3045     expand_input_buffers();
3046     start = buffer + start_offset;
3047     here = buffer + here_offset;
3048     }
3049   }
3050 
3051 /* Control never gets here */
3052 }
3053 
3054 
3055 
3056 /*************************************************
3057 *         Case-independent strncmp() function    *
3058 *************************************************/
3059 
3060 /*
3061 Arguments:
3062   s         first string
3063   t         second string
3064   n         number of characters to compare
3065 
3066 Returns:    < 0, = 0, or > 0, according to the comparison
3067 */
3068 
static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
/* Step through both strings together. A zero code unit does not end the scan:
exactly n positions are examined unless a difference turns up first. */

for (; n != 0; n--, s++, t++)
  {
  int diff = tolower(*s) - tolower(*t);
  if (diff != 0) return diff;
  }

return 0;   /* All n positions are equal when case is ignored */
}
3079 
3080 
3081 
3082 /*************************************************
3083 *          Scan the main modifier list           *
3084 *************************************************/
3085 
3086 /* This function searches the modifier list for a long modifier name.
3087 
3088 Argument:
3089   p         start of the name
3090   len       length of the name
3091 
3092 Returns:    an index in the modifier list, or -1 on failure
3093 */
3094 
3095 static int
scan_modifiers(const uint8_t * p,unsigned int len)3096 scan_modifiers(const uint8_t *p, unsigned int len)
3097 {
3098 int bot = 0;
3099 int top = MODLISTCOUNT;
3100 
3101 while (top > bot)
3102   {
3103   int mid = (bot + top)/2;
3104   unsigned int mlen = strlen(modlist[mid].name);
3105   int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3106   if (c == 0)
3107     {
3108     if (len == mlen) return mid;
3109     c = (int)len - (int)mlen;
3110     }
3111   if (c > 0) bot = mid + 1; else top = mid;
3112   }
3113 
3114 return -1;
3115 
3116 }
3117 
3118 
3119 
3120 /*************************************************
3121 *        Check a modifier and find its field     *
3122 *************************************************/
3123 
3124 /* This function is called when a modifier has been identified. We check that
3125 it is allowed here and find the field that is to be changed.
3126 
3127 Arguments:
3128   m          the modifier list entry
3129   ctx        CTX_PAT     => pattern context
3130              CTX_POPPAT  => pattern context for popped pattern
3131              CTX_DEFPAT  => default pattern context
3132              CTX_DAT     => data context
3133              CTX_DEFDAT  => default data context
3134   pctl       point to pattern control block
3135   dctl       point to data control block
3136   c          a single character or 0
3137 
3138 Returns:     a field pointer or NULL
3139 */
3140 
3141 static void *
check_modifier(modstruct * m,int ctx,patctl * pctl,datctl * dctl,uint32_t c)3142 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3143 {
3144 void *field = NULL;
3145 PCRE2_SIZE offset = m->offset;
3146 
3147 if (restrict_for_perl_test) switch(m->which)
3148   {
3149   case MOD_PNDP:
3150   case MOD_PATP:
3151   case MOD_PDP:
3152   break;
3153 
3154   default:
3155   fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3156     m->name);
3157   return NULL;
3158   }
3159 
3160 switch (m->which)
3161   {
3162   case MOD_CTC:  /* Compile context modifier */
3163   if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3164     else if (ctx == CTX_PAT) field = PTR(pat_context);
3165   break;
3166 
3167   case MOD_CTM:  /* Match context modifier */
3168   if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3169     else if (ctx == CTX_DAT) field = PTR(dat_context);
3170   break;
3171 
3172   case MOD_DAT:  /* Data line modifier */
3173   if (dctl != NULL) field = dctl;
3174   break;
3175 
3176   case MOD_PAT:    /* Pattern modifier */
3177   case MOD_PATP:   /* Allowed for Perl test */
3178   if (pctl != NULL) field = pctl;
3179   break;
3180 
3181   case MOD_PD:   /* Pattern or data line modifier */
3182   case MOD_PDP:  /* Ditto, allowed for Perl test */
3183   case MOD_PND:  /* Ditto, but not default pattern */
3184   case MOD_PNDP: /* Ditto, allowed for Perl test */
3185   if (dctl != NULL) field = dctl;
3186     else if (pctl != NULL && (m->which == MOD_PD || ctx != CTX_DEFPAT))
3187       field = pctl;
3188   break;
3189   }
3190 
3191 if (field == NULL)
3192   {
3193   if (c == 0)
3194     fprintf(outfile, "** '%s' is not valid here\n", m->name);
3195   else
3196     fprintf(outfile, "** /%c is not valid here\n", c);
3197   return NULL;
3198   }
3199 
3200 return (char *)field + offset;
3201 }
3202 
3203 
3204 
3205 /*************************************************
3206 *            Decode a modifier list              *
3207 *************************************************/
3208 
3209 /* A pointer to a control block is NULL when called in cases when that block is
3210 not relevant. They are never all relevant in one call. At least one of patctl
3211 and datctl is NULL. The second argument specifies which context to use for
3212 modifiers that apply to contexts.
3213 
3214 Arguments:
3215   p          point to modifier string
3216   ctx        CTX_PAT     => pattern context
3217              CTX_POPPAT  => pattern context for popped pattern
3218              CTX_DEFPAT  => default pattern context
3219              CTX_DAT     => data context
3220              CTX_DEFDAT  => default data context
3221   pctl       point to pattern control block
3222   dctl       point to data control block
3223 
3224 Returns: TRUE if successful decode, FALSE otherwise
3225 */
3226 
3227 static BOOL
decode_modifiers(uint8_t * p,int ctx,patctl * pctl,datctl * dctl)3228 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3229 {
3230 uint8_t *ep, *pp;
3231 long li;
3232 unsigned long uli;
3233 BOOL first = TRUE;
3234 
3235 for (;;)
3236   {
3237   void *field;
3238   modstruct *m;
3239   BOOL off = FALSE;
3240   unsigned int i, len;
3241   int index;
3242   char *endptr;
3243 
3244   /* Skip white space and commas. */
3245 
3246   while (isspace(*p) || *p == ',') p++;
3247   if (*p == 0) break;
3248 
3249   /* Find the end of the item; lose trailing whitespace at end of line. */
3250 
3251   for (ep = p; *ep != 0 && *ep != ','; ep++);
3252   if (*ep == 0)
3253     {
3254     while (ep > p && isspace(ep[-1])) ep--;
3255     *ep = 0;
3256     }
3257 
3258   /* Remember if the first character is '-'. */
3259 
3260   if (*p == '-')
3261     {
3262     off = TRUE;
3263     p++;
3264     }
3265 
3266   /* Find the length of a full-length modifier name, and scan for it. */
3267 
3268   pp = p;
3269   while (pp < ep && *pp != '=') pp++;
3270   index = scan_modifiers(p, pp - p);
3271 
3272   /* If the first modifier is unrecognized, try to interpret it as a sequence
3273   of single-character abbreviated modifiers. None of these modifiers have any
3274   associated data. They just set options or control bits. */
3275 
3276   if (index < 0)
3277     {
3278     uint32_t cc;
3279     uint8_t *mp = p;
3280 
3281     if (!first)
3282       {
3283       fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3284       if (ep - p == 1)
3285         fprintf(outfile, "** Single-character modifiers must come first\n");
3286       return FALSE;
3287       }
3288 
3289     for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3290       {
3291       for (i = 0; i < C1MODLISTCOUNT; i++)
3292         if (cc == c1modlist[i].onechar) break;
3293 
3294       if (i >= C1MODLISTCOUNT)
3295         {
3296         fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3297           *p, (int)(ep-mp), mp);
3298         return FALSE;
3299         }
3300 
3301       if (c1modlist[i].index >= 0)
3302         {
3303         index = c1modlist[i].index;
3304         }
3305 
3306       else
3307         {
3308         index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3309           strlen(c1modlist[i].fullname));
3310         if (index < 0)
3311           {
3312           fprintf(outfile, "** Internal error: single-character equivalent "
3313             "modifier '%s' not found\n", c1modlist[i].fullname);
3314           return FALSE;
3315           }
3316         c1modlist[i].index = index;     /* Cache for next time */
3317         }
3318 
3319       field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3320       if (field == NULL) return FALSE;
3321       *((uint32_t *)field) |= modlist[index].value;
3322       }
3323 
3324     continue;    /* With tne next (fullname) modifier */
3325     }
3326 
3327   /* We have a match on a full-name modifier. Check for the existence of data
3328   when needed. */
3329 
3330   m = modlist + index;      /* Save typing */
3331   if (m->type != MOD_CTL && m->type != MOD_OPT &&
3332       (m->type != MOD_IND || *pp == '='))
3333     {
3334     if (*pp++ != '=')
3335       {
3336       fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3337       return FALSE;
3338       }
3339     if (off)
3340       {
3341       fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3342       return FALSE;
3343       }
3344     }
3345 
3346   /* These on/off types have no data. */
3347 
3348   else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3349     {
3350     fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3351     return FALSE;
3352     }
3353 
3354   /* Set the data length for those types that have data. Then find the field
3355   that is to be set. If check_modifier() returns NULL, it has already output an
3356   error message. */
3357 
3358   len = ep - pp;
3359   field = check_modifier(m, ctx, pctl, dctl, 0);
3360   if (field == NULL) return FALSE;
3361 
3362   /* Process according to data type. */
3363 
3364   switch (m->type)
3365     {
3366     case MOD_CTL:
3367     case MOD_OPT:
3368     if (off) *((uint32_t *)field) &= ~m->value;
3369       else *((uint32_t *)field) |= m->value;
3370     break;
3371 
3372     case MOD_BSR:
3373     if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3374       {
3375 #ifdef BSR_ANYCRLF
3376       *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3377 #else
3378       *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3379 #endif
3380       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_BSR_SET;
3381         else dctl->control &= ~CTL_BSR_SET;
3382       }
3383     else
3384       {
3385       if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3386         *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3387       else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3388         *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3389       else goto INVALID_VALUE;
3390       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_BSR_SET;
3391         else dctl->control |= CTL_BSR_SET;
3392       }
3393     pp = ep;
3394     break;
3395 
3396     case MOD_IN2:    /* One or two unsigned integers */
3397     if (!isdigit(*pp)) goto INVALID_VALUE;
3398     uli = strtoul((const char *)pp, &endptr, 10);
3399     if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3400     ((uint32_t *)field)[0] = (uint32_t)uli;
3401     if (*endptr == ':')
3402       {
3403       uli = strtoul((const char *)endptr+1, &endptr, 10);
3404       if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3405       ((uint32_t *)field)[1] = (uint32_t)uli;
3406       }
3407     else ((uint32_t *)field)[1] = 0;
3408     pp = (uint8_t *)endptr;
3409     break;
3410 
3411     /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3412     less than ULONG_MAX. So first test for overflowing the long int, and then
3413     test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3414 
3415     case MOD_SIZ:    /* PCRE2_SIZE value */
3416     if (!isdigit(*pp)) goto INVALID_VALUE;
3417     uli = strtoul((const char *)pp, &endptr, 10);
3418     if (uli == ULONG_MAX) goto INVALID_VALUE;
3419 #if ULONG_MAX > PCRE2_SIZE_MAX
3420     if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3421 #endif
3422     *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
3423     pp = (uint8_t *)endptr;
3424     break;
3425 
3426     case MOD_IND:    /* Unsigned integer with default */
3427     if (len == 0)
3428       {
3429       *((uint32_t *)field) = (uint32_t)(m->value);
3430       break;
3431       }
3432     /* Fall through */
3433 
3434     case MOD_INT:    /* Unsigned integer */
3435     if (!isdigit(*pp)) goto INVALID_VALUE;
3436     uli = strtoul((const char *)pp, &endptr, 10);
3437     if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3438     *((uint32_t *)field) = (uint32_t)uli;
3439     pp = (uint8_t *)endptr;
3440     break;
3441 
3442     case MOD_INS:   /* Signed integer */
3443     if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
3444     li = strtol((const char *)pp, &endptr, 10);
3445     if (S32OVERFLOW(li)) goto INVALID_VALUE;
3446     *((int32_t *)field) = (int32_t)li;
3447     pp = (uint8_t *)endptr;
3448     break;
3449 
3450     case MOD_NL:
3451     for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
3452       if (len == strlen(newlines[i]) &&
3453         strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
3454     if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
3455     if (i == 0)
3456       {
3457       *((uint16_t *)field) = NEWLINE_DEFAULT;
3458       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_NL_SET;
3459         else dctl->control &= ~CTL_NL_SET;
3460       }
3461     else
3462       {
3463       *((uint16_t *)field) = i;
3464       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_NL_SET;
3465         else dctl->control |= CTL_NL_SET;
3466       }
3467     pp = ep;
3468     break;
3469 
3470     case MOD_NN:              /* Name or (signed) number; may be several */
3471     if (isdigit(*pp) || *pp == '-')
3472       {
3473       int ct = MAXCPYGET - 1;
3474       int32_t value;
3475       li = strtol((const char *)pp, &endptr, 10);
3476       if (S32OVERFLOW(li)) goto INVALID_VALUE;
3477       value = (int32_t)li;
3478       field = (char *)field - m->offset + m->value;      /* Adjust field ptr */
3479       if (value >= 0)                                    /* Add new number */
3480         {
3481         while (*((int32_t *)field) >= 0 && ct-- > 0)   /* Skip previous */
3482           field = (char *)field + sizeof(int32_t);
3483         if (ct <= 0)
3484           {
3485           fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
3486           return FALSE;
3487           }
3488         }
3489       *((int32_t *)field) = value;
3490       if (ct > 0) ((int32_t *)field)[1] = -1;
3491       pp = (uint8_t *)endptr;
3492       }
3493 
3494     /* Multiple strings are put end to end. */
3495 
3496     else
3497       {
3498       char *nn = (char *)field;
3499       if (len > 0)                    /* Add new name */
3500         {
3501         while (*nn != 0) nn += strlen(nn) + 1;
3502         if (nn + len + 1 - (char *)field > LENCPYGET)
3503           {
3504           fprintf(outfile, "** Too many named '%s' modifiers\n", m->name);
3505           return FALSE;
3506           }
3507         memcpy(nn, pp, len);
3508         }
3509       nn[len] = 0 ;
3510       nn[len+1] = 0;
3511       pp = ep;
3512       }
3513     break;
3514 
3515     case MOD_STR:
3516     if (len + 1 > m->value)
3517       {
3518       fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
3519         m->name, m->value - 1);
3520       return FALSE;
3521       }
3522     memcpy(field, pp, len);
3523     ((uint8_t *)field)[len] = 0;
3524     pp = ep;
3525     break;
3526     }
3527 
3528   if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3529     {
3530     fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
3531     return FALSE;
3532     }
3533 
3534   p = pp;
3535   first = FALSE;
3536 
3537   if (ctx == CTX_POPPAT &&
3538      (pctl->options != 0 ||
3539       pctl->tables_id != 0 ||
3540       pctl->locale[0] != 0 ||
3541       (pctl->control & NOTPOP_CONTROLS) != 0))
3542     {
3543     fprintf(outfile, "** '%s' is not valid here\n", m->name);
3544     return FALSE;
3545     }
3546   }
3547 
3548 return TRUE;
3549 
3550 INVALID_VALUE:
3551 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
3552 return FALSE;
3553 }
3554 
3555 
3556 /*************************************************
3557 *             Get info from a pattern            *
3558 *************************************************/
3559 
3560 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
3561 pattern.
3562 
3563 Arguments:
3564   what        code for the required information
3565   where       where to put the answer
3566   unsetok     PCRE2_ERROR_UNSET is an "expected" result
3567 
3568 Returns:      the return from pcre2_pattern_info()
3569 */
3570 
3571 static int
pattern_info(int what,void * where,BOOL unsetok)3572 pattern_info(int what, void *where, BOOL unsetok)
3573 {
3574 int rc;
3575 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
3576 if (rc >= 0) return 0;
3577 if (rc != PCRE2_ERROR_UNSET || !unsetok)
3578   {
3579   fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
3580     what);
3581   if (rc == PCRE2_ERROR_BADMODE)
3582     fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3583       "%d-bit mode\n", test_mode,
3584       8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
3585   }
3586 return rc;
3587 }
3588 
3589 
3590 
3591 #ifdef SUPPORT_PCRE2_8
3592 /*************************************************
3593 *             Show something in a list           *
3594 *************************************************/
3595 
3596 /* This function just helps to keep the code that uses it tidier. It's used for
3597 various lists of things where there needs to be introductory text before the
3598 first item. As these calls are all in the POSIX-support code, they happen only
3599 when 8-bit mode is supported. */
3600 
3601 static void
prmsg(const char ** msg,const char * s)3602 prmsg(const char **msg, const char *s)
3603 {
3604 fprintf(outfile, "%s %s", *msg, s);
3605 *msg = "";
3606 }
3607 #endif  /* SUPPORT_PCRE2_8 */
3608 
3609 
3610 
3611 /*************************************************
3612 *                Show control bits               *
3613 *************************************************/
3614 
3615 /* Called for mutually exclusive controls and for unsupported POSIX controls.
3616 Because the bits are unique, this can be used for both pattern and data control
3617 words.
3618 
3619 Arguments:
3620   controls    control bits
3621   controls2   more control bits
3622   before      text to print before
3623 
3624 Returns:      nothing
3625 */
3626 
3627 static void
show_controls(uint32_t controls,uint32_t controls2,const char * before)3628 show_controls(uint32_t controls, uint32_t controls2, const char *before)
3629 {
3630 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
3631   before,
3632   ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
3633   ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
3634   ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
3635   ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
3636   ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
3637   ((controls & CTL_BINCODE) != 0)? " bincode" : "",
3638   ((controls & CTL_BSR_SET) != 0)? " bsr" : "",
3639   ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
3640   ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
3641   ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
3642   ((controls & CTL_DFA) != 0)? " dfa" : "",
3643   ((controls & CTL_EXPAND) != 0)? " expand" : "",
3644   ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
3645   ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
3646   ((controls & CTL_GETALL) != 0)? " getall" : "",
3647   ((controls & CTL_GLOBAL) != 0)? " global" : "",
3648   ((controls & CTL_HEXPAT) != 0)? " hex" : "",
3649   ((controls & CTL_INFO) != 0)? " info" : "",
3650   ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
3651   ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
3652   ((controls & CTL_MARK) != 0)? " mark" : "",
3653   ((controls & CTL_MEMORY) != 0)? " memory" : "",
3654   ((controls & CTL_NL_SET) != 0)? " newline" : "",
3655   ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
3656   ((controls & CTL_POSIX) != 0)? " posix" : "",
3657   ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
3658   ((controls & CTL_PUSH) != 0)? " push" : "",
3659   ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
3660   ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
3661   ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
3662   ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
3663   ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
3664   ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
3665   ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
3666 }
3667 
3668 
3669 
3670 /*************************************************
3671 *                Show compile options            *
3672 *************************************************/
3673 
3674 /* Called from show_pattern_info() and for unsupported POSIX options.
3675 
3676 Arguments:
3677   options     an options word
3678   before      text to print before
3679   after       text to print after
3680 
3681 Returns:      nothing
3682 */
3683 
3684 static void
show_compile_options(uint32_t options,const char * before,const char * after)3685 show_compile_options(uint32_t options, const char *before, const char *after)
3686 {
3687 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
3688 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
3689   before,
3690   ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
3691   ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
3692   ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
3693   ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
3694   ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
3695   ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
3696   ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
3697   ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3698   ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
3699   ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
3700   ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
3701   ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
3702   ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
3703   ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
3704   ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
3705   ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
3706   ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
3707   ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3708   ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
3709   ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
3710   ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
3711   ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3712   ((options & PCRE2_UCP) != 0)? " ucp" : "",
3713   ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
3714   ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
3715   ((options & PCRE2_UTF) != 0)? " utf" : "",
3716   after);
3717 }
3718 
3719 
3720 
3721 #ifdef SUPPORT_PCRE2_8
3722 /*************************************************
3723 *                Show match options              *
3724 *************************************************/
3725 
3726 /* Called for unsupported POSIX options. */
3727 
3728 static void
show_match_options(uint32_t options)3729 show_match_options(uint32_t options)
3730 {
3731 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s",
3732   ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
3733   ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
3734   ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
3735   ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
3736   ((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
3737   ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
3738   ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
3739   ((options & PCRE2_NOTEOL) != 0)? " noteol" : "",
3740   ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
3741   ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
3742 }
3743 #endif  /* SUPPORT_PCRE2_8 */
3744 
3745 
3746 
3747 /*************************************************
3748 *      Show memory usage info for a pattern      *
3749 *************************************************/
3750 
3751 static void
show_memory_info(void)3752 show_memory_info(void)
3753 {
3754 uint32_t name_count, name_entry_size;
3755 size_t size, cblock_size;
3756 
3757 /* One of the test_mode values will always be true, but to stop a compiler
3758 warning we must initialize cblock_size. */
3759 
3760 cblock_size = 0;
3761 #ifdef SUPPORT_PCRE2_8
3762 if (test_mode == 8) cblock_size = sizeof(pcre2_real_code_8);
3763 #endif
3764 #ifdef SUPPORT_PCRE2_16
3765 if (test_mode == 16) cblock_size = sizeof(pcre2_real_code_16);
3766 #endif
3767 #ifdef SUPPORT_PCRE2_32
3768 if (test_mode == 32) cblock_size = sizeof(pcre2_real_code_32);
3769 #endif
3770 
3771 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
3772 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
3773 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
3774 fprintf(outfile, "Memory allocation (code space): %d\n",
3775   (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
3776 if (pat_patctl.jit != 0)
3777   {
3778   (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
3779   fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
3780   }
3781 }
3782 
3783 
3784 
3785 /*************************************************
3786 *     Callback function for callout enumeration  *
3787 *************************************************/
3788 
3789 /* The only differences in the callout enumeration block for different code
3790 unit widths are that the pointers to the subject, the most recent MARK, and a
3791 callout argument string point to strings of the appropriate width. Casts can be
3792 used to deal with this.
3793 
3794 Argument:
3795   cb            pointer to enumerate block
3796   callout_data  user data
3797 
3798 Returns:    0
3799 */
3800 
static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
  void *callout_data)
{
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
int shown;

(void)callout_data;  /* Not currently displayed */

fprintf(outfile, "Callout ");

/* A callout without a string argument is identified by its number. */

if (cb->callout_string == NULL)
  {
  fprintf(outfile, "%d  ", cb->callout_number);
  }

/* Otherwise show the string between its delimiters. The opening delimiter is
the code unit immediately before the string. If it appears in the list of
start delimiters, the corresponding entry in the list of end delimiters is
used to close the string; if not, the same character is output again. */

else
  {
  uint32_t k = 0;
  uint32_t delim = CODE_UNIT(cb->callout_string, -1);

  fprintf(outfile, "%c", delim);
  PCHARSV(cb->callout_string, 0,
    cb->callout_string_length, utf, outfile);

  while (callout_start_delims[k] != 0 && delim != callout_start_delims[k])
    k++;
  if (callout_start_delims[k] != 0) delim = callout_end_delims[k];

  fprintf(outfile, "%c  ", delim);
  }

/* Show the pattern text at the callout position, taken from the 8-bit pattern
buffer. When the next item has zero length, one character is shown. */

shown = (cb->next_item_length == 0)? 1 : (int)cb->next_item_length;
fprintf(outfile, "%.*s\n", shown, pbuffer8 + cb->pattern_position);

return 0;
}
3832 
3833 
3834 
3835 /*************************************************
3836 *        Show information about a pattern        *
3837 *************************************************/
3838 
3839 /* This function is called after a pattern has been compiled if any of the
3840 information-requesting controls have been set.
3841 
3842 Arguments:  none
3843 
3844 Returns:    PR_OK     continue processing next line
3845             PR_SKIP   skip to a blank line
3846             PR_ABEND  abort the pcre2test run
3847 */
3848 
3849 static int
show_pattern_info(void)3850 show_pattern_info(void)
3851 {
3852 uint32_t compile_options, overall_options;
3853 
3854 if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
3855   {
3856   fprintf(outfile, "------------------------------------------------------------------\n");
3857   PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
3858   }
3859 
3860 if ((pat_patctl.control & CTL_INFO) != 0)
3861   {
3862   void *nametable;
3863   uint8_t *start_bits;
3864   BOOL match_limit_set, recursion_limit_set;
3865   uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
3866     hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
3867     match_limit, minlength, nameentrysize, namecount, newline_convention,
3868     recursion_limit;
3869 
3870   /* These info requests may return PCRE2_ERROR_UNSET. */
3871 
3872   switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
3873     {
3874     case 0:
3875     match_limit_set = TRUE;
3876     break;
3877 
3878     case PCRE2_ERROR_UNSET:
3879     match_limit_set = FALSE;
3880     break;
3881 
3882     default:
3883     return PR_ABEND;
3884     }
3885 
3886   switch(pattern_info(PCRE2_INFO_RECURSIONLIMIT, &recursion_limit, TRUE))
3887     {
3888     case 0:
3889     recursion_limit_set = TRUE;
3890     break;
3891 
3892     case PCRE2_ERROR_UNSET:
3893     recursion_limit_set = FALSE;
3894     break;
3895 
3896     default:
3897     return PR_ABEND;
3898     }
3899 
3900   /* These info requests should always succeed. */
3901 
3902   if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
3903       pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
3904       pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
3905       pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
3906       pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
3907       pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
3908       pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
3909       pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
3910       pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
3911       pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
3912       pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
3913       pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
3914       pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
3915       pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
3916       pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
3917       pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
3918       pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
3919       != 0)
3920     return PR_ABEND;
3921 
3922   fprintf(outfile, "Capturing subpattern count = %d\n", capture_count);
3923 
3924   if (backrefmax > 0)
3925     fprintf(outfile, "Max back reference = %d\n", backrefmax);
3926 
3927   if (maxlookbehind > 0)
3928     fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3929 
3930   if (match_limit_set)
3931     fprintf(outfile, "Match limit = %u\n", match_limit);
3932 
3933   if (recursion_limit_set)
3934     fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
3935 
3936   if (namecount > 0)
3937     {
3938     fprintf(outfile, "Named capturing subpatterns:\n");
3939     for (; namecount > 0; namecount--)
3940       {
3941       int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
3942       uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
3943       fprintf(outfile, "  ");
3944       PCHARSV(nametable, imm2_size, length, FALSE, outfile);
3945       while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3946 #ifdef SUPPORT_PCRE2_32
3947       if (test_mode == PCRE32_MODE)
3948         fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
3949 #endif
3950 #ifdef SUPPORT_PCRE2_16
3951       if (test_mode == PCRE16_MODE)
3952         fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
3953 #endif
3954 #ifdef SUPPORT_PCRE2_8
3955       if (test_mode == PCRE8_MODE)
3956         fprintf(outfile, "%3d\n", (int)(
3957         ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
3958 #endif
3959       nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
3960       }
3961     }
3962 
3963   if (hascrorlf)     fprintf(outfile, "Contains explicit CR or LF match\n");
3964   if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
3965   if (match_empty)   fprintf(outfile, "May match empty string\n");
3966 
3967   pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
3968   pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
3969 
3970   /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
3971   cluttering up the verification output of non-UTF test files. */
3972 
3973   if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
3974     {
3975     compile_options &= ~PCRE2_NEVER_UTF;
3976     overall_options &= ~PCRE2_NEVER_UTF;
3977     }
3978 
3979   if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
3980     {
3981     compile_options &= ~PCRE2_NEVER_UCP;
3982     overall_options &= ~PCRE2_NEVER_UCP;
3983     }
3984 
3985   if ((compile_options|overall_options) != 0)
3986     {
3987     if (compile_options == overall_options)
3988       show_compile_options(compile_options, "Options:", "\n");
3989     else
3990       {
3991       show_compile_options(compile_options, "Compile options:", "\n");
3992       show_compile_options(overall_options, "Overall options:", "\n");
3993       }
3994     }
3995 
3996   if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3997 
3998   if ((pat_patctl.control & CTL_BSR_SET) != 0 ||
3999       (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
4000     fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
4001       "any Unicode newline" : "CR, LF, or CRLF");
4002 
4003   if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
4004     {
4005     switch (newline_convention)
4006       {
4007       case PCRE2_NEWLINE_CR:
4008       fprintf(outfile, "Forced newline is CR\n");
4009       break;
4010 
4011       case PCRE2_NEWLINE_LF:
4012       fprintf(outfile, "Forced newline is LF\n");
4013       break;
4014 
4015       case PCRE2_NEWLINE_CRLF:
4016       fprintf(outfile, "Forced newline is CRLF\n");
4017       break;
4018 
4019       case PCRE2_NEWLINE_ANYCRLF:
4020       fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
4021       break;
4022 
4023       case PCRE2_NEWLINE_ANY:
4024       fprintf(outfile, "Forced newline is any Unicode newline\n");
4025       break;
4026 
4027       default:
4028       break;
4029       }
4030     }
4031 
4032   if (first_ctype == 2)
4033     {
4034     fprintf(outfile, "First code unit at start or follows newline\n");
4035     }
4036   else if (first_ctype == 1)
4037     {
4038     const char *caseless =
4039       ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
4040       "" : " (caseless)";
4041     if (PRINTOK(first_cunit))
4042       fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
4043     else
4044       {
4045       fprintf(outfile, "First code unit = ");
4046       pchar(first_cunit, FALSE, outfile);
4047       fprintf(outfile, "%s\n", caseless);
4048       }
4049     }
4050   else if (start_bits != NULL)
4051     {
4052     int i;
4053     int c = 24;
4054     fprintf(outfile, "Starting code units: ");
4055     for (i = 0; i < 256; i++)
4056       {
4057       if ((start_bits[i/8] & (1<<(i&7))) != 0)
4058         {
4059         if (c > 75)
4060           {
4061           fprintf(outfile, "\n  ");
4062           c = 2;
4063           }
4064         if (PRINTOK(i) && i != ' ')
4065           {
4066           fprintf(outfile, "%c ", i);
4067           c += 2;
4068           }
4069         else
4070           {
4071           fprintf(outfile, "\\x%02x ", i);
4072           c += 5;
4073           }
4074         }
4075       }
4076     fprintf(outfile, "\n");
4077     }
4078 
4079   if (last_ctype != 0)
4080     {
4081     const char *caseless =
4082       ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
4083       "" : " (caseless)";
4084     if (PRINTOK(last_cunit))
4085       fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
4086     else
4087       {
4088       fprintf(outfile, "Last code unit = ");
4089       pchar(last_cunit, FALSE, outfile);
4090       fprintf(outfile, "%s\n", caseless);
4091       }
4092     }
4093 
4094   fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4095 
4096   if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
4097     {
4098     if (FLD(compiled_code, executable_jit) != NULL)
4099       fprintf(outfile, "JIT compilation was successful\n");
4100     else
4101       {
4102 #ifdef SUPPORT_JIT
4103       int len;
4104       fprintf(outfile, "JIT compilation was not successful");
4105       if (jitrc != 0)
4106         {
4107         fprintf(outfile, " (");
4108         PCRE2_GET_ERROR_MESSAGE(len, jitrc, pbuffer);
4109         PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4110         fprintf(outfile, ")");
4111         }
4112       fprintf(outfile, "\n");
4113 #else
4114       fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
4115 #endif
4116       }
4117     }
4118   }
4119 
4120 if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
4121   {
4122   int errorcode;
4123   PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
4124   if (errorcode != 0)
4125     {
4126     int len;
4127     fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
4128     if (errorcode < 0)
4129       {
4130       PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4131       PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4132       }
4133     fprintf(outfile, "\n");
4134     return PR_SKIP;
4135     }
4136   }
4137 
4138 return PR_OK;
4139 }
4140 
4141 
4142 
4143 /*************************************************
4144 *              Handle serialization error        *
4145 *************************************************/
4146 
4147 /* Print an error message after a serialization failure.
4148 
4149 Arguments:
4150   rc         the error code
4151   msg        an initial message for what failed
4152 
4153 Returns:     nothing
4154 */
4155 
4156 static void
serial_error(int rc,const char * msg)4157 serial_error(int rc, const char *msg)
4158 {
4159 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4160 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
4161 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
4162 fprintf(outfile, "\n");
4163 }
4164 
4165 
4166 
4167 /*************************************************
4168 *        Open file for save/load commands        *
4169 *************************************************/
4170 
4171 /* This function decodes the file name and opens the file.
4172 
4173 Arguments:
4174   buffptr     point after the #command
4175   mode        open mode
4176   fptr        points to the FILE variable
4177 
4178 Returns:      PR_OK or PR_ABEND
4179 */
4180 
4181 static int
open_file(uint8_t * buffptr,const char * mode,FILE ** fptr)4182 open_file(uint8_t *buffptr, const char *mode, FILE **fptr)
4183 {
4184 char *endf;
4185 char *filename = (char *)buffptr;
4186 while (isspace(*filename)) filename++;
4187 endf = filename + strlen8(filename);
4188 while (endf > filename && isspace(endf[-1])) endf--;
4189 
4190 if (endf == filename)
4191   {
4192   fprintf(outfile, "** File name expected after #save\n");
4193   return PR_ABEND;
4194   }
4195 
4196 *endf = 0;
4197 *fptr = fopen((const char *)filename, mode);
4198 if (*fptr == NULL)
4199   {
4200   fprintf(outfile, "** Failed to open '%s'\n", filename);
4201   return PR_ABEND;
4202   }
4203 
4204 return PR_OK;
4205 }
4206 
4207 
4208 
4209 /*************************************************
4210 *               Process command line             *
4211 *************************************************/
4212 
4213 /* This function is called for lines beginning with # and a character that is
4214 not ! or whitespace, when encountered between tests, which means that there is
4215 no compiled pattern (compiled_code is NULL). The line is in buffer.
4216 
4217 Arguments:  none
4218 
4219 Returns:    PR_OK     continue processing next line
4220             PR_SKIP   skip to a blank line
4221             PR_ABEND  abort the pcre2test run
4222 */
4223 
4224 static int
process_command(void)4225 process_command(void)
4226 {
4227 FILE *f;
4228 PCRE2_SIZE serial_size;
4229 size_t i;
4230 int rc, cmd, cmdlen;
4231 uint16_t first_listed_newline;
4232 const char *cmdname;
4233 uint8_t *argptr, *serial;
4234 
4235 if (restrict_for_perl_test)
4236   {
4237   fprintf(outfile, "** #-commands are not allowed after #perltest\n");
4238   return PR_ABEND;
4239   }
4240 
4241 cmd = CMD_UNKNOWN;
4242 cmdlen = 0;
4243 
4244 for (i = 0; i < cmdlistcount; i++)
4245   {
4246   cmdname = cmdlist[i].name;
4247   cmdlen = strlen(cmdname);
4248   if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4249       isspace(buffer[cmdlen+1]))
4250     {
4251     cmd = cmdlist[i].value;
4252     break;
4253     }
4254   }
4255 
4256 argptr = buffer + cmdlen + 1;
4257 
4258 switch(cmd)
4259   {
4260   case CMD_UNKNOWN:
4261   fprintf(outfile, "** Unknown command: %s", buffer);
4262   break;
4263 
4264   case CMD_FORBID_UTF:
4265   forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4266   break;
4267 
4268   case CMD_PERLTEST:
4269   restrict_for_perl_test = TRUE;
4270   break;
4271 
4272   /* Set default pattern modifiers */
4273 
4274   case CMD_PATTERN:
4275   (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4276   if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4277     def_patctl.jit = 7;
4278   break;
4279 
4280   /* Set default subject modifiers */
4281 
4282   case CMD_SUBJECT:
4283   (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4284   break;
4285 
4286   /* Check the default newline, and if not one of those listed, set up the
4287   first one to be forced. An empty list unsets. */
4288 
4289   case CMD_NEWLINE_DEFAULT:
4290   local_newline_default = 0;   /* Unset */
4291   first_listed_newline = 0;
4292   for (;;)
4293     {
4294     while (isspace(*argptr)) argptr++;
4295     if (*argptr == 0) break;
4296     for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4297       {
4298       size_t nlen = strlen(newlines[i]);
4299       if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4300           isspace(argptr[nlen]))
4301         {
4302         if (i == NEWLINE_DEFAULT) return PR_OK;  /* Default is valid */
4303         if (first_listed_newline == 0) first_listed_newline = i;
4304         }
4305       }
4306     while (*argptr != 0 && !isspace(*argptr)) argptr++;
4307     }
4308   local_newline_default = first_listed_newline;
4309   break;
4310 
4311   /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4312   the compiled pattern (e.g. to give information) are permitted. The default
4313   pattern modifiers are ignored. */
4314 
4315   case CMD_POP:
4316   case CMD_POPCOPY:
4317   if (patstacknext <= 0)
4318     {
4319     fprintf(outfile, "** Can't pop off an empty stack\n");
4320     return PR_SKIP;
4321     }
4322   memset(&pat_patctl, 0, sizeof(patctl));   /* Completely unset */
4323   if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4324     return PR_SKIP;
4325 
4326   if (cmd == CMD_POP)
4327     {
4328     SET(compiled_code, patstack[--patstacknext]);
4329     }
4330   else
4331     {
4332     PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
4333     }
4334 
4335   if (pat_patctl.jit != 0)
4336     {
4337     PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
4338     }
4339   if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
4340   if ((pat_patctl.control & CTL_ANYINFO) != 0)
4341     {
4342     rc = show_pattern_info();
4343     if (rc != PR_OK) return rc;
4344     }
4345   break;
4346 
4347   /* Save the stack of compiled patterns to a file, then empty the stack. */
4348 
4349   case CMD_SAVE:
4350   if (patstacknext <= 0)
4351     {
4352     fprintf(outfile, "** No stacked patterns to save\n");
4353     return PR_OK;
4354     }
4355 
4356   rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f);
4357   if (rc != PR_OK) return rc;
4358 
4359   PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
4360     general_context);
4361   if (rc < 0)
4362     {
4363     serial_error(rc, "Serialization");
4364     break;
4365     }
4366 
4367   /* Write the length at the start of the file to make it straightforward to
4368   get the right memory when re-loading. This saves having to read the file size
4369   in different operating systems. To allow for different endianness (even
4370   though reloading with the opposite endianness does not work), write the
4371   length byte-by-byte. */
4372 
4373   for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
4374   if (fwrite(serial, 1, serial_size, f) != serial_size)
4375     {
4376     fprintf(outfile, "** Wrong return from fwrite()\n");
4377     return PR_ABEND;
4378     }
4379 
4380   fclose(f);
4381   PCRE2_SERIALIZE_FREE(serial);
4382   while(patstacknext > 0)
4383     {
4384     SET(compiled_code, patstack[--patstacknext]);
4385     SUB1(pcre2_code_free, compiled_code);
4386     }
4387   SET(compiled_code, NULL);
4388   break;
4389 
4390   /* Load a set of compiled patterns from a file onto the stack */
4391 
4392   case CMD_LOAD:
4393   rc = open_file(argptr+1, BINARY_INPUT_MODE, &f);
4394   if (rc != PR_OK) return rc;
4395 
4396   serial_size = 0;
4397   for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
4398 
4399   serial = malloc(serial_size);
4400   if (serial == NULL)
4401     {
4402     fprintf(outfile, "** Failed to get memory (size %lu) for #load\n",
4403       (unsigned long int)serial_size);
4404     return PR_ABEND;
4405     }
4406 
4407   if (fread(serial, 1, serial_size, f) != serial_size)
4408     {
4409     fprintf(outfile, "** Wrong return from fread()\n");
4410     return PR_ABEND;
4411     }
4412   fclose(f);
4413 
4414   PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
4415   if (rc < 0) serial_error(rc, "Get number of codes"); else
4416     {
4417     if (rc + patstacknext > PATSTACKSIZE)
4418       {
4419       fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
4420         rc, (rc == 1)? "" : "s");
4421       rc = PATSTACKSIZE - patstacknext;
4422       fprintf(outfile, "** Decoding %d pattern%s\n", rc,
4423         (rc == 1)? "" : "s");
4424       }
4425     PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
4426       general_context);
4427     if (rc < 0) serial_error(rc, "Deserialization");
4428       else patstacknext += rc;
4429     }
4430 
4431   free(serial);
4432   break;
4433   }
4434 
4435 return PR_OK;
4436 }
4437 
4438 
4439 
4440 /*************************************************
4441 *               Process pattern line             *
4442 *************************************************/
4443 
4444 /* This function is called when the input buffer contains the start of a
4445 pattern. The first character is known to be a valid delimiter. The pattern is
4446 read, modifiers are interpreted, and a suitable local context is set up for
4447 this test. The pattern is then compiled.
4448 
4449 Arguments:  none
4450 
4451 Returns:    PR_OK     continue processing next line
4452             PR_SKIP   skip to a blank line
4453             PR_ABEND  abort the pcre2test run
4454 */
4455 
4456 static int
process_pattern(void)4457 process_pattern(void)
4458 {
4459 BOOL utf;
4460 uint32_t k;
4461 uint8_t *p = buffer;
4462 const uint8_t *use_tables;
4463 unsigned int delimiter = *p++;
4464 int errorcode;
4465 void *use_pat_context;
4466 PCRE2_SIZE patlen;
4467 PCRE2_SIZE erroroffset;
4468 
4469 /* Initialize the context and pattern/data controls for this test from the
4470 defaults. */
4471 
4472 PATCTXCPY(pat_context, default_pat_context);
4473 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
4474 
4475 /* Find the end of the pattern, reading more lines if necessary. */
4476 
4477 for(;;)
4478   {
4479   while (*p != 0)
4480     {
4481     if (*p == '\\' && p[1] != 0) p++;
4482       else if (*p == delimiter) break;
4483     p++;
4484     }
4485   if (*p != 0) break;
4486   if ((p = extend_inputline(infile, p, "    > ")) == NULL)
4487     {
4488     fprintf(outfile, "** Unexpected EOF\n");
4489     return PR_ABEND;
4490     }
4491   if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
4492   }
4493 
4494 /* If the first character after the delimiter is backslash, make the pattern
4495 end with backslash. This is purely to provide a way of testing for the error
4496 message when a pattern ends with backslash. */
4497 
4498 if (p[1] == '\\') *p++ = '\\';
4499 
4500 /* Terminate the pattern at the delimiter, and compute the length. */
4501 
4502 *p++ = 0;
4503 patlen = p - buffer - 2;
4504 
4505 /* Look for modifiers and options after the final delimiter. */
4506 
4507 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
4508 utf = (pat_patctl.options & PCRE2_UTF) != 0;
4509 
4510 /* Check for mutually exclusive modifiers. At present, these are all in the
4511 first control word. */
4512 
4513 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
4514   {
4515   uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
4516   if (c != 0 && c != (c & (~c+1)))
4517     {
4518     show_controls(c, 0, "** Not allowed together:");
4519     fprintf(outfile, "\n");
4520     return PR_SKIP;
4521     }
4522   }
4523 
4524 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
4525 specified. */
4526 
4527 if (pat_patctl.jit == 0 &&
4528     (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
4529   pat_patctl.jit = 7;
4530 
4531 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
4532 in callouts. Convert from hex if requested (literal strings in quotes may be
4533 present within the hexadecimal pairs). The result must necessarily be fewer
4534 characters so will always fit in pbuffer8. */
4535 
4536 if ((pat_patctl.control & CTL_HEXPAT) != 0)
4537   {
4538   uint8_t *pp, *pt;
4539   uint32_t c, d;
4540 
4541   pt = pbuffer8;
4542   for (pp = buffer + 1; *pp != 0; pp++)
4543     {
4544     if (isspace(*pp)) continue;
4545     c = *pp++;
4546 
4547     /* Handle a literal substring */
4548 
4549     if (c == '\'' || c == '"')
4550       {
4551       for (;; pp++)
4552         {
4553         d = *pp;
4554         if (d == 0)
4555           {
4556           fprintf(outfile, "** Missing closing quote in hex pattern\n");
4557           return PR_SKIP;
4558           }
4559         if (d == c) break;
4560         *pt++ = d;
4561         }
4562       }
4563 
4564     /* Expect a hex pair */
4565 
4566     else
4567       {
4568       if (!isxdigit(c))
4569         {
4570         fprintf(outfile, "** Unexpected non-hex-digit '%c' in hex pattern: "
4571           "quote missing?\n", c);
4572         return PR_SKIP;
4573         }
4574       if (*pp == 0)
4575         {
4576         fprintf(outfile, "** Odd number of digits in hex pattern\n");
4577         return PR_SKIP;
4578         }
4579       d = *pp;
4580       if (!isxdigit(d))
4581         {
4582         fprintf(outfile, "** Unexpected non-hex-digit '%c' in hex pattern: "
4583           "quote missing?\n", d);
4584         return PR_SKIP;
4585         }
4586       c = toupper(c);
4587       d = toupper(d);
4588       *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
4589                (isdigit(d)? (d - '0') : (d - 'A' + 10));
4590       }
4591     }
4592   *pt = 0;
4593   patlen = pt - pbuffer8;
4594   }
4595 
4596 /* If not a hex string, process for repetition expansion if requested. */
4597 
4598 else if ((pat_patctl.control & CTL_EXPAND) != 0)
4599   {
4600   uint8_t *pp, *pt;
4601 
4602   pt = pbuffer8;
4603   for (pp = buffer + 1; *pp != 0; pp++)
4604     {
4605     uint8_t *pc = pp;
4606     uint32_t count = 1;
4607     size_t length = 1;
4608 
4609     /* Check for replication syntax; if not found, the defaults just set will
4610     prevail and one character will be copied. */
4611 
4612     if (pp[0] == '\\' && pp[1] == '[')
4613       {
4614       uint8_t *pe;
4615       for (pe = pp + 2; *pe != 0; pe++)
4616         {
4617         if (pe[0] == ']' && pe[1] == '{')
4618           {
4619           uint32_t clen = pe - pc - 2;
4620           uint32_t i = 0;
4621           unsigned long uli;
4622           char *endptr;
4623 
4624           pe += 2;
4625           uli = strtoul((const char *)pe, &endptr, 10);
4626           if (U32OVERFLOW(uli))
4627             {
4628             fprintf(outfile, "** Pattern repeat count too large\n");
4629             return PR_SKIP;
4630             }
4631 
4632           i = (uint32_t)uli;
4633           pe = (uint8_t *)endptr;
4634           if (*pe == '}')
4635             {
4636             if (i == 0)
4637               {
4638               fprintf(outfile, "** Zero repeat not allowed\n");
4639               return PR_SKIP;
4640               }
4641             pc += 2;
4642             count = i;
4643             length = clen;
4644             pp = pe;
4645             break;
4646             }
4647           }
4648         }
4649       }
4650 
4651     /* Add to output. If the buffer is too small expand it. The function for
4652     expanding buffers always keeps buffer and pbuffer8 in step as far as their
4653     size goes. */
4654 
4655     while (pt + count * length > pbuffer8 + pbuffer8_size)
4656       {
4657       size_t pc_offset = pc - buffer;
4658       size_t pp_offset = pp - buffer;
4659       size_t pt_offset = pt - pbuffer8;
4660       expand_input_buffers();
4661       pc = buffer + pc_offset;
4662       pp = buffer + pp_offset;
4663       pt = pbuffer8 + pt_offset;
4664       }
4665 
4666     for (; count > 0; count--)
4667       {
4668       memcpy(pt, pc, length);
4669       pt += length;
4670       }
4671     }
4672 
4673   *pt = 0;
4674   patlen = pt - pbuffer8;
4675 
4676   if ((pat_patctl.control & CTL_INFO) != 0)
4677     fprintf(outfile, "Expanded: %s\n", pbuffer8);
4678   }
4679 
4680 /* Neither hex nor expanded, just copy the input verbatim. */
4681 
4682 else
4683   {
4684   strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
4685   }
4686 
4687 /* Sort out character tables */
4688 
4689 if (pat_patctl.locale[0] != 0)
4690   {
4691   if (pat_patctl.tables_id != 0)
4692     {
4693     fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
4694     return PR_SKIP;
4695     }
4696   if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
4697     {
4698     fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
4699     return PR_SKIP;
4700     }
4701   if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
4702     {
4703     strcpy((char *)locale_name, (char *)pat_patctl.locale);
4704     if (locale_tables != NULL) free((void *)locale_tables);
4705     PCRE2_MAKETABLES(locale_tables);
4706     }
4707   use_tables = locale_tables;
4708   }
4709 
4710 else switch (pat_patctl.tables_id)
4711   {
4712   case 0: use_tables = NULL; break;
4713   case 1: use_tables = tables1; break;
4714   case 2: use_tables = tables2; break;
4715   default:
4716   fprintf(outfile, "** 'Tables' must specify 0, 1, or 2.\n");
4717   return PR_SKIP;
4718   }
4719 
4720 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
4721 
4722 /* Set up for the stackguard test. */
4723 
4724 if (pat_patctl.stackguard_test != 0)
4725   {
4726   PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
4727   }
4728 
4729 /* Handle compiling via the POSIX interface, which doesn't support the
4730 timing, showing, or debugging options, nor the ability to pass over
4731 local character tables. Neither does it have 16-bit or 32-bit support. */
4732 
4733 if ((pat_patctl.control & CTL_POSIX) != 0)
4734   {
4735 #ifdef SUPPORT_PCRE2_8
4736   int rc;
4737   int cflags = 0;
4738   const char *msg = "** Ignored with POSIX interface:";
4739 #endif
4740 
4741   if (test_mode != 8)
4742     {
4743     fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
4744     return PR_SKIP;
4745     }
4746 
4747 #ifdef SUPPORT_PCRE2_8
4748   /* Check for features that the POSIX interface does not support. */
4749 
4750   if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
4751   if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
4752   if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
4753   if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
4754   if (timeit > 0) prmsg(&msg, "timing");
4755   if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
4756 
4757   if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
4758     {
4759     show_compile_options(
4760       pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
4761     msg = "";
4762     }
4763   if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
4764       (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
4765     {
4766     show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
4767       pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
4768     msg = "";
4769     }
4770 
4771   if (local_newline_default != 0) prmsg(&msg, "#newline_default");
4772 
4773   if (msg[0] == 0) fprintf(outfile, "\n");
4774 
4775   /* Translate PCRE2 options to POSIX options and then compile. */
4776 
4777   if (utf) cflags |= REG_UTF;
4778   if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
4779   if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
4780   if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
4781   if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
4782   if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
4783   if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
4784 
4785   rc = regcomp(&preg, (char *)pbuffer8, cflags);
4786 
4787   /* Compiling failed */
4788 
4789   if (rc != 0)
4790     {
4791     size_t bsize, usize;
4792     int psize;
4793 
4794     preg.re_pcre2_code = NULL;     /* In case something was left in there */
4795     preg.re_match_data = NULL;
4796 
4797     bsize = (pat_patctl.regerror_buffsize != 0)?
4798       pat_patctl.regerror_buffsize : pbuffer8_size;
4799     if (bsize + 8 < pbuffer8_size)
4800       memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
4801     usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
4802 
4803     /* Inside regerror(), snprintf() is used. If the buffer is too small, some
4804     versions of snprintf() put a zero byte at the end, but others do not.
4805     Therefore, we print a maximum of one less than the size of the buffer. */
4806 
4807     psize = (int)bsize - 1;
4808     fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
4809     if (usize > bsize)
4810       {
4811       fprintf(outfile, "** regerror() message truncated\n");
4812       if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
4813         fprintf(outfile, "** regerror() buffer overflow\n");
4814       }
4815     return PR_SKIP;
4816     }
4817 
4818   /* Compiling succeeded. Check that the values in the preg block are sensible.
4819   It can happen that pcre2test is accidentally linked with a different POSIX
4820   library which succeeds, but of course puts different things into preg. In
4821   this situation, calling regfree() may cause a segfault (or invalid free() in
4822   valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
4823   calling of regfree() on exit. */
4824 
4825   if (preg.re_pcre2_code == NULL ||
4826       ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
4827       ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
4828       preg.re_match_data == NULL ||
4829       preg.re_cflags != cflags)
4830     {
4831     fprintf(outfile,
4832       "** The regcomp() function returned zero (success), but the values set\n"
4833       "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
4834       "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
4835       "** some other POSIX regex library.\n**\n");
4836     preg.re_pcre2_code = NULL;
4837     return PR_ABEND;
4838     }
4839 
4840   return PR_OK;
4841 #endif  /* SUPPORT_PCRE2_8 */
4842   }
4843 
4844 /* Handle compiling via the native interface. Controls that act later are
4845 ignored with "push". Replacements are locked out. */
4846 
4847 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY)) != 0)
4848   {
4849   if (pat_patctl.replacement[0] != 0)
4850     {
4851     fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
4852     return PR_OK;
4853     }
4854   if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
4855       (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
4856     {
4857     show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
4858                   pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
4859       "** Ignored when compiled pattern is stacked with 'push':");
4860     fprintf(outfile, "\n");
4861     }
4862   if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
4863       (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
4864     {
4865     show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
4866                   pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
4867       "** Applies only to compile when pattern is stacked with 'push':");
4868     fprintf(outfile, "\n");
4869     }
4870   }
4871 
4872 /* Convert the input in non-8-bit modes. */
4873 
4874 errorcode = 0;
4875 
4876 #ifdef SUPPORT_PCRE2_16
4877 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
4878 #endif
4879 
4880 #ifdef SUPPORT_PCRE2_32
4881 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
4882 #endif
4883 
4884 switch(errorcode)
4885   {
4886   case -1:
4887   fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
4888     "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
4889   return PR_SKIP;
4890 
4891   case -2:
4892   fprintf(outfile, "** Failed: character value greater than 0x10ffff "
4893     "cannot be converted to UTF\n");
4894   return PR_SKIP;
4895 
4896   case -3:
4897   fprintf(outfile, "** Failed: character value greater than 0xffff "
4898     "cannot be converted to 16-bit in non-UTF mode\n");
4899   return PR_SKIP;
4900 
4901   default:
4902   break;
4903   }
4904 
4905 /* The pattern is now in pbuffer[8|16|32], with the length in patlen. By
4906 default, however, we pass a zero-terminated pattern. The length is passed only
4907 if we had a hex pattern. */
4908 
4909 if ((pat_patctl.control & CTL_HEXPAT) == 0) patlen = PCRE2_ZERO_TERMINATED;
4910 
4911 /* If #newline_default has been used and the library was not compiled with an
4912 appropriate default newline setting, local_newline_default will be non-zero. We
4913 use this if there is no explicit newline modifier. */
4914 
4915 if ((pat_patctl.control & CTL_NL_SET) == 0 && local_newline_default != 0)
4916   {
4917   SETFLD(pat_context, newline_convention, local_newline_default);
4918   }
4919 
4920 /* The nullcontext modifier is used to test calling pcre2_compile() with a NULL
4921 context. */
4922 
4923 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
4924   NULL : PTR(pat_context);
4925 
4926 /* Compile many times when timing. */
4927 
4928 if (timeit > 0)
4929   {
4930   register int i;
4931   clock_t time_taken = 0;
4932   for (i = 0; i < timeit; i++)
4933     {
4934     clock_t start_time = clock();
4935     PCRE2_COMPILE(compiled_code, pbuffer, patlen,
4936       pat_patctl.options|forbid_utf, &errorcode, &erroroffset, use_pat_context);
4937     time_taken += clock() - start_time;
4938     if (TEST(compiled_code, !=, NULL))
4939       { SUB1(pcre2_code_free, compiled_code); }
4940     }
4941   total_compile_time += time_taken;
4942   fprintf(outfile, "Compile time %.4f milliseconds\n",
4943     (((double)time_taken * 1000.0) / (double)timeit) /
4944       (double)CLOCKS_PER_SEC);
4945   }
4946 
4947 /* A final compile that is used "for real". */
4948 
4949 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|forbid_utf,
4950   &errorcode, &erroroffset, use_pat_context);
4951 
4952 /* Compilation failed; go back for another re, skipping to blank line
4953 if non-interactive. */
4954 
4955 if (TEST(compiled_code, ==, NULL))
4956   {
4957   int len;
4958   fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
4959     (int)erroroffset);
4960   PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4961   PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4962   fprintf(outfile, "\n");
4963   return PR_SKIP;
4964   }
4965 
4966 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
4967 locked out at compile time, but we must also check for occurrences of \P, \p,
4968 and \X, which are only supported when Unicode is supported. */
4969 
4970 if (forbid_utf != 0)
4971   {
4972   if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
4973     {
4974     fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
4975       "#forbid_utf command\n");
4976     return PR_SKIP;
4977     }
4978   }
4979 
4980 /* Remember the maximum lookbehind, for partial matching. */
4981 
4982 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
4983   return PR_ABEND;
4984 
4985 /* Call the JIT compiler if requested. When timing, we must free and recompile
4986 the pattern each time because that is the only way to free the JIT compiled
4987 code. We know that compilation will always succeed. */
4988 
4989 if (pat_patctl.jit != 0)
4990   {
4991   if (timeit > 0)
4992     {
4993     register int i;
4994     clock_t time_taken = 0;
4995     for (i = 0; i < timeit; i++)
4996       {
4997       clock_t start_time;
4998       SUB1(pcre2_code_free, compiled_code);
4999       PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5000         pat_patctl.options|forbid_utf, &errorcode, &erroroffset,
5001         use_pat_context);
5002       start_time = clock();
5003       PCRE2_JIT_COMPILE(jitrc,compiled_code, pat_patctl.jit);
5004       time_taken += clock() - start_time;
5005       }
5006     total_jit_compile_time += time_taken;
5007     fprintf(outfile, "JIT compile  %.4f milliseconds\n",
5008       (((double)time_taken * 1000.0) / (double)timeit) /
5009         (double)CLOCKS_PER_SEC);
5010     }
5011   else
5012     {
5013     PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5014     }
5015   }
5016 
5017 /* If an explicit newline modifier was given, set the information flag in the
5018 pattern so that it is preserved over push/pop. */
5019 
5020 if ((pat_patctl.control & CTL_NL_SET) != 0)
5021   {
5022   SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
5023   }
5024 
5025 /* Output code size and other information if requested. */
5026 
5027 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5028 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5029   {
5030   int rc = show_pattern_info();
5031   if (rc != PR_OK) return rc;
5032   }
5033 
5034 /* The "push" control requests that the compiled pattern be remembered on a
5035 stack. This is mainly for testing the serialization functionality. */
5036 
5037 if ((pat_patctl.control & CTL_PUSH) != 0)
5038   {
5039   if (patstacknext >= PATSTACKSIZE)
5040     {
5041     fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5042     return PR_ABEND;
5043     }
5044   patstack[patstacknext++] = PTR(compiled_code);
5045   SET(compiled_code, NULL);
5046   }
5047 
5048 /* The "pushcopy" control is similar, but pushes a copy of the pattern. This
5049 tests the pcre2_code_copy() function. */
5050 
5051 if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
5052   {
5053   if (patstacknext >= PATSTACKSIZE)
5054     {
5055     fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5056     return PR_ABEND;
5057     }
5058   PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
5059   }
5060 
5061 return PR_OK;
5062 }
5063 
5064 
5065 
5066 /*************************************************
5067 *        Check match or recursion limit          *
5068 *************************************************/
5069 
5070 static int
check_match_limit(uint8_t * pp,size_t ulen,int errnumber,const char * msg)5071 check_match_limit(uint8_t *pp, size_t ulen, int errnumber, const char *msg)
5072 {
5073 int capcount;
5074 uint32_t min = 0;
5075 uint32_t mid = 64;
5076 uint32_t max = UINT32_MAX;
5077 
5078 PCRE2_SET_MATCH_LIMIT(dat_context, max);
5079 PCRE2_SET_RECURSION_LIMIT(dat_context, max);
5080 
5081 for (;;)
5082   {
5083   if (errnumber == PCRE2_ERROR_MATCHLIMIT)
5084     {
5085     PCRE2_SET_MATCH_LIMIT(dat_context, mid);
5086     }
5087   else
5088     {
5089     PCRE2_SET_RECURSION_LIMIT(dat_context, mid);
5090     }
5091 
5092   if ((pat_patctl.control & CTL_JITFAST) != 0)
5093     PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5094       dat_datctl.options, match_data, PTR(dat_context));
5095   else
5096     PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5097       dat_datctl.options, match_data, PTR(dat_context));
5098 
5099   if (capcount == errnumber)
5100     {
5101     min = mid;
5102     mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
5103     }
5104   else if (capcount >= 0 ||
5105            capcount == PCRE2_ERROR_NOMATCH ||
5106            capcount == PCRE2_ERROR_PARTIAL)
5107     {
5108     if (mid == min + 1)
5109       {
5110       fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
5111       break;
5112       }
5113     max = mid;
5114     mid = (min + mid)/2;
5115     }
5116   else break;    /* Some other error */
5117   }
5118 
5119 return capcount;
5120 }
5121 
5122 
5123 
5124 /*************************************************
5125 *              Callout function                  *
5126 *************************************************/
5127 
5128 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
5129 we are in the match. Yield zero unless more callouts than the fail count, or
5130 the callout data is not zero. The only differences in the callout block for
5131 different code unit widths are that the pointers to the subject, the most
5132 recent MARK, and a callout argument string point to strings of the appropriate
5133 width. Casts can be used to deal with this.
5134 
5135 Argument:  a pointer to a callout block
5136 Return:
5137 */
5138 
5139 static int
callout_function(pcre2_callout_block_8 * cb,void * callout_data_ptr)5140 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
5141 {
5142 uint32_t i, pre_start, post_start, subject_length;
5143 PCRE2_SIZE current_position;
5144 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
5145 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
5146 
5147 /* This FILE is used for echoing the subject. This is done only once in simple
5148 cases. */
5149 
5150 FILE *f = (first_callout || callout_capture || cb->callout_string != NULL)?
5151   outfile : NULL;
5152 
5153 /* For a callout with a string argument, show the string first because there
5154 isn't a tidy way to fit it in the rest of the data. */
5155 
5156 if (cb->callout_string != NULL)
5157   {
5158   uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
5159   fprintf(outfile, "Callout (%lu): %c",
5160     (unsigned long int)cb->callout_string_offset, delimiter);
5161   PCHARSV(cb->callout_string, 0,
5162     cb->callout_string_length, utf, outfile);
5163   for (i = 0; callout_start_delims[i] != 0; i++)
5164     if (delimiter == callout_start_delims[i])
5165       {
5166       delimiter = callout_end_delims[i];
5167       break;
5168       }
5169   fprintf(outfile, "%c", delimiter);
5170   if (!callout_capture) fprintf(outfile, "\n");
5171   }
5172 
5173 /* Show captured strings if required */
5174 
5175 if (callout_capture)
5176   {
5177   if (cb->callout_string == NULL)
5178     fprintf(outfile, "Callout %d:", cb->callout_number);
5179   fprintf(outfile, " last capture = %d\n", cb->capture_last);
5180   for (i = 0; i < cb->capture_top * 2; i += 2)
5181     {
5182     fprintf(outfile, "%2d: ", i/2);
5183     if (cb->offset_vector[i] == PCRE2_UNSET)
5184       fprintf(outfile, "<unset>");
5185     else
5186       {
5187       PCHARSV(cb->subject, cb->offset_vector[i],
5188         cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
5189       }
5190     fprintf(outfile, "\n");
5191     }
5192   }
5193 
5194 /* Re-print the subject in canonical form (with escapes for non-printing
5195 characters), the first time, or if giving full details. On subsequent calls in
5196 the same match, we use PCHARS() just to find the printed lengths of the
5197 substrings. */
5198 
5199 if (f != NULL) fprintf(f, "--->");
5200 
5201 /* The subject before the match start. */
5202 
5203 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
5204 
5205 /* If a lookbehind is involved, the current position may be earlier than the
5206 match start. If so, use the match start instead. */
5207 
5208 current_position = (cb->current_position >= cb->start_match)?
5209   cb->current_position : cb->start_match;
5210 
5211 /* The subject between the match start and the current position. */
5212 
5213 PCHARS(post_start, cb->subject, cb->start_match,
5214   current_position - cb->start_match, utf, f);
5215 
5216 /* Print from the current position to the end. */
5217 
5218 PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
5219   utf, f);
5220 
5221 /* Calculate the total subject printed length (no print). */
5222 
5223 PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
5224 
5225 if (f != NULL) fprintf(f, "\n");
5226 
5227 /* For automatic callouts, show the pattern offset. Otherwise, for a numerical
5228 callout whose number has not already been shown with captured strings, show the
5229 number here. A callout with a string argument has been displayed above. */
5230 
5231 if (cb->callout_number == 255)
5232   {
5233   fprintf(outfile, "%+3d ", (int)cb->pattern_position);
5234   if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
5235   }
5236 else
5237   {
5238   if (callout_capture || cb->callout_string != NULL) fprintf(outfile, "    ");
5239     else fprintf(outfile, "%3d ", cb->callout_number);
5240   }
5241 
5242 /* Now show position indicators */
5243 
5244 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
5245 fprintf(outfile, "^");
5246 
5247 if (post_start > 0)
5248   {
5249   for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
5250   fprintf(outfile, "^");
5251   }
5252 
5253 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
5254   fprintf(outfile, " ");
5255 
5256 fprintf(outfile, "%.*s",
5257   (int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
5258   pbuffer8 + cb->pattern_position);
5259 
5260 fprintf(outfile, "\n");
5261 first_callout = FALSE;
5262 
5263 if (cb->mark != last_callout_mark)
5264   {
5265   if (cb->mark == NULL)
5266     fprintf(outfile, "Latest Mark: <unset>\n");
5267   else
5268     {
5269     fprintf(outfile, "Latest Mark: ");
5270     PCHARSV(cb->mark, 0, -1, utf, outfile);
5271     putc('\n', outfile);
5272     }
5273   last_callout_mark = cb->mark;
5274   }
5275 
5276 if (callout_data_ptr != NULL)
5277   {
5278   int callout_data = *((int32_t *)callout_data_ptr);
5279   if (callout_data != 0)
5280     {
5281     fprintf(outfile, "Callout data = %d\n", callout_data);
5282     return callout_data;
5283     }
5284   }
5285 
5286 return (cb->callout_number != dat_datctl.cfail[0])? 0 :
5287        (++callout_count >= dat_datctl.cfail[1])? 1 : 0;
5288 }
5289 
5290 
5291 
5292 /*************************************************
5293 *       Handle *MARK and copy/get tests          *
5294 *************************************************/
5295 
5296 /* This function is called after complete and partial matches. It runs the
5297 tests for substring extraction.
5298 
5299 Arguments:
5300   utf       TRUE for utf
5301   capcount  return from pcre2_match()
5302 
5303 Returns:    nothing
5304 */
5305 
5306 static void
copy_and_get(BOOL utf,int capcount)5307 copy_and_get(BOOL utf, int capcount)
5308 {
5309 int i;
5310 uint8_t *nptr;
5311 
5312 /* Test copy strings by number */
5313 
5314 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
5315   {
5316   int rc;
5317   PCRE2_SIZE length, length2;
5318   uint32_t copybuffer[256];
5319   uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
5320   length = sizeof(copybuffer)/code_unit_size;
5321   PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
5322   if (rc < 0)
5323     {
5324     fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
5325     PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5326     PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5327     fprintf(outfile, "\n");
5328     }
5329   else
5330     {
5331     PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
5332     if (rc < 0)
5333       {
5334       fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
5335       PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5336       PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5337       fprintf(outfile, "\n");
5338       }
5339     else if (length2 != length)
5340       {
5341       fprintf(outfile, "Mismatched substring lengths: %lu %lu\n",
5342         (unsigned long int)length, (unsigned long int)length2);
5343       }
5344     fprintf(outfile, "%2dC ", n);
5345     PCHARSV(copybuffer, 0, length, utf, outfile);
5346     fprintf(outfile, " (%lu)\n", (unsigned long)length);
5347     }
5348   }
5349 
5350 /* Test copy strings by name */
5351 
5352 nptr = dat_datctl.copy_names;
5353 for (;;)
5354   {
5355   int rc;
5356   int groupnumber;
5357   PCRE2_SIZE length, length2;
5358   uint32_t copybuffer[256];
5359   int namelen = strlen((const char *)nptr);
5360 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
5361   PCRE2_SIZE cnl = namelen;
5362 #endif
5363   if (namelen == 0) break;
5364 
5365 #ifdef SUPPORT_PCRE2_8
5366   if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
5367 #endif
5368 #ifdef SUPPORT_PCRE2_16
5369   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
5370 #endif
5371 #ifdef SUPPORT_PCRE2_32
5372   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
5373 #endif
5374 
5375   PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
5376   if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
5377     fprintf(outfile, "Number not found for group '%s'\n", nptr);
5378 
5379   length = sizeof(copybuffer)/code_unit_size;
5380   PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
5381   if (rc < 0)
5382     {
5383     fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
5384     PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5385     PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5386     fprintf(outfile, "\n");
5387     }
5388   else
5389     {
5390     PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
5391     if (rc < 0)
5392       {
5393       fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
5394       PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5395       PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5396       fprintf(outfile, "\n");
5397       }
5398     else if (length2 != length)
5399       {
5400       fprintf(outfile, "Mismatched substring lengths: %lu %lu\n",
5401         (unsigned long int)length, (unsigned long int)length2);
5402       }
5403     fprintf(outfile, "  C ");
5404     PCHARSV(copybuffer, 0, length, utf, outfile);
5405     fprintf(outfile, " (%lu) %s", (unsigned long)length, nptr);
5406     if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
5407       else fprintf(outfile, " (non-unique)\n");
5408     }
5409   nptr += namelen + 1;
5410   }
5411 
5412 /* Test get strings by number */
5413 
5414 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
5415   {
5416   int rc;
5417   PCRE2_SIZE length;
5418   void *gotbuffer;
5419   uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
5420   PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
5421   if (rc < 0)
5422     {
5423     fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
5424     PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5425     PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5426     fprintf(outfile, "\n");
5427     }
5428   else
5429     {
5430     fprintf(outfile, "%2dG ", n);
5431     PCHARSV(gotbuffer, 0, length, utf, outfile);
5432     fprintf(outfile, " (%lu)\n", (unsigned long)length);
5433     PCRE2_SUBSTRING_FREE(gotbuffer);
5434     }
5435   }
5436 
5437 /* Test get strings by name */
5438 
5439 nptr = dat_datctl.get_names;
5440 for (;;)
5441   {
5442   PCRE2_SIZE length;
5443   void *gotbuffer;
5444   int rc;
5445   int groupnumber;
5446   int namelen = strlen((const char *)nptr);
5447 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
5448   PCRE2_SIZE cnl = namelen;
5449 #endif
5450   if (namelen == 0) break;
5451 
5452 #ifdef SUPPORT_PCRE2_8
5453   if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
5454 #endif
5455 #ifdef SUPPORT_PCRE2_16
5456   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
5457 #endif
5458 #ifdef SUPPORT_PCRE2_32
5459   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
5460 #endif
5461 
5462   PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
5463   if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
5464     fprintf(outfile, "Number not found for group '%s'\n", nptr);
5465 
5466   PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
5467   if (rc < 0)
5468     {
5469     fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
5470     PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5471     PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5472     fprintf(outfile, "\n");
5473     }
5474   else
5475     {
5476     fprintf(outfile, "  G ");
5477     PCHARSV(gotbuffer, 0, length, utf, outfile);
5478     fprintf(outfile, " (%lu) %s", (unsigned long)length, nptr);
5479     if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
5480       else fprintf(outfile, " (non-unique)\n");
5481     PCRE2_SUBSTRING_FREE(gotbuffer);
5482     }
5483   nptr += namelen + 1;
5484   }
5485 
5486 /* Test getting the complete list of captured strings. */
5487 
5488 if ((dat_datctl.control & CTL_GETALL) != 0)
5489   {
5490   int rc;
5491   void **stringlist;
5492   PCRE2_SIZE *lengths;
5493   PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
5494   if (rc < 0)
5495     {
5496     fprintf(outfile, "get substring list failed (%d): ", rc);
5497     PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5498     PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5499     fprintf(outfile, "\n");
5500     }
5501   else
5502     {
5503     for (i = 0; i < capcount; i++)
5504       {
5505       fprintf(outfile, "%2dL ", i);
5506       PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
5507       putc('\n', outfile);
5508       }
5509     if (stringlist[i] != NULL)
5510       fprintf(outfile, "string list not terminated by NULL\n");
5511     PCRE2_SUBSTRING_LIST_FREE(stringlist);
5512     }
5513   }
5514 }
5515 
5516 
5517 
5518 /*************************************************
5519 *               Process a data line              *
5520 *************************************************/
5521 
5522 /* The line is in buffer; it will not be empty.
5523 
5524 Arguments:  none
5525 
5526 Returns:    PR_OK     continue processing next line
5527             PR_SKIP   skip to a blank line
5528             PR_ABEND  abort the pcre2test run
5529 */
5530 
5531 static int
process_data(void)5532 process_data(void)
5533 {
5534 PCRE2_SIZE len, ulen;
5535 uint32_t gmatched;
5536 uint32_t c, k;
5537 uint32_t g_notempty = 0;
5538 uint8_t *p, *pp, *start_rep;
5539 size_t needlen;
5540 void *use_dat_context;
5541 BOOL utf;
5542 
5543 #ifdef SUPPORT_PCRE2_8
5544 uint8_t *q8 = NULL;
5545 #endif
5546 #ifdef SUPPORT_PCRE2_16
5547 uint16_t *q16 = NULL;
5548 #endif
5549 #ifdef SUPPORT_PCRE2_32
5550 uint32_t *q32 = NULL;
5551 #endif
5552 
5553 /* Copy the default context and data control blocks to the active ones. Then
5554 copy from the pattern the controls that can be set in either the pattern or the
5555 data. This allows them to be overridden in the data line. We do not do this for
5556 options because those that are common apply separately to compiling and
5557 matching. */
5558 
5559 DATCTXCPY(dat_context, default_dat_context);
5560 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
5561 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
5562 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
5563 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
5564 
5565 /* Initialize for scanning the data line. */
5566 
5567 #ifdef SUPPORT_PCRE2_8
5568 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
5569   ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
5570   FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
5571 #else
5572 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
5573 #endif
5574 
5575 start_rep = NULL;
5576 len = strlen((const char *)buffer);
5577 while (len > 0 && isspace(buffer[len-1])) len--;
5578 buffer[len] = 0;
5579 p = buffer;
5580 while (isspace(*p)) p++;
5581 
5582 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
5583 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
5584 
5585 if (utf)
5586   {
5587   uint8_t *q;
5588   uint32_t cc;
5589   int n = 1;
5590   for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
5591   if (n <= 0)
5592     {
5593     fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
5594       "in UTF mode\n");
5595     return PR_OK;
5596     }
5597   }
5598 
5599 #ifdef SUPPORT_VALGRIND
5600 /* Mark the dbuffer as addressable but undefined again. */
5601 if (dbuffer != NULL)
5602   {
5603   VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
5604   }
5605 #endif
5606 
5607 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
5608 the number of code units that will be needed (though the buffer may have to be
5609 extended if replication is involved). */
5610 
5611 needlen = (size_t)((len+1) * code_unit_size);
5612 if (dbuffer == NULL || needlen >= dbuffer_size)
5613   {
5614   while (needlen >= dbuffer_size) dbuffer_size *= 2;
5615   dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
5616   if (dbuffer == NULL)
5617     {
5618     fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
5619     exit(1);
5620     }
5621   }
5622 SETCASTPTR(q, dbuffer);  /* Sets q8, q16, or q32, as appropriate. */
5623 
5624 /* Scan the data line, interpreting data escapes, and put the result into a
5625 buffer of the appropriate width. In UTF mode, input can be UTF-8. */
5626 
5627 while ((c = *p++) != 0)
5628   {
5629   int32_t i = 0;
5630   size_t replen;
5631 
5632   /* ] may mark the end of a replicated sequence */
5633 
5634   if (c == ']' && start_rep != NULL)
5635     {
5636     long li;
5637     char *endptr;
5638     size_t qoffset = CAST8VAR(q) - dbuffer;
5639     size_t rep_offset = start_rep - dbuffer;
5640 
5641     if (*p++ != '{')
5642       {
5643       fprintf(outfile, "** Expected '{' after \\[....]\n");
5644       return PR_OK;
5645       }
5646 
5647     li = strtol((const char *)p, &endptr, 10);
5648     if (S32OVERFLOW(li))
5649       {
5650       fprintf(outfile, "** Repeat count too large\n");
5651       return PR_OK;
5652       }
5653 
5654     p = (uint8_t *)endptr;
5655     if (*p++ != '}')
5656       {
5657       fprintf(outfile, "** Expected '}' after \\[...]{...\n");
5658       return PR_OK;
5659       }
5660 
5661     i = (int32_t)li;
5662     if (i-- == 0)
5663       {
5664       fprintf(outfile, "** Zero repeat not allowed\n");
5665       return PR_OK;
5666       }
5667 
5668     replen = CAST8VAR(q) - start_rep;
5669     needlen += replen * i;
5670 
5671     if (needlen >= dbuffer_size)
5672       {
5673       while (needlen >= dbuffer_size) dbuffer_size *= 2;
5674       dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
5675       if (dbuffer == NULL)
5676         {
5677         fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
5678         exit(1);
5679         }
5680       SETCASTPTR(q, dbuffer + qoffset);
5681       start_rep = dbuffer + rep_offset;
5682       }
5683 
5684     while (i-- > 0)
5685       {
5686       memcpy(CAST8VAR(q), start_rep, replen);
5687       SETPLUS(q, replen/code_unit_size);
5688       }
5689 
5690     start_rep = NULL;
5691     continue;
5692     }
5693 
5694   /* Handle a non-escaped character */
5695 
5696   if (c != '\\')
5697     {
5698     if (utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
5699     }
5700 
5701   /* Handle backslash escapes */
5702 
5703   else switch ((c = *p++))
5704     {
5705     case '\\': break;
5706     case 'a': c = CHAR_BEL; break;
5707     case 'b': c = '\b'; break;
5708     case 'e': c = CHAR_ESC; break;
5709     case 'f': c = '\f'; break;
5710     case 'n': c = '\n'; break;
5711     case 'r': c = '\r'; break;
5712     case 't': c = '\t'; break;
5713     case 'v': c = '\v'; break;
5714 
5715     case '0': case '1': case '2': case '3':
5716     case '4': case '5': case '6': case '7':
5717     c -= '0';
5718     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
5719       c = c * 8 + *p++ - '0';
5720     break;
5721 
5722     case 'o':
5723     if (*p == '{')
5724       {
5725       uint8_t *pt = p;
5726       c = 0;
5727       for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
5728         {
5729         if (++i == 12)
5730           fprintf(outfile, "** Too many octal digits in \\o{...} item; "
5731                            "using only the first twelve.\n");
5732         else c = c * 8 + *pt - '0';
5733         }
5734       if (*pt == '}') p = pt + 1;
5735         else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
5736       }
5737     break;
5738 
5739     case 'x':
5740     if (*p == '{')
5741       {
5742       uint8_t *pt = p;
5743       c = 0;
5744 
5745       /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
5746       when isxdigit() is a macro that refers to its argument more than
5747       once. This is banned by the C Standard, but apparently happens in at
5748       least one MacOS environment. */
5749 
5750       for (pt++; isxdigit(*pt); pt++)
5751         {
5752         if (++i == 9)
5753           fprintf(outfile, "** Too many hex digits in \\x{...} item; "
5754                            "using only the first eight.\n");
5755         else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
5756         }
5757       if (*pt == '}')
5758         {
5759         p = pt + 1;
5760         break;
5761         }
5762       /* Not correct form for \x{...}; fall through */
5763       }
5764 
5765     /* \x without {} always defines just one byte in 8-bit mode. This
5766     allows UTF-8 characters to be constructed byte by byte, and also allows
5767     invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
5768     Otherwise, pass it down as data. */
5769 
5770     c = 0;
5771     while (i++ < 2 && isxdigit(*p))
5772       {
5773       c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
5774       p++;
5775       }
5776 #if defined SUPPORT_PCRE2_8
5777     if (utf && (test_mode == PCRE8_MODE))
5778       {
5779       *q8++ = c;
5780       continue;
5781       }
5782 #endif
5783     break;
5784 
5785     case 0:     /* \ followed by EOF allows for an empty line */
5786     p--;
5787     continue;
5788 
5789     case '=':   /* \= terminates the data, starts modifiers */
5790     goto ENDSTRING;
5791 
5792     case '[':   /* \[ introduces a replicated character sequence */
5793     if (start_rep != NULL)
5794       {
5795       fprintf(outfile, "** Nested replication is not supported\n");
5796       return PR_OK;
5797       }
5798     start_rep = CAST8VAR(q);
5799     continue;
5800 
5801     default:
5802     if (isalnum(c))
5803       {
5804       fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
5805       return PR_OK;
5806       }
5807     }
5808 
5809   /* We now have a character value in c that may be greater than 255.
5810   In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
5811   than 127 in UTF mode must have come from \x{...} or octal constructs
5812   because values from \x.. get this far only in non-UTF mode. */
5813 
5814 #ifdef SUPPORT_PCRE2_8
5815   if (test_mode == PCRE8_MODE)
5816     {
5817     if (utf)
5818       {
5819       if (c > 0x7fffffff)
5820         {
5821         fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
5822           "and so cannot be converted to UTF-8\n", c);
5823         return PR_OK;
5824         }
5825       q8 += ord2utf8(c, q8);
5826       }
5827     else
5828       {
5829       if (c > 0xffu)
5830         {
5831         fprintf(outfile, "** Character \\x{%x} is greater than 255 "
5832           "and UTF-8 mode is not enabled.\n", c);
5833         fprintf(outfile, "** Truncation will probably give the wrong "
5834           "result.\n");
5835         }
5836       *q8++ = c;
5837       }
5838     }
5839 #endif
5840 #ifdef SUPPORT_PCRE2_16
5841   if (test_mode == PCRE16_MODE)
5842     {
5843     if (utf)
5844       {
5845       if (c > 0x10ffffu)
5846         {
5847         fprintf(outfile, "** Failed: character \\x{%x} is greater than "
5848           "0x10ffff and so cannot be converted to UTF-16\n", c);
5849         return PR_OK;
5850         }
5851       else if (c >= 0x10000u)
5852         {
5853         c-= 0x10000u;
5854         *q16++ = 0xD800 | (c >> 10);
5855         *q16++ = 0xDC00 | (c & 0x3ff);
5856         }
5857       else
5858         *q16++ = c;
5859       }
5860     else
5861       {
5862       if (c > 0xffffu)
5863         {
5864         fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
5865           "and UTF-16 mode is not enabled.\n", c);
5866         fprintf(outfile, "** Truncation will probably give the wrong "
5867           "result.\n");
5868         }
5869 
5870       *q16++ = c;
5871       }
5872     }
5873 #endif
5874 #ifdef SUPPORT_PCRE2_32
5875   if (test_mode == PCRE32_MODE)
5876     {
5877     *q32++ = c;
5878     }
5879 #endif
5880   }
5881 
5882 ENDSTRING:
5883 SET(*q, 0);
5884 len = CASTVAR(uint8_t *, q) - dbuffer;    /* Length in bytes */
5885 ulen = len/code_unit_size;                /* Length in code units */
5886 
5887 /* If the string was terminated by \= we must now interpret modifiers. */
5888 
5889 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
5890   return PR_OK;
5891 
5892 /* Check for mutually exclusive modifiers. At present, these are all in the
5893 first control word. */
5894 
5895 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
5896   {
5897   c = dat_datctl.control & exclusive_dat_controls[k];
5898   if (c != 0 && c != (c & (~c+1)))
5899     {
5900     show_controls(c, 0, "** Not allowed together:");
5901     fprintf(outfile, "\n");
5902     return PR_OK;
5903     }
5904   }
5905 
5906 if (pat_patctl.replacement[0] != 0 &&
5907     (dat_datctl.control & CTL_NULLCONTEXT) != 0)
5908   {
5909   fprintf(outfile, "** Replacement text is not supported with null_context.\n");
5910   return PR_OK;
5911   }
5912 
5913 /* We now have the subject in dbuffer, with len containing the byte length, and
5914 ulen containing the code unit length. Move the data to the end of the buffer so
5915 that a read over the end can be caught by valgrind or other means. If we have
5916 explicit valgrind support, mark the unused start of the buffer unaddressable.
5917 If we are using the POSIX interface, or testing zero-termination, we must
5918 include the terminating zero in the usable data. */
5919 
5920 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
5921                        (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
5922 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
5923 #ifdef SUPPORT_VALGRIND
5924   VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
5925 #endif
5926 
5927 /* Now pp points to the subject string. POSIX matching is only possible in
5928 8-bit mode, and it does not support timing or other fancy features. Some were
5929 checked at compile time, but we need to check the match-time settings here. */
5930 
5931 #ifdef SUPPORT_PCRE2_8
5932 if ((pat_patctl.control & CTL_POSIX) != 0)
5933   {
5934   int rc;
5935   int eflags = 0;
5936   regmatch_t *pmatch = NULL;
5937   const char *msg = "** Ignored with POSIX interface:";
5938 
5939   if (dat_datctl.cfail[0] != CFAIL_UNSET || dat_datctl.cfail[1] != CFAIL_UNSET)
5940     prmsg(&msg, "callout_fail");
5941   if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
5942     prmsg(&msg, "copy");
5943   if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
5944     prmsg(&msg, "get");
5945   if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
5946 
5947   if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
5948     {
5949     fprintf(outfile, "%s", msg);
5950     show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
5951     msg = "";
5952     }
5953   if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
5954       (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
5955     {
5956     show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
5957                   dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
5958     msg = "";
5959     }
5960 
5961   if (msg[0] == 0) fprintf(outfile, "\n");
5962 
5963   if (dat_datctl.oveccount > 0)
5964     pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
5965   if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
5966   if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
5967   if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
5968 
5969   rc = regexec(&preg, (const char *)pp + dat_datctl.offset,
5970     dat_datctl.oveccount, pmatch, eflags);
5971   if (rc != 0)
5972     {
5973     (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
5974     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
5975     }
5976   else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
5977     fprintf(outfile, "Matched with REG_NOSUB\n");
5978   else if (dat_datctl.oveccount == 0)
5979     fprintf(outfile, "Matched without capture\n");
5980   else
5981     {
5982     size_t i;
5983     for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
5984       {
5985       if (pmatch[i].rm_so >= 0)
5986         {
5987         fprintf(outfile, "%2d: ", (int)i);
5988         PCHARSV(pp, pmatch[i].rm_so,
5989           pmatch[i].rm_eo - pmatch[i].rm_so, utf, outfile);
5990         fprintf(outfile, "\n");
5991         if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
5992             (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
5993           {
5994           fprintf(outfile, "%2d+ ", (int)i);
5995           PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
5996             utf, outfile);
5997           fprintf(outfile, "\n");
5998           }
5999         }
6000       }
6001     }
6002   free(pmatch);
6003   return PR_OK;
6004   }
6005 #endif  /* SUPPORT_PCRE2_8 */
6006 
6007  /* Handle matching via the native interface. Check for consistency of
6008 modifiers. */
6009 
6010 if ((dat_datctl.control & (CTL_DFA|CTL_FINDLIMITS)) == (CTL_DFA|CTL_FINDLIMITS))
6011   {
6012   fprintf(outfile, "** Finding match limits is not relevant for DFA matching: ignored\n");
6013   dat_datctl.control &= ~CTL_FINDLIMITS;
6014   }
6015 
6016 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
6017 matching, even if the JIT compiler was used. */
6018 
6019 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
6020     FLD(compiled_code, executable_jit) != NULL)
6021   {
6022   fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
6023   dat_datctl.control &= ~CTL_ALLUSEDTEXT;
6024   }
6025 
6026 /* Handle passing the subject as zero-terminated. */
6027 
6028 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
6029   ulen = PCRE2_ZERO_TERMINATED;
6030 
6031 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
6032 NULL context. */
6033 
6034 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
6035   NULL : PTR(dat_context);
6036 
6037 /* Enable display of malloc/free if wanted. */
6038 
6039 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
6040 
6041 /* Create and assign a JIT stack if requested. */
6042 
6043 if (dat_datctl.jitstack != 0)
6044   {
6045   if (dat_datctl.jitstack != jit_stack_size)
6046     {
6047     PCRE2_JIT_STACK_FREE(jit_stack);
6048     PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
6049     jit_stack_size = dat_datctl.jitstack;
6050     }
6051   PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
6052   }
6053 
6054 /* Or de-assign */
6055 
6056 else if (jit_stack != NULL)
6057   {
6058   PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
6059   PCRE2_JIT_STACK_FREE(jit_stack);
6060   jit_stack = NULL;
6061   jit_stack_size = 0;
6062   }
6063 
6064 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
6065 if we want to verify that JIT was actually used. */
6066 
6067 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
6068    {
6069    PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
6070    }
6071 
6072 /* Adjust match_data according to size of offsets required. A size of zero
6073 causes a new match data block to be obtained that exactly fits the pattern. */
6074 
6075 if (dat_datctl.oveccount == 0)
6076   {
6077   PCRE2_MATCH_DATA_FREE(match_data);
6078   PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL);
6079   PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
6080   }
6081 else if (dat_datctl.oveccount <= max_oveccount)
6082   {
6083   SETFLD(match_data, oveccount, dat_datctl.oveccount);
6084   }
6085 else
6086   {
6087   max_oveccount = dat_datctl.oveccount;
6088   PCRE2_MATCH_DATA_FREE(match_data);
6089   PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
6090   }
6091 
6092 /* Replacement processing is ignored for DFA matching. */
6093 
6094 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
6095   {
6096   fprintf(outfile, "** Ignored for DFA matching: replace\n");
6097   dat_datctl.replacement[0] = 0;
6098   }
6099 
6100 /* If a replacement string is provided, call pcre2_substitute() instead of one
6101 of the matching functions. First we have to convert the replacement string to
6102 the appropriate width. */
6103 
6104 if (dat_datctl.replacement[0] != 0)
6105   {
6106   int rc;
6107   uint8_t *pr;
6108   uint8_t rbuffer[REPLACE_BUFFSIZE];
6109   uint8_t nbuffer[REPLACE_BUFFSIZE];
6110   uint32_t xoptions;
6111   PCRE2_SIZE rlen, nsize, erroroffset;
6112   BOOL badutf = FALSE;
6113 
6114 #ifdef SUPPORT_PCRE2_8
6115   uint8_t *r8 = NULL;
6116 #endif
6117 #ifdef SUPPORT_PCRE2_16
6118   uint16_t *r16 = NULL;
6119 #endif
6120 #ifdef SUPPORT_PCRE2_32
6121   uint32_t *r32 = NULL;
6122 #endif
6123 
6124   if (timeitm)
6125     fprintf(outfile, "** Timing is not supported with replace: ignored\n");
6126 
6127   xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
6128                 PCRE2_SUBSTITUTE_GLOBAL) |
6129              (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
6130                 PCRE2_SUBSTITUTE_EXTENDED) |
6131              (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
6132                 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
6133              (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
6134                 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
6135              (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
6136                 PCRE2_SUBSTITUTE_UNSET_EMPTY);
6137 
6138   SETCASTPTR(r, rbuffer);  /* Sets r8, r16, or r32, as appropriate. */
6139   pr = dat_datctl.replacement;
6140 
6141   /* If the replacement starts with '[<number>]' we interpret that as a length
6142   value for the replacement buffer. */
6143 
6144   nsize = REPLACE_BUFFSIZE/code_unit_size;
6145   if (*pr == '[')
6146     {
6147     PCRE2_SIZE n = 0;
6148     while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
6149     if (*pr++ != ']')
6150       {
6151       fprintf(outfile, "Bad buffer size in replacement string\n");
6152       return PR_OK;
6153       }
6154     if (n > nsize)
6155       {
6156       fprintf(outfile, "Replacement buffer setting (%lu) is too large "
6157         "(max %lu)\n", (unsigned long int)n, (unsigned long int)nsize);
6158       return PR_OK;
6159       }
6160     nsize = n;
6161     }
6162 
6163   /* Now copy the replacement string to a buffer of the appropriate width. No
6164   escape processing is done for replacements. In UTF mode, check for an invalid
6165   UTF-8 input string, and if it is invalid, just copy its code units without
6166   UTF interpretation. This provides a means of checking that an invalid string
6167   is detected. Otherwise, UTF-8 can be used to include wide characters in a
6168   replacement. */
6169 
6170   if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
6171 
6172   /* Not UTF or invalid UTF-8: just copy the code units. */
6173 
6174   if (!utf || badutf)
6175     {
6176     while ((c = *pr++) != 0)
6177       {
6178 #ifdef SUPPORT_PCRE2_8
6179       if (test_mode == PCRE8_MODE) *r8++ = c;
6180 #endif
6181 #ifdef SUPPORT_PCRE2_16
6182       if (test_mode == PCRE16_MODE) *r16++ = c;
6183 #endif
6184 #ifdef SUPPORT_PCRE2_32
6185       if (test_mode == PCRE32_MODE) *r32++ = c;
6186 #endif
6187       }
6188     }
6189 
6190   /* Valid UTF-8 replacement string */
6191 
6192   else while ((c = *pr++) != 0)
6193     {
6194     if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
6195 
6196 #ifdef SUPPORT_PCRE2_8
6197     if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
6198 #endif
6199 
6200 #ifdef SUPPORT_PCRE2_16
6201     if (test_mode == PCRE16_MODE)
6202       {
6203       if (c >= 0x10000u)
6204         {
6205         c-= 0x10000u;
6206         *r16++ = 0xD800 | (c >> 10);
6207         *r16++ = 0xDC00 | (c & 0x3ff);
6208         }
6209       else *r16++ = c;
6210       }
6211 #endif
6212 
6213 #ifdef SUPPORT_PCRE2_32
6214     if (test_mode == PCRE32_MODE) *r32++ = c;
6215 #endif
6216     }
6217 
6218   SET(*r, 0);
6219   if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
6220     rlen = PCRE2_ZERO_TERMINATED;
6221   else
6222     rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
6223   PCRE2_SUBSTITUTE(rc, compiled_code, pp, ulen, dat_datctl.offset,
6224     dat_datctl.options|xoptions, match_data, dat_context,
6225     rbuffer, rlen, nbuffer, &nsize);
6226 
6227   if (rc < 0)
6228     {
6229     PCRE2_SIZE msize;
6230     fprintf(outfile, "Failed: error %d", rc);
6231     if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
6232       fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
6233     fprintf(outfile, ": ");
6234     PCRE2_GET_ERROR_MESSAGE(msize, rc, pbuffer);
6235     PCHARSV(CASTVAR(void *, pbuffer), 0, msize, FALSE, outfile);
6236     if (rc == PCRE2_ERROR_NOMEMORY &&
6237         (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
6238       fprintf(outfile, ": %ld code units are needed", (long int)nsize);
6239     }
6240   else
6241     {
6242     fprintf(outfile, "%2d: ", rc);
6243     PCHARSV(nbuffer, 0, nsize, utf, outfile);
6244     }
6245 
6246   fprintf(outfile, "\n");
6247   }   /* End of substitution handling */
6248 
6249 /* When a replacement string is not provided, run a loop for global matching
6250 with one of the basic matching functions. */
6251 
6252 else for (gmatched = 0;; gmatched++)
6253   {
6254   PCRE2_SIZE j;
6255   int capcount;
6256   PCRE2_SIZE *ovector;
6257   PCRE2_SIZE ovecsave[2];
6258 
6259   ovector = FLD(match_data, ovector);
6260 
6261   /* After the first time round a global loop, for a normal global (/g)
6262   iteration, save the current ovector[0,1] so that we can check that they do
6263   change each time. Otherwise a matching bug that returns the same string
6264   causes an infinite loop. It has happened! */
6265 
6266   if (gmatched > 0 && (dat_datctl.control & CTL_GLOBAL) != 0)
6267     {
6268     ovecsave[0] = ovector[0];
6269     ovecsave[1] = ovector[1];
6270     }
6271 
6272   /* For altglobal (or first time round the loop), set an "unset" value. */
6273 
6274   else ovecsave[0] = ovecsave[1] = PCRE2_UNSET;
6275 
6276   /* Fill the ovector with junk to detect elements that do not get set
6277   when they should be. */
6278 
6279   for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
6280 
6281   /* When matching is via pcre2_match(), we will detect the use of JIT via the
6282   stack callback function. */
6283 
6284   jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
6285 
6286   /* Do timing if required. */
6287 
6288   if (timeitm > 0)
6289     {
6290     register int i;
6291     clock_t start_time, time_taken;
6292 
6293     if ((dat_datctl.control & CTL_DFA) != 0)
6294       {
6295       if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
6296         {
6297         fprintf(outfile, "Timing DFA restarts is not supported\n");
6298         return PR_OK;
6299         }
6300       if (dfa_workspace == NULL)
6301         dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6302       start_time = clock();
6303       for (i = 0; i < timeitm; i++)
6304         {
6305         PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen,
6306           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
6307           use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
6308         }
6309       }
6310 
6311     else if ((pat_patctl.control & CTL_JITFAST) != 0)
6312       {
6313       start_time = clock();
6314       for (i = 0; i < timeitm; i++)
6315         {
6316         PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen,
6317           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
6318           use_dat_context);
6319         }
6320       }
6321 
6322     else
6323       {
6324       start_time = clock();
6325       for (i = 0; i < timeitm; i++)
6326         {
6327         PCRE2_MATCH(capcount, compiled_code, pp, ulen,
6328           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
6329           use_dat_context);
6330         }
6331       }
6332     total_match_time += (time_taken = clock() - start_time);
6333     fprintf(outfile, "Match time %.4f milliseconds\n",
6334       (((double)time_taken * 1000.0) / (double)timeitm) /
6335         (double)CLOCKS_PER_SEC);
6336     }
6337 
6338   /* Find the match and recursion limits if requested. The recursion limit
6339   is not relevant for JIT. */
6340 
6341   if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
6342     {
6343     capcount = check_match_limit(pp, ulen, PCRE2_ERROR_MATCHLIMIT, "match");
6344     if (FLD(compiled_code, executable_jit) == NULL)
6345       (void)check_match_limit(pp, ulen, PCRE2_ERROR_RECURSIONLIMIT,
6346         "recursion");
6347     }
6348 
6349   /* Otherwise just run a single match, setting up a callout if required (the
6350   default). */
6351 
6352   else
6353     {
6354     if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
6355       {
6356       PCRE2_SET_CALLOUT(dat_context, callout_function,
6357         (void *)(&dat_datctl.callout_data));
6358       first_callout = TRUE;
6359       last_callout_mark = NULL;
6360       callout_count = 0;
6361       }
6362     else
6363       {
6364       PCRE2_SET_CALLOUT(dat_context, NULL, NULL);  /* No callout */
6365       }
6366 
6367     /* Run a single DFA or NFA match. */
6368 
6369     if ((dat_datctl.control & CTL_DFA) != 0)
6370       {
6371       if (dfa_workspace == NULL)
6372         dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6373       if (dfa_matched++ == 0)
6374         dfa_workspace[0] = -1;  /* To catch bad restart */
6375       PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen,
6376         dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
6377         use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
6378       if (capcount == 0)
6379         {
6380         fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
6381         capcount = dat_datctl.oveccount;
6382         }
6383       }
6384     else
6385       {
6386       if ((pat_patctl.control & CTL_JITFAST) != 0)
6387         PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6388           dat_datctl.options | g_notempty, match_data, use_dat_context);
6389       else
6390         PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6391           dat_datctl.options | g_notempty, match_data, use_dat_context);
6392       if (capcount == 0)
6393         {
6394         fprintf(outfile, "Matched, but too many substrings\n");
6395         capcount = dat_datctl.oveccount;
6396         }
6397       }
6398     }
6399 
6400   /* The result of the match is now in capcount. First handle a successful
6401   match. */
6402 
6403   if (capcount >= 0)
6404     {
6405     int i;
6406     uint32_t oveccount;
6407 
6408     /* This is a check against a lunatic return value. */
6409 
6410     PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
6411     if (capcount > (int)oveccount)
6412       {
6413       fprintf(outfile,
6414         "** PCRE2 error: returned count %d is too big for ovector count %d\n",
6415         capcount, oveccount);
6416       capcount = oveccount;
6417       if ((dat_datctl.control & CTL_ANYGLOB) != 0)
6418         {
6419         fprintf(outfile, "** Global loop abandoned\n");
6420         dat_datctl.control &= ~CTL_ANYGLOB;        /* Break g/G loop */
6421         }
6422       }
6423 
6424     /* If this is not the first time round a global loop, check that the
6425     returned string has changed. If not, there is a bug somewhere and we must
6426     break the loop because it will go on for ever. We know that there are
6427     always at least two elements in the ovector. */
6428 
6429     if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
6430       {
6431       fprintf(outfile,
6432         "** PCRE2 error: global repeat returned the same string as previous\n");
6433       fprintf(outfile, "** Global loop abandoned\n");
6434       dat_datctl.control &= ~CTL_ANYGLOB;        /* Break g/G loop */
6435       }
6436 
6437     /* "allcaptures" requests showing of all captures in the pattern, to check
6438     unset ones at the end. It may be set on the pattern or the data. Implement
6439     by setting capcount to the maximum. This is not relevant for DFA matching,
6440     so ignore it. */
6441 
6442     if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
6443       {
6444       uint32_t maxcapcount;
6445       if ((dat_datctl.control & CTL_DFA) != 0)
6446         {
6447         fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
6448         }
6449       else
6450         {
6451         if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
6452           return PR_SKIP;
6453         capcount = maxcapcount + 1;   /* Allow for full match */
6454         if (capcount > (int)oveccount) capcount = oveccount;
6455         }
6456       }
6457 
6458     /* Output the captured substrings. Note that, for the matched string,
6459     the use of \K in an assertion can make the start later than the end. */
6460 
6461     for (i = 0; i < 2*capcount; i += 2)
6462       {
6463       PCRE2_SIZE lleft, lmiddle, lright;
6464       PCRE2_SIZE start = ovector[i];
6465       PCRE2_SIZE end = ovector[i+1];
6466 
6467       if (start > end)
6468         {
6469         start = ovector[i+1];
6470         end = ovector[i];
6471         fprintf(outfile, "Start of matched string is beyond its end - "
6472           "displaying from end to start.\n");
6473         }
6474 
6475       fprintf(outfile, "%2d: ", i/2);
6476 
6477       /* Check for an unset group */
6478 
6479       if (start == PCRE2_UNSET)
6480         {
6481         fprintf(outfile, "<unset>\n");
6482         continue;
6483         }
6484 
6485       /* Check for silly offsets, in particular, values that have not been
6486       set when they should have been. */
6487 
6488       if (start > ulen || end > ulen)
6489         {
6490         fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
6491           (unsigned long int)start, (unsigned long int)end);
6492         continue;
6493         }
6494 
6495       /* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
6496       JIT, it is disabled above, with a comment.) When the match is done by the
6497       interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
6498       set, and if the leftmost consulted character is before the start of the
6499       match or the rightmost consulted character is past the end of the match,
6500       we want to show all consulted characters for the main matched string, and
6501       indicate which were lookarounds. */
6502 
6503       if (i == 0)
6504         {
6505         BOOL showallused;
6506         PCRE2_SIZE leftchar, rightchar;
6507 
6508         if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
6509           {
6510           leftchar = FLD(match_data, leftchar);
6511           rightchar = FLD(match_data, rightchar);
6512           showallused = i == 0 && (leftchar < start || rightchar > end);
6513           }
6514         else showallused = FALSE;
6515 
6516         if (showallused)
6517           {
6518           PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
6519           PCHARS(lmiddle, pp, start, end - start, utf, outfile);
6520           PCHARS(lright, pp, end, rightchar - end, utf, outfile);
6521           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
6522             fprintf(outfile, " (JIT)");
6523           fprintf(outfile, "\n    ");
6524           for (j = 0; j < lleft; j++) fprintf(outfile, "<");
6525           for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
6526           for (j = 0; j < lright; j++) fprintf(outfile, ">");
6527           }
6528 
6529         /* When a pattern contains \K, the start of match position may be
6530         different to the start of the matched string. When this is the case,
6531         show it when requested. */
6532 
6533         else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
6534           {
6535           PCRE2_SIZE startchar;
6536           PCRE2_GET_STARTCHAR(startchar, match_data);
6537           PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
6538           PCHARSV(pp, start, end - start, utf, outfile);
6539           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
6540             fprintf(outfile, " (JIT)");
6541           if (startchar != start)
6542             {
6543             fprintf(outfile, "\n    ");
6544             for (j = 0; j < lleft; j++) fprintf(outfile, "^");
6545             }
6546           }
6547 
6548         /* Otherwise, just show the matched string. */
6549 
6550         else
6551           {
6552           PCHARSV(pp, start, end - start, utf, outfile);
6553           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
6554             fprintf(outfile, " (JIT)");
6555           }
6556         }
6557 
6558       /* Not the main matched string. Just show it unadorned. */
6559 
6560       else
6561         {
6562         PCHARSV(pp, start, end - start, utf, outfile);
6563         }
6564 
6565       fprintf(outfile, "\n");
6566 
6567       /* Note: don't use the start/end variables here because we want to
6568       show the text from what is reported as the end. */
6569 
6570       if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
6571           (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
6572         {
6573         fprintf(outfile, "%2d+ ", i/2);
6574         PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
6575         fprintf(outfile, "\n");
6576         }
6577       }
6578 
6579     /* Output (*MARK) data if requested */
6580 
6581     if ((dat_datctl.control & CTL_MARK) != 0 &&
6582          TESTFLD(match_data, mark, !=, NULL))
6583       {
6584       fprintf(outfile, "MK: ");
6585       PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
6586       fprintf(outfile, "\n");
6587       }
6588 
6589     /* Process copy/get strings */
6590 
6591     copy_and_get(utf, capcount);
6592 
6593     }    /* End of handling a successful match */
6594 
6595   /* There was a partial match. The value of ovector[0] is the bumpalong point,
6596   that is, startchar, not any \K point that might have been passed. */
6597 
6598   else if (capcount == PCRE2_ERROR_PARTIAL)
6599     {
6600     PCRE2_SIZE poffset;
6601     int backlength;
6602     int rubriclength = 0;
6603 
6604     fprintf(outfile, "Partial match");
6605     if ((dat_datctl.control & CTL_MARK) != 0 &&
6606          TESTFLD(match_data, mark, !=, NULL))
6607       {
6608       fprintf(outfile, ", mark=");
6609       PCHARS(rubriclength, CASTFLD(void *, match_data, mark), 0, -1, utf,
6610         outfile);
6611       rubriclength += 7;
6612       }
6613     fprintf(outfile, ": ");
6614     rubriclength += 15;
6615 
6616     poffset = backchars(pp, ovector[0], maxlookbehind, utf);
6617     PCHARS(backlength, pp, poffset, ovector[0] - poffset, utf, outfile);
6618     PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
6619 
6620     if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
6621       fprintf(outfile, " (JIT)");
6622     fprintf(outfile, "\n");
6623 
6624     if (backlength != 0)
6625       {
6626       int i;
6627       for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
6628       for (i = 0; i < backlength; i++) fprintf(outfile, "<");
6629       fprintf(outfile, "\n");
6630       }
6631 
6632     /* Process copy/get strings */
6633 
6634     copy_and_get(utf, 1);
6635 
6636     break;  /* Out of the /g loop */
6637     }       /* End of handling partial match */
6638 
6639   /* Failed to match. If this is a /g or /G loop, we might previously have
6640   set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
6641   If that is the case, this is not necessarily the end. We want to advance the
6642   start offset, and continue. We won't be at the end of the string - that was
6643   checked before setting g_notempty. We achieve the effect by pretending that a
6644   single character was matched.
6645 
6646   Complication arises in the case when the newline convention is "any", "crlf",
6647   or "anycrlf". If the previous match was at the end of a line terminated by
6648   CRLF, an advance of one character just passes the CR, whereas we should
6649   prefer the longer newline sequence, as does the code in pcre2_match().
6650 
6651   Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
6652   character, not one byte. */
6653 
6654   else if (g_notempty != 0)   /* There was a previous null match */
6655     {
6656     uint16_t nl = FLD(compiled_code, newline_convention);
6657     PCRE2_SIZE start_offset = dat_datctl.offset;    /* Where the match was */
6658     PCRE2_SIZE end_offset = start_offset + 1;
6659 
6660     if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
6661          nl == PCRE2_NEWLINE_ANYCRLF) &&
6662         start_offset < ulen - 1 &&
6663         CODE_UNIT(pp, start_offset) == '\r' &&
6664         CODE_UNIT(pp, end_offset) == '\n')
6665       end_offset++;
6666 
6667     else if (utf && test_mode != PCRE32_MODE)
6668       {
6669       if (test_mode == PCRE8_MODE)
6670         {
6671         for (; end_offset < ulen; end_offset++)
6672           if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
6673         }
6674       else  /* 16-bit mode */
6675         {
6676         for (; end_offset < ulen; end_offset++)
6677           if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
6678         }
6679       }
6680 
6681     SETFLDVEC(match_data, ovector, 0, start_offset);
6682     SETFLDVEC(match_data, ovector, 1, end_offset);
6683     }  /* End of handling null match in a global loop */
6684 
6685   /* A "normal" match failure. There will be a negative error number in
6686   capcount. */
6687 
6688   else
6689     {
6690     int mlen;
6691 
6692     switch(capcount)
6693       {
6694       case PCRE2_ERROR_NOMATCH:
6695       if (gmatched == 0)
6696         {
6697         fprintf(outfile, "No match");
6698         if ((dat_datctl.control & CTL_MARK) != 0 &&
6699              TESTFLD(match_data, mark, !=, NULL))
6700           {
6701           fprintf(outfile, ", mark = ");
6702           PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
6703           }
6704         if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
6705           fprintf(outfile, " (JIT)");
6706         fprintf(outfile, "\n");
6707         }
6708       break;
6709 
6710       case PCRE2_ERROR_BADUTFOFFSET:
6711       fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
6712       break;
6713 
6714       default:
6715       fprintf(outfile, "Failed: error %d: ", capcount);
6716       PCRE2_GET_ERROR_MESSAGE(mlen, capcount, pbuffer);
6717       PCHARSV(CASTVAR(void *, pbuffer), 0, mlen, FALSE, outfile);
6718       if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
6719           capcount >= PCRE2_ERROR_UTF32_ERR2)
6720         {
6721         PCRE2_SIZE startchar;
6722         PCRE2_GET_STARTCHAR(startchar, match_data);
6723         fprintf(outfile, " at offset %lu", (unsigned long int)startchar);
6724         }
6725       fprintf(outfile, "\n");
6726       break;
6727       }
6728 
6729     break;  /* Out of the /g loop */
6730     }       /* End of failed match handling */
6731 
6732   /* Control reaches here in two circumstances: (a) after a match, and (b)
6733   after a non-match that immediately followed a match on an empty string when
6734   doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
6735   PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
6736   of one character. So effectively we get here only after a match. If we
6737   are not doing a global search, we are done. */
6738 
6739   if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
6740     {
6741     PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
6742 
6743     /* We must now set up for the next iteration of a global search. If we have
6744     matched an empty string, first check to see if we are at the end of the
6745     subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
6746     does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
6747     at the same point. If this fails it will be picked up above, where a fake
6748     match is set up so that at this point we advance to the next character. */
6749 
6750     if (FLD(match_data, ovector)[0] == end_offset)
6751       {
6752       if (end_offset == ulen) break;      /* End of subject */
6753       g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
6754       }
6755 
6756     /* However, even after matching a non-empty string, there is still one
6757     tricky case. If a pattern contains \K within a lookbehind assertion at the
6758     start, the end of the matched string can be at the offset where the match
6759     started. In the case of a normal /g iteration without special action, this
6760     leads to a loop that keeps on returning the same substring. The loop would
6761     be caught above, but we really want to move on to the next match. */
6762 
6763     else
6764       {
6765       g_notempty = 0;   /* Set for a "normal" repeat */
6766       if ((dat_datctl.control & CTL_GLOBAL) != 0)
6767         {
6768         PCRE2_SIZE startchar;
6769         PCRE2_GET_STARTCHAR(startchar, match_data);
6770         if (end_offset <= startchar)
6771           {
6772           if (startchar >= ulen) break;       /* End of subject */
6773           end_offset = startchar + 1;
6774           if (utf && test_mode != PCRE32_MODE)
6775             {
6776             if (test_mode == PCRE8_MODE)
6777               {
6778               for (; end_offset < ulen; end_offset++)
6779                 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
6780               }
6781             else  /* 16-bit mode */
6782               {
6783               for (; end_offset < ulen; end_offset++)
6784                 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
6785               }
6786             }
6787           }
6788         }
6789       }
6790 
6791     /* For /g (global), update the start offset, leaving the rest alone. */
6792 
6793     if ((dat_datctl.control & CTL_GLOBAL) != 0)
6794       dat_datctl.offset = end_offset;
6795 
6796     /* For altglobal, just update the pointer and length. */
6797 
6798     else
6799       {
6800       pp += end_offset * code_unit_size;
6801       len -= end_offset * code_unit_size;
6802       ulen -= end_offset;
6803       }
6804     }
6805   }  /* End of global loop */
6806 
6807 show_memory = FALSE;
6808 return PR_OK;
6809 }
6810 
6811 
6812 
6813 
6814 /*************************************************
6815 *               Print PCRE2 version              *
6816 *************************************************/
6817 
6818 static void
print_version(FILE * f)6819 print_version(FILE *f)
6820 {
6821 VERSION_TYPE *vp;
6822 fprintf(f, "PCRE2 version ");
6823 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
6824 fprintf(f, "\n");
6825 }
6826 
6827 
6828 
6829 /*************************************************
6830 *               Print Unicode version            *
6831 *************************************************/
6832 
6833 static void
print_unicode_version(FILE * f)6834 print_unicode_version(FILE *f)
6835 {
6836 VERSION_TYPE *vp;
6837 fprintf(f, "Unicode version ");
6838 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
6839 }
6840 
6841 
6842 
6843 /*************************************************
6844 *               Print JIT target                 *
6845 *************************************************/
6846 
6847 static void
print_jit_target(FILE * f)6848 print_jit_target(FILE *f)
6849 {
6850 VERSION_TYPE *vp;
6851 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
6852 }
6853 
6854 
6855 
6856 /*************************************************
6857 *       Print newline configuration              *
6858 *************************************************/
6859 
/* Output is always to stdout.

Arguments:
  optval     the value obtained for PCRE2_CONFIG_NEWLINE
  isc        TRUE if called from "-C newline"
Returns:     nothing
*/
6867 
6868 static void
print_newline_config(uint32_t optval,BOOL isc)6869 print_newline_config(uint32_t optval, BOOL isc)
6870 {
6871 if (!isc) printf("  Newline sequence is ");
6872 if (optval < sizeof(newlines)/sizeof(char *))
6873   printf("%s\n", newlines[optval]);
6874 else
6875   printf("a non-standard value: %d\n", optval);
6876 }
6877 
6878 
6879 
6880 /*************************************************
6881 *             Usage function                     *
6882 *************************************************/
6883 
/* Print the command line synopsis and the list of options to stdout. The
lines describing readline support and the -8, -16, and -32 options depend on
how this program was built. */

static void
usage(void)
{
printf("Usage:     pcre2test [options] [<input file> [<output file>]]\n\n");
printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
printf("If input is a terminal, readline() is used to read from it.\n");
#else
printf("This version of pcre2test is not linked with readline().\n");
#endif
printf("\nOptions:\n");

/* Only the library widths that were compiled in are offered. */

#ifdef SUPPORT_PCRE2_8
printf("  -8            use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE2_16
printf("  -16           use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE2_32
printf("  -32           use the 32-bit library\n");
#endif
printf("  -b            set default pattern control 'fullbincode'\n");
printf("  -C            show PCRE2 compile-time options and exit\n");
printf("  -C arg        show a specific compile-time option and exit with its\n");
printf("                  value if numeric (else 0). The arg can be:\n");
printf("     backslash-C    use of \\C is enabled [0, 1]\n");
printf("     bsr            \\R type [ANYCRLF, ANY]\n");
printf("     ebcdic         compiled for EBCDIC character code [0,1]\n");
printf("     ebcdic-nl      NL code if compiled for EBCDIC\n");
printf("     jit            just-in-time compiler supported [0, 1]\n");
printf("     linksize       internal link size [2, 3, 4]\n");
printf("     newline        newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
printf("     pcre2-8        8 bit library support enabled [0, 1]\n");
printf("     pcre2-16       16 bit library support enabled [0, 1]\n");
printf("     pcre2-32       32 bit library support enabled [0, 1]\n");
printf("     unicode        Unicode and UTF support enabled [0, 1]\n");
printf("  -d            set default pattern control 'debug'\n");
printf("  -dfa          set default subject control 'dfa'\n");
printf("  -error <n,m,..>  show messages for error numbers, then exit\n");
printf("  -help         show usage information\n");
printf("  -i            set default pattern control 'info'\n");
printf("  -jit          set default pattern control 'jit'\n");
printf("  -q            quiet: do not output PCRE2 version number at start\n");
printf("  -pattern <s>  set default pattern control fields\n");
printf("  -subject <s>  set default subject control fields\n");
printf("  -S <n>        set stack size to <n> megabytes\n");
printf("  -t [<n>]      time compilation and execution, repeating <n> times\n");
printf("  -tm [<n>]     time execution (matching) only, repeating <n> times\n");
printf("  -T            same as -t, but show total times at the end\n");
printf("  -TM           same as -tm, but show total time at the end\n");
printf("  -version      show PCRE2 version and exit\n");
}
6935 
6936 
6937 
6938 /*************************************************
6939 *             Handle -C option                   *
6940 *************************************************/
6941 
6942 /* This option outputs configuration options and sets an appropriate return
6943 code when asked for a single option. The code is abstracted into a separate
6944 function because of its size. Use whichever pcre2_config() function is
6945 available.
6946 
6947 Argument:   an option name or NULL
6948 Returns:    the return code
6949 */
6950 
6951 static int
c_option(const char * arg)6952 c_option(const char *arg)
6953 {
6954 uint32_t optval;
6955 int yield = 0;
6956 
6957 if (arg != NULL)
6958   {
6959   unsigned int i;
6960 
6961   for (i = 0; i < COPTLISTCOUNT; i++)
6962     if (strcmp(arg, coptlist[i].name) == 0) break;
6963 
6964   if (i >= COPTLISTCOUNT)
6965     {
6966     fprintf(stderr, "** Unknown -C option '%s'\n", arg);
6967     return -1;
6968     }
6969 
6970   switch (coptlist[i].type)
6971     {
6972     case CONF_BSR:
6973     (void)PCRE2_CONFIG(coptlist[i].value, &optval);
6974     printf("%s\n", optval? "ANYCRLF" : "ANY");
6975     break;
6976 
6977     case CONF_FIX:
6978     yield = coptlist[i].value;
6979     printf("%d\n", yield);
6980     break;
6981 
6982     case CONF_FIZ:
6983     optval = coptlist[i].value;
6984     printf("%d\n", optval);
6985     break;
6986 
6987     case CONF_INT:
6988     (void)PCRE2_CONFIG(coptlist[i].value, &yield);
6989     printf("%d\n", yield);
6990     break;
6991 
6992     case CONF_NL:
6993     (void)PCRE2_CONFIG(coptlist[i].value, &optval);
6994     print_newline_config(optval, TRUE);
6995     break;
6996     }
6997 
6998 /* For VMS, return the value by setting a symbol, for certain values only. */
6999 
7000 #ifdef __VMS
7001   if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
7002     {
7003     char ucname[16];
7004     strcpy(ucname, coptlist[i].name);
7005     for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i];
7006     vms_setsymbol(ucname, 0, optval);
7007     }
7008 #endif
7009 
7010   return yield;
7011   }
7012 
7013 /* No argument for -C: output all configuration information. */
7014 
7015 print_version(stdout);
7016 printf("Compiled with\n");
7017 
7018 #ifdef EBCDIC
7019 printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
7020 #if defined NATIVE_ZOS
7021 printf("  EBCDIC code page %s or similar\n", pcrz_cpversion());
7022 #endif
7023 #endif
7024 
7025 #ifdef SUPPORT_PCRE2_8
7026 printf("  8-bit support\n");
7027 #endif
7028 #ifdef SUPPORT_PCRE2_16
7029 printf("  16-bit support\n");
7030 #endif
7031 #ifdef SUPPORT_PCRE2_32
7032 printf("  32-bit support\n");
7033 #endif
7034 
7035 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
7036 if (optval != 0)
7037   {
7038   printf("  UTF and UCP support (");
7039   print_unicode_version(stdout);
7040   printf(")\n");
7041   }
7042 else printf("  No Unicode support\n");
7043 
7044 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
7045 if (optval != 0)
7046   {
7047   printf("  Just-in-time compiler support: ");
7048   print_jit_target(stdout);
7049   printf("\n");
7050   }
7051 else
7052   {
7053   printf("  No just-in-time compiler support\n");
7054   }
7055 
7056 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
7057 print_newline_config(optval, FALSE);
7058 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
7059 printf("  \\R matches %s\n", optval? "CR, LF, or CRLF only" :
7060                                  "all Unicode newlines");
7061 #ifdef NEVER_BACKSLASH_C
7062 printf("  \\C is not supported\n");
7063 #else
7064 printf("  \\C is supported\n");
7065 #endif
7066 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
7067 printf("  Internal link size = %d\n", optval);
7068 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
7069 printf("  Parentheses nest limit = %d\n", optval);
7070 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
7071 printf("  Default match limit = %d\n", optval);
7072 (void)PCRE2_CONFIG(PCRE2_CONFIG_RECURSIONLIMIT, &optval);
7073 printf("  Default recursion depth limit = %d\n", optval);
7074 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &optval);
7075 printf("  Match recursion uses %s", optval? "stack" : "heap");
7076 
7077 printf("\n");
7078 return 0;
7079 }
7080 
7081 
7082 
7083 /*************************************************
7084 *                Main Program                    *
7085 *************************************************/
7086 
7087 int
7088 main(int argc, char **argv)
7089 {
7090 uint32_t yield = 0;
7091 uint32_t op = 1;
7092 uint32_t stack_size;
7093 BOOL notdone = TRUE;
7094 BOOL quiet = FALSE;
7095 BOOL showtotaltimes = FALSE;
7096 BOOL skipping = FALSE;
7097 char *arg_subject = NULL;
7098 char *arg_pattern = NULL;
7099 char *arg_error = NULL;
7100 
7101 /* The offsets to the options and control bits fields of the pattern and data
7102 control blocks must be the same so that common options and controls such as
7103 "anchored" or "memory" can work for either of them from a single table entry.
7104 We cannot test this till runtime because "offsetof" does not work in the
7105 preprocessor. */
7106 
7107 if (PO(options) != DO(options) || PO(control) != DO(control) ||
7108     PO(control2) != DO(control2))
7109   {
7110   fprintf(stderr, "** Coding error: "
7111     "options and control offsets for pattern and data must be the same.\n");
7112   return 1;
7113   }
7114 
7115 /* Get the PCRE2 and Unicode version number and JIT target information, at the
7116 same time checking that a request for the length gives the same answer. Also
7117 check lengths for non-string items. */
7118 
7119 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
7120     PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
7121 
7122     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
7123     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
7124 
7125     PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
7126     PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
7127 
7128     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
7129     PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
7130   {
7131   fprintf(stderr, "** Error in pcre2_config(): bad length\n");
7132   return 1;
7133   }
7134 
7135 /* Get buffers from malloc() so that valgrind will check their misuse when
7136 debugging. They grow automatically when very long lines are read. The 16-
7137 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
7138 
7139 buffer = (uint8_t *)malloc(pbuffer8_size);
7140 pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
7141 
7142 /* The following  _setmode() stuff is some Windows magic that tells its runtime
7143 library to translate CRLF into a single LF character. At least, that's what
7144 I've been told: never having used Windows I take this all on trust. Originally
7145 it set 0x8000, but then I was advised that _O_BINARY was better. */
7146 
7147 #if defined(_WIN32) || defined(WIN32)
7148 _setmode( _fileno( stdout ), _O_BINARY );
7149 #endif
7150 
7151 /* Initialization that does not depend on the running mode. */
7152 
7153 locale_name[0] = 0;
7154 memset(&def_patctl, 0, sizeof(patctl));
7155 memset(&def_datctl, 0, sizeof(datctl));
7156 def_datctl.oveccount = DEFAULT_OVECCOUNT;
7157 def_datctl.copy_numbers[0] = -1;
7158 def_datctl.get_numbers[0] = -1;
7159 def_datctl.cfail[0] = def_datctl.cfail[1] = CFAIL_UNSET;
7160 
7161 /* Scan command line options. */
7162 
7163 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
7164   {
7165   char *endptr;
7166   char *arg = argv[op];
7167   unsigned long uli;
7168 
7169   /* Display and/or set return code for configuration options. */
7170 
7171   if (strcmp(arg, "-C") == 0)
7172     {
7173     yield = c_option(argv[op + 1]);
7174     goto EXIT;
7175     }
7176 
7177   /* Select operating mode */
7178 
7179   if (strcmp(arg, "-8") == 0)
7180     {
7181 #ifdef SUPPORT_PCRE2_8
7182     test_mode = PCRE8_MODE;
7183 #else
7184     fprintf(stderr,
7185       "** This version of PCRE2 was built without 8-bit support\n");
7186     exit(1);
7187 #endif
7188     }
7189   else if (strcmp(arg, "-16") == 0)
7190     {
7191 #ifdef SUPPORT_PCRE2_16
7192     test_mode = PCRE16_MODE;
7193 #else
7194     fprintf(stderr,
7195       "** This version of PCRE2 was built without 16-bit support\n");
7196     exit(1);
7197 #endif
7198     }
7199   else if (strcmp(arg, "-32") == 0)
7200     {
7201 #ifdef SUPPORT_PCRE2_32
7202     test_mode = PCRE32_MODE;
7203 #else
7204     fprintf(stderr,
7205       "** This version of PCRE2 was built without 32-bit support\n");
7206     exit(1);
7207 #endif
7208     }
7209 
7210   /* Set quiet (no version verification) */
7211 
7212   else if (strcmp(arg, "-q") == 0) quiet = TRUE;
7213 
7214   /* Set system stack size */
7215 
7216   else if (strcmp(arg, "-S") == 0 && argc > 2 &&
7217       ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
7218     {
7219 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
7220     fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
7221     exit(1);
7222 #else
7223     int rc;
7224     struct rlimit rlim;
7225     if (U32OVERFLOW(uli))
7226       {
7227       fprintf(stderr, "** Argument for -S is too big\n");
7228       exit(1);
7229       }
7230     stack_size = (uint32_t)uli;
7231     getrlimit(RLIMIT_STACK, &rlim);
7232     rlim.rlim_cur = stack_size * 1024 * 1024;
7233     if (rlim.rlim_cur > rlim.rlim_max)
7234       {
7235       fprintf(stderr,
7236         "pcre2test: requested stack size %luM is greater than hard limit %lu\n",
7237         (unsigned long int)stack_size,
7238         (unsigned long int)(rlim.rlim_max));
7239       exit(1);
7240       }
7241     rc = setrlimit(RLIMIT_STACK, &rlim);
7242     if (rc != 0)
7243       {
7244       fprintf(stderr, "pcre2test: setting stack size %luM failed: %s\n",
7245         (unsigned long int)stack_size, strerror(errno));
7246       exit(1);
7247       }
7248     op++;
7249     argc--;
7250 #endif
7251     }
7252 
7253   /* Set some common pattern and subject controls */
7254 
7255   else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
7256   else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
7257   else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
7258   else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
7259   else if (strcmp(arg, "-jit") == 0)
7260     {
7261     def_patctl.jit = 7;  /* full & partial */
7262 #ifndef SUPPORT_JIT
7263     fprintf(stderr, "** Warning: JIT support is not available: "
7264                     "-jit calls functions that do nothing.\n");
7265 #endif
7266     }
7267 
7268   /* Set timing parameters */
7269 
7270   else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
7271            strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
7272     {
7273     int both = arg[2] == 0;
7274     showtotaltimes = arg[1] == 'T';
7275     if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
7276       {
7277       if (U32OVERFLOW(uli))
7278         {
7279         fprintf(stderr, "** Argument for %s is too big\n", arg);
7280         exit(1);
7281         }
7282       timeitm = (int)uli;
7283       op++;
7284       argc--;
7285       }
7286     else timeitm = LOOPREPEAT;
7287     if (both) timeit = timeitm;
7288     }
7289 
7290   /* Give help */
7291 
7292   else if (strcmp(arg, "-help") == 0 ||
7293            strcmp(arg, "--help") == 0)
7294     {
7295     usage();
7296     goto EXIT;
7297     }
7298 
7299   /* Show version */
7300 
7301   else if (strcmp(arg, "-version") == 0 ||
7302            strcmp(arg, "--version") == 0)
7303     {
7304     print_version(stdout);
7305     goto EXIT;
7306     }
7307 
7308   /* The following options save their data for processing once we know what
7309   the running mode is. */
7310 
7311   else if (strcmp(arg, "-error") == 0)
7312     {
7313     arg_error = argv[op+1];
7314     goto CHECK_VALUE_EXISTS;
7315     }
7316 
7317   else if (strcmp(arg, "-subject") == 0)
7318     {
7319     arg_subject = argv[op+1];
7320     goto CHECK_VALUE_EXISTS;
7321     }
7322 
7323   else if (strcmp(arg, "-pattern") == 0)
7324     {
7325     arg_pattern = argv[op+1];
7326     CHECK_VALUE_EXISTS:
7327     if (argc <= 2)
7328       {
7329       fprintf(stderr, "** Missing value for %s\n", arg);
7330       yield = 1;
7331       goto EXIT;
7332       }
7333     op++;
7334     argc--;
7335     }
7336 
7337   /* Unrecognized option */
7338 
7339   else
7340     {
7341     fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
7342     usage();
7343     yield = 1;
7344     goto EXIT;
7345     }
7346   op++;
7347   argc--;
7348   }
7349 
7350 /* If -error was present, get the error numbers, show the messages, and exit.
7351 We wait to do this until we know which mode we are in. */
7352 
7353 if (arg_error != NULL)
7354   {
7355   int len;
7356   int errcode;
7357   char *endptr;
7358 
7359 /* Ensure the relevant non-8-bit buffer is available. */
7360 
7361 #ifdef SUPPORT_PCRE2_16
7362   if (test_mode == PCRE16_MODE)
7363     {
7364     pbuffer16_size = 256;
7365     pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
7366     if (pbuffer16 == NULL)
7367       {
7368       fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer16\n",
7369         (unsigned long int)pbuffer16_size);
7370       yield = 1;
7371       goto EXIT;
7372       }
7373     }
7374 #endif
7375 
7376 #ifdef SUPPORT_PCRE2_32
7377   if (test_mode == PCRE32_MODE)
7378     {
7379     pbuffer32_size = 256;
7380     pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
7381     if (pbuffer32 == NULL)
7382       {
7383       fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer32\n",
7384         (unsigned long int)pbuffer32_size);
7385       yield = 1;
7386       goto EXIT;
7387       }
7388     }
7389 #endif
7390 
7391   /* Loop along a list of error numbers. */
7392 
7393   for (;;)
7394     {
7395     errcode = strtol(arg_error, &endptr, 10);
7396     if (*endptr != 0 && *endptr != CHAR_COMMA)
7397       {
7398       fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
7399       yield = 1;
7400       goto EXIT;
7401       }
7402     printf("Error %d: ", errcode);
7403     PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
7404     if (len < 0)
7405       {
7406       switch (len)
7407         {
7408         case PCRE2_ERROR_BADDATA:
7409         printf("PCRE2_ERROR_BADDATA (unknown error number)");
7410         break;
7411 
7412         case PCRE2_ERROR_NOMEMORY:
7413         printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
7414         break;
7415 
7416         default:
7417         printf("Unexpected return (%d) from pcre2_get_error_message()", len);
7418         break;
7419         }
7420       }
7421     else
7422       {
7423       PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
7424       }
7425     printf("\n");
7426     if (*endptr == 0) goto EXIT;
7427     arg_error = endptr + 1;
7428     }
7429   /* Control never reaches here */
7430   }  /* End of -error handling */
7431 
7432 /* Initialize things that cannot be done until we know which test mode we are
7433 running in. When HEAP_MATCH_RECURSE is undefined, calling pcre2_set_recursion_
7434 memory_management() is a no-op, but we call it in order to exercise it. Also
7435 exercise the general context copying function, which is not otherwise used. */
7436 
7437 code_unit_size = test_mode/8;
7438 max_oveccount = DEFAULT_OVECCOUNT;
7439 
7440 /* Use macros to save a lot of duplication. */
7441 
7442 #define CREATECONTEXTS \
7443   G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
7444   G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
7445   G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
7446   G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
7447   G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
7448   G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
7449   G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
7450 
7451 #ifdef HEAP_MATCH_RECURSE
7452 #define SETRECURSEMEMMAN \
7453   (void)G(pcre2_set_recursion_memory_management_,BITS) \
7454     (G(default_dat_context,BITS), \
7455     &my_stack_malloc, &my_stack_free, NULL)
7456 #else
7457 #define SETRECURSEMEMMAN \
7458   (void)G(pcre2_set_recursion_memory_management_,BITS)(NULL, NULL, NULL, NULL)
7459 #endif
7460 
7461 /* Call the appropriate functions for the current mode. */
7462 
7463 #ifdef SUPPORT_PCRE2_8
7464 #undef BITS
7465 #define BITS 8
7466 if (test_mode == PCRE8_MODE)
7467   {
7468   CREATECONTEXTS;
7469   SETRECURSEMEMMAN;
7470   }
7471 #endif
7472 
7473 #ifdef SUPPORT_PCRE2_16
7474 #undef BITS
7475 #define BITS 16
7476 if (test_mode == PCRE16_MODE)
7477   {
7478   CREATECONTEXTS;
7479   SETRECURSEMEMMAN;
7480   }
7481 #endif
7482 
7483 #ifdef SUPPORT_PCRE2_32
7484 #undef BITS
7485 #define BITS 32
7486 if (test_mode == PCRE32_MODE)
7487   {
7488   CREATECONTEXTS;
7489   SETRECURSEMEMMAN;
7490   }
7491 #endif
7492 
7493 /* Set a default parentheses nest limit that is large enough to run the
7494 standard tests (this also exercises the function). */
7495 
7496 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, 220);
7497 
7498 /* Handle command line modifier settings, sending any error messages to
7499 stderr. We need to know the mode before modifying the context, and it is tidier
7500 to do them all in the same way. */
7501 
7502 outfile = stderr;
7503 if ((arg_pattern != NULL &&
7504     !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
7505     (arg_subject != NULL &&
7506     !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
7507   {
7508   yield = 1;
7509   goto EXIT;
7510   }
7511 
7512 /* Sort out the input and output files, defaulting to stdin/stdout. */
7513 
7514 infile = stdin;
7515 outfile = stdout;
7516 
7517 if (argc > 1 && strcmp(argv[op], "-") != 0)
7518   {
7519   infile = fopen(argv[op], INPUT_MODE);
7520   if (infile == NULL)
7521     {
7522     printf("** Failed to open '%s'\n", argv[op]);
7523     yield = 1;
7524     goto EXIT;
7525     }
7526   }
7527 
7528 if (argc > 2)
7529   {
7530   outfile = fopen(argv[op+1], OUTPUT_MODE);
7531   if (outfile == NULL)
7532     {
7533     printf("** Failed to open '%s'\n", argv[op+1]);
7534     yield = 1;
7535     goto EXIT;
7536     }
7537   }
7538 
7539 /* Output a heading line unless quiet, then process input lines. */
7540 
7541 if (!quiet) print_version(outfile);
7542 
7543 SET(compiled_code, NULL);
7544 
7545 #ifdef SUPPORT_PCRE2_8
7546 preg.re_pcre2_code = NULL;
7547 preg.re_match_data = NULL;
7548 #endif
7549 
7550 while (notdone)
7551   {
7552   uint8_t *p;
7553   int rc = PR_OK;
7554   BOOL expectdata = TEST(compiled_code, !=, NULL);
7555 #ifdef SUPPORT_PCRE2_8
7556   expectdata |= preg.re_pcre2_code != NULL;
7557 #endif
7558 
7559   if (extend_inputline(infile, buffer, expectdata? "data> " : "  re> ") == NULL)
7560     break;
7561   if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
7562   fflush(outfile);
7563   p = buffer;
7564 
7565   /* If we have a pattern set up for testing, or we are skipping after a
7566   compile failure, a blank line terminates this test; otherwise process the
7567   line as a data line. */
7568 
7569   if (expectdata || skipping)
7570     {
7571     while (isspace(*p)) p++;
7572     if (*p == 0)
7573       {
7574 #ifdef SUPPORT_PCRE2_8
7575       if (preg.re_pcre2_code != NULL)
7576         {
7577         regfree(&preg);
7578         preg.re_pcre2_code = NULL;
7579         preg.re_match_data = NULL;
7580         }
7581 #endif  /* SUPPORT_PCRE2_8 */
7582       if (TEST(compiled_code, !=, NULL))
7583         {
7584         SUB1(pcre2_code_free, compiled_code);
7585         SET(compiled_code, NULL);
7586         }
7587       skipping = FALSE;
7588       setlocale(LC_CTYPE, "C");
7589       }
7590     else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
7591       rc = process_data();
7592     }
7593 
7594   /* We do not have a pattern set up for testing. Lines starting with # are
7595   either comments or special commands. Blank lines are ignored. Otherwise, the
7596   line must start with a valid delimiter. It is then processed as a pattern
7597   line. */
7598 
7599   else if (*p == '#')
7600     {
7601     if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
7602     rc = process_command();
7603     }
7604 
7605   else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
7606     {
7607     rc = process_pattern();
7608     dfa_matched = 0;
7609     }
7610 
7611   else
7612     {
7613     while (isspace(*p)) p++;
7614     if (*p != 0)
7615       {
7616       fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
7617         *buffer);
7618       rc = PR_SKIP;
7619       }
7620     }
7621 
7622   if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
7623   else if (rc == PR_ABEND)
7624     {
7625     fprintf(outfile, "** pcre2test run abandoned\n");
7626     yield = 1;
7627     goto EXIT;
7628     }
7629   }
7630 
7631 /* Finish off a normal run. */
7632 
7633 if (INTERACTIVE(infile)) fprintf(outfile, "\n");
7634 
7635 if (showtotaltimes)
7636   {
7637   const char *pad = "";
7638   fprintf(outfile, "--------------------------------------\n");
7639   if (timeit > 0)
7640     {
7641     fprintf(outfile, "Total compile time %.4f milliseconds\n",
7642       (((double)total_compile_time * 1000.0) / (double)timeit) /
7643         (double)CLOCKS_PER_SEC);
7644     if (total_jit_compile_time > 0)
7645       fprintf(outfile, "Total JIT compile  %.4f milliseconds\n",
7646         (((double)total_jit_compile_time * 1000.0) / (double)timeit) /
7647           (double)CLOCKS_PER_SEC);
7648     pad = "  ";
7649     }
7650   fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad,
7651     (((double)total_match_time * 1000.0) / (double)timeitm) /
7652       (double)CLOCKS_PER_SEC);
7653   }
7654 
7655 
7656 EXIT:
7657 
7658 if (infile != NULL && infile != stdin) fclose(infile);
7659 if (outfile != NULL && outfile != stdout) fclose(outfile);
7660 
7661 free(buffer);
7662 free(dbuffer);
7663 free(pbuffer8);
7664 free(dfa_workspace);
7665 free((void *)locale_tables);
7666 PCRE2_MATCH_DATA_FREE(match_data);
7667 SUB1(pcre2_code_free, compiled_code);
7668 
7669 while(patstacknext-- > 0)
7670   {
7671   SET(compiled_code, patstack[patstacknext]);
7672   SUB1(pcre2_code_free, compiled_code);
7673   }
7674 
7675 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
7676 if (jit_stack != NULL)
7677   {
7678   PCRE2_JIT_STACK_FREE(jit_stack);
7679   }
7680 
7681 #define FREECONTEXTS \
7682   G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
7683   G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
7684   G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
7685   G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
7686   G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
7687   G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS))
7688 
7689 #ifdef SUPPORT_PCRE2_8
7690 #undef BITS
7691 #define BITS 8
7692 if (preg.re_pcre2_code != NULL) regfree(&preg);
7693 FREECONTEXTS;
7694 #endif
7695 
7696 #ifdef SUPPORT_PCRE2_16
7697 #undef BITS
7698 #define BITS 16
7699 free(pbuffer16);
7700 FREECONTEXTS;
7701 #endif
7702 
7703 #ifdef SUPPORT_PCRE2_32
7704 #undef BITS
7705 #define BITS 32
7706 free(pbuffer32);
7707 FREECONTEXTS;
7708 #endif
7709 
7710 #if defined(__VMS)
7711   yield = SS$_NORMAL;  /* Return values via DCL symbols */
7712 #endif
7713 
7714 return yield;
7715 }
7716 
7717 /* End of pcre2test.c */
7718