1 /*************************************************
2 *             PCRE2 testing program              *
3 *************************************************/
4 
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11 
12                        Written by Philip Hazel
13      Original code Copyright (c) 1997-2012 University of Cambridge
14     Rewritten code Copyright (c) 2016-2020 University of Cambridge
15 
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19 
20     * Redistributions of source code must retain the above copyright notice,
21       this list of conditions and the following disclaimer.
22 
23     * Redistributions in binary form must reproduce the above copyright
24       notice, this list of conditions and the following disclaimer in the
25       documentation and/or other materials provided with the distribution.
26 
27     * Neither the name of the University of Cambridge nor the names of its
28       contributors may be used to endorse or promote products derived from
29       this software without specific prior written permission.
30 
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44 
45 
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56 
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60 
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68 
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76 
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 
81 /* Debugging code enabler */
82 
83 /* #define DEBUG_SHOW_MALLOC_ADDRESSES */
84 
85 /* Both libreadline and libedit are optionally supported. The user-supplied
86 original patch uses readline/readline.h for libedit, but in at least one system
87 it is installed as editline/readline.h, so the configuration code now looks for
88 that first, falling back to readline/readline.h. */
89 
90 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
91 #if defined(SUPPORT_LIBREADLINE)
92 #include <readline/readline.h>
93 #include <readline/history.h>
94 #else
95 #if defined(HAVE_EDITLINE_READLINE_H)
96 #include <editline/readline.h>
97 #else
98 #include <readline/readline.h>
99 #endif
100 #endif
101 #endif
102 
103 /* Put the test for interactive input into a macro so that it can be changed if
104 required for different environments. */
105 
106 #define INTERACTIVE(f) isatty(fileno(f))
107 
108 
109 /* ---------------------- System-specific definitions ---------------------- */
110 
111 /* A number of things vary for Windows builds. Originally, pcretest opened its
112 input and output without "b"; then I was told that "b" was needed in some
113 environments, so it was added for release 5.0 to both the input and output. (It
114 makes no difference on Unix-like systems.) Later I was told that it is wrong
115 for the input on Windows. I've now abstracted the modes into macros that are
116 set here, to make it easier to fiddle with them, and removed "b" from the input
117 mode under Windows. The BINARY versions are used when saving/restoring compiled
118 patterns. */
119 
120 #if defined(_WIN32) || defined(WIN32)
121 #include <io.h>                /* For _setmode() */
122 #include <fcntl.h>             /* For _O_BINARY */
123 #define INPUT_MODE          "r"
124 #define OUTPUT_MODE         "wb"
125 #define BINARY_INPUT_MODE   "rb"
126 #define BINARY_OUTPUT_MODE  "wb"
127 
128 #ifndef isatty
129 #define isatty _isatty         /* This is what Windows calls them, I'm told, */
130 #endif                         /* though in some environments they seem to   */
131                                /* be already defined, hence the #ifndefs.    */
132 #ifndef fileno
133 #define fileno _fileno
134 #endif
135 
136 /* A user sent this fix for Borland Builder 5 under Windows. */
137 
138 #ifdef __BORLANDC__
139 #define _setmode(handle, mode) setmode(handle, mode)
140 #endif
141 
142 /* Not Windows */
143 
144 #else
145 #include <sys/time.h>          /* These two includes are needed */
146 #include <sys/resource.h>      /* for setrlimit(). */
147 #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
148 #define INPUT_MODE   "r"
149 #define OUTPUT_MODE  "w"
150 #define BINARY_INPUT_MODE   "rb"
151 #define BINARY_OUTPUT_MODE  "wb"
152 #else
153 #define INPUT_MODE          "rb"
154 #define OUTPUT_MODE         "wb"
155 #define BINARY_INPUT_MODE   "rb"
156 #define BINARY_OUTPUT_MODE  "wb"
157 #endif
158 #endif
159 
160 /* VMS-specific code was included as suggested by a VMS user [1]. Another VMS
161 user [2] provided alternative code which worked better for him. I have
162 commented out the original, but kept it around just in case. */
163 
164 #ifdef __VMS
165 #include <ssdef.h>
166 /* These two includes came from [2]. */
167 #include descrip
168 #include lib$routines
169 /* void vms_setsymbol( char *, char *, int ); Original code from [1]. */
170 #endif
171 
172 /* VC and older compilers don't support %td or %zu, and even some that claim to
173 be C99 don't support them (hence DISABLE_PERCENT_ZT). There are some non-C99
174 environments where %lu gives a warning with 32-bit pointers. As there doesn't
175 seem to be an easy way round this, just live with it (the cases are rare). */
176 
177 #if defined(_MSC_VER) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L || defined(DISABLE_PERCENT_ZT)
178 #define PTR_FORM "lu"
179 #define SIZ_FORM "lu"
180 #define SIZ_CAST (unsigned long int)
181 #else
182 #define PTR_FORM "td"
183 #define SIZ_FORM "zu"
184 #define SIZ_CAST
185 #endif
186 
187 /* ------------------End of system-specific definitions -------------------- */
188 
189 /* Glueing macros that are used in several places below. */
190 
191 #define glue(a,b) a##b
192 #define G(a,b) glue(a,b)
193 
194 /* Miscellaneous parameters and manifests */
195 
196 #ifndef CLOCKS_PER_SEC
197 #ifdef CLK_TCK
198 #define CLOCKS_PER_SEC CLK_TCK
199 #else
200 #define CLOCKS_PER_SEC 100
201 #endif
202 #endif
203 
204 #define CFORE_UNSET UINT32_MAX    /* Unset value for startend/cfail/cerror fields */
205 #define CONVERT_UNSET UINT32_MAX  /* Unset value for convert_type field */
206 #define DFA_WS_DIMENSION 1000     /* Size of DFA workspace */
207 #define DEFAULT_OVECCOUNT 15      /* Default ovector count */
208 #define JUNK_OFFSET 0xdeadbeef    /* For initializing ovector */
209 #define LOCALESIZE 32             /* Size of locale name */
210 #define LOOPREPEAT 500000         /* Default loop count for timing */
211 #define MALLOCLISTSIZE 20         /* For remembering mallocs */
212 #define PARENS_NEST_DEFAULT 220   /* Default parentheses nest limit */
213 #define PATSTACKSIZE 20           /* Pattern stack for save/restore testing */
214 #define REPLACE_MODSIZE 100       /* Field for reading 8-bit replacement */
215 #define VERSION_SIZE 64           /* Size of buffer for the version strings */
216 
217 /* Default JIT compile options */
218 
219 #define JIT_DEFAULT (PCRE2_JIT_COMPLETE|\
220                      PCRE2_JIT_PARTIAL_SOFT|\
221                      PCRE2_JIT_PARTIAL_HARD)
222 
223 /* Make sure the buffer into which replacement strings are copied is big enough
224 to hold them as 32-bit code units. */
225 
226 #define REPLACE_BUFFSIZE 1024   /* This is a byte value */
227 
228 /* Execution modes */
229 
230 #define PCRE8_MODE   8
231 #define PCRE16_MODE 16
232 #define PCRE32_MODE 32
233 
234 /* Processing returns */
235 
236 enum { PR_OK, PR_SKIP, PR_ABEND };
237 
238 /* The macro PRINTABLE determines whether to print an output character as-is or
239 as a hex value when showing compiled patterns. We use it in cases when the
240 locale has not been explicitly changed, so as to get consistent output from
241 systems that differ in their output from isprint() even in the "C" locale. */
242 
243 #ifdef EBCDIC
244 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
245 #else
246 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
247 #endif
248 /* PRINTOK uses isprint() only when use_tables is set and c is below 256. */
249 #define PRINTOK(c) ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
250 
251 /* We have to include some of the library source files because we need
252 to use some of the macros, internal structure definitions, and other internal
253 values - pcre2test has "inside information" compared to an application program
254 that strictly follows the PCRE2 API.
255 
256 Before including pcre2_internal.h we define PRIV so that it does not get
257 defined therein. This ensures that PRIV names in the included files do not
258 clash with those in the libraries. Also, although pcre2_internal.h does itself
259 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
260 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
261 for building the library. */
262 
263 #define PRIV(name) name
264 #define PCRE2_CODE_UNIT_WIDTH 0
265 #include "pcre2.h"
266 #include "pcre2posix.h"
267 #include "pcre2_internal.h"
268 
269 /* We need access to some of the data tables that PCRE2 uses. Defining
270 PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
271 of PRIV avoids name clashes. */
272 
273 #define PCRE2_PCRE2TEST
274 #include "pcre2_tables.c"
275 #include "pcre2_ucd.c"
276 
277 /* 32-bit integer values in the input are read by strtoul() or strtol(). The
278 check needed for overflow depends on whether long ints are in fact longer than
279 ints. They are defined not to be shorter. */
280 /* The macro argument is parenthesized so that any expression can be passed. */
281 #if ULONG_MAX > UINT32_MAX
282 #define U32OVERFLOW(x) ((x) > UINT32_MAX)
283 #else
284 #define U32OVERFLOW(x) ((x) == UINT32_MAX)
285 #endif
286 
287 #if LONG_MAX > INT32_MAX
288 #define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
289 #else
290 #define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
291 #endif
292 
293 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
294 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
295 defined. We can now include it for each supported code unit width. Because
296 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
297 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
298 while including these files, and then restore it to a no-op. Because LINK_SIZE
299 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
300 these inclusions should not be changed. */
301 
302 #undef PCRE2_SUFFIX
303 #undef PCRE2_CODE_UNIT_WIDTH
304 
305 #ifdef   SUPPORT_PCRE2_8
306 #define  PCRE2_CODE_UNIT_WIDTH 8
307 #define  PCRE2_SUFFIX(a) G(a,8)
308 #include "pcre2_intmodedep.h"
309 #include "pcre2_printint.c"
310 #undef   PCRE2_CODE_UNIT_WIDTH
311 #undef   PCRE2_SUFFIX
312 #endif   /* SUPPORT_PCRE2_8 */
313 
314 #ifdef   SUPPORT_PCRE2_16
315 #define  PCRE2_CODE_UNIT_WIDTH 16
316 #define  PCRE2_SUFFIX(a) G(a,16)
317 #include "pcre2_intmodedep.h"
318 #include "pcre2_printint.c"
319 #undef   PCRE2_CODE_UNIT_WIDTH
320 #undef   PCRE2_SUFFIX
321 #endif   /* SUPPORT_PCRE2_16 */
322 
323 #ifdef   SUPPORT_PCRE2_32
324 #define  PCRE2_CODE_UNIT_WIDTH 32
325 #define  PCRE2_SUFFIX(a) G(a,32)
326 #include "pcre2_intmodedep.h"
327 #include "pcre2_printint.c"
328 #undef   PCRE2_CODE_UNIT_WIDTH
329 #undef   PCRE2_SUFFIX
330 #endif   /* SUPPORT_PCRE2_32 */
331 
332 #define PCRE2_SUFFIX(a) a
333 
334 /* We need to be able to check input text for UTF-8 validity, whatever code
335 widths are actually available, because the input to pcre2test is always in
336 8-bit code units. So we include the UTF validity checking function for 8-bit
337 code units. */
338 
339 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
340 
341 #define  PCRE2_CODE_UNIT_WIDTH 8
342 #undef   PCRE2_SPTR
343 #define  PCRE2_SPTR PCRE2_SPTR8
344 #include "pcre2_valid_utf.c"
345 #undef   PCRE2_CODE_UNIT_WIDTH
346 #undef   PCRE2_SPTR
347 
348 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
349 support, it can be selected by a command-line option. If there is no 8-bit
350 support, there must be 16-bit or 32-bit support, so default to one of them. The
351 config function, JIT stack, contexts, and version string are the same in all
352 modes, so use the form of the first that is available. */
353 
354 #if defined SUPPORT_PCRE2_8
355 #define DEFAULT_TEST_MODE PCRE8_MODE
356 #define VERSION_TYPE PCRE2_UCHAR8
357 #define PCRE2_CONFIG pcre2_config_8
358 #define PCRE2_JIT_STACK pcre2_jit_stack_8
359 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8
360 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8
361 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_8
362 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8
363 
364 #elif defined SUPPORT_PCRE2_16
365 #define DEFAULT_TEST_MODE PCRE16_MODE
366 #define VERSION_TYPE PCRE2_UCHAR16
367 #define PCRE2_CONFIG pcre2_config_16
368 #define PCRE2_JIT_STACK pcre2_jit_stack_16
369 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16
370 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16
371 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_16
372 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16
373 
374 #elif defined SUPPORT_PCRE2_32
375 #define DEFAULT_TEST_MODE PCRE32_MODE
376 #define VERSION_TYPE PCRE2_UCHAR32
377 #define PCRE2_CONFIG pcre2_config_32
378 #define PCRE2_JIT_STACK pcre2_jit_stack_32
379 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32
380 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32
381 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_32
382 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32
383 #endif
384 
385 /* ------------- Structure and table for handling #-commands ------------- */
386 /* One entry of the #-command table: a command name and its CMD_xxx code. */
387 typedef struct cmdstruct {
388   const char *name;     /* Command name, as listed in cmdlist */
389   int  value;           /* Corresponding CMD_xxx code */
390 } cmdstruct;
391 
392 enum { CMD_FORBID_UTF, CMD_LOAD, CMD_LOADTABLES, CMD_NEWLINE_DEFAULT,
393   CMD_PATTERN, CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT,
394   CMD_UNKNOWN };
395 /* cmdlist names follow the enum order above; CMD_UNKNOWN has no table entry. */
396 static cmdstruct cmdlist[] = {
397   { "forbid_utf",      CMD_FORBID_UTF },
398   { "load",            CMD_LOAD },
399   { "loadtables",      CMD_LOADTABLES },
400   { "newline_default", CMD_NEWLINE_DEFAULT },
401   { "pattern",         CMD_PATTERN },
402   { "perltest",        CMD_PERLTEST },
403   { "pop",             CMD_POP },
404   { "popcopy",         CMD_POPCOPY },
405   { "save",            CMD_SAVE },
406   { "subject",         CMD_SUBJECT }};
407 /* Number of entries in cmdlist. */
408 #define cmdlistcount (sizeof(cmdlist)/sizeof(cmdstruct))
409 
410 /* ------------- Structures and tables for handling modifiers -------------- */
411 
412 /* Table of names for newline types. Must be kept in step with the definitions
413 of PCRE2_NEWLINE_xx in pcre2.h. */
414 /* NOTE(review): presumably indexed by the PCRE2_NEWLINE_xx value - confirm. */
415 static const char *newlines[] = {
416   "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
417 
418 /* Structure and table for handling pattern conversion types. */
419 /* Each entry pairs a conversion name with the option value it selects. */
420 typedef struct convertstruct {
421   const char *name;     /* Conversion type name */
422   uint32_t option;      /* PCRE2_CONVERT_xxx value, or CONVERT_UNSET */
423 } convertstruct;
424 /* "unset" maps to CONVERT_UNSET rather than to a PCRE2_CONVERT_xxx option. */
425 static convertstruct convertlist[] = {
426   { "glob",                   PCRE2_CONVERT_GLOB },
427   { "glob_no_starstar",       PCRE2_CONVERT_GLOB_NO_STARSTAR },
428   { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
429   { "posix_basic",            PCRE2_CONVERT_POSIX_BASIC },
430   { "posix_extended",         PCRE2_CONVERT_POSIX_EXTENDED },
431   { "unset",                  CONVERT_UNSET }};
432 /* Number of entries in convertlist. */
433 #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
434 
435 /* Modifier applicability (MOD_CTC to MOD_PNDP) and value types (MOD_CHR to MOD_STR). */
436 /* In modlist entries the former fills "which" and the latter fills "type". */
437 enum { MOD_CTC,    /* Applies to a compile context */
438        MOD_CTM,    /* Applies to a match context */
439        MOD_PAT,    /* Applies to a pattern */
440        MOD_PATP,   /* Ditto, OK for Perl test */
441        MOD_DAT,    /* Applies to a data line */
442        MOD_PD,     /* Applies to a pattern or a data line */
443        MOD_PDP,    /* As MOD_PD, OK for Perl test */
444        MOD_PND,    /* As MOD_PD, but not for a default pattern */
445        MOD_PNDP,   /* As MOD_PND, OK for Perl test */
446        MOD_CHR,    /* Is a single character */
447        MOD_CON,    /* Is a "convert" type/options list */
448        MOD_CTL,    /* Is a control bit */
449        MOD_BSR,    /* Is a BSR value */
450        MOD_IN2,    /* Is one or two unsigned integers */
451        MOD_INS,    /* Is a signed integer */
452        MOD_INT,    /* Is an unsigned integer */
453        MOD_IND,    /* Is an unsigned integer, but no value => default */
454        MOD_NL,     /* Is a newline value */
455        MOD_NN,     /* Is a number or a name; more than one may occur */
456        MOD_OPT,    /* Is an option bit */
457        MOD_SIZ,    /* Is a PCRE2_SIZE value */
458        MOD_STR };  /* Is a string */
459 
460 /* Control bits. Some apply to compiling, some to matching, but some can be set
461 either on a pattern or a data line, so they must all be distinct. There are now
462 so many of them that they are split into two fields. */
463 
464 #define CTL_AFTERTEXT                    0x00000001u
465 #define CTL_ALLAFTERTEXT                 0x00000002u
466 #define CTL_ALLCAPTURES                  0x00000004u
467 #define CTL_ALLUSEDTEXT                  0x00000008u
468 #define CTL_ALTGLOBAL                    0x00000010u
469 #define CTL_BINCODE                      0x00000020u
470 #define CTL_CALLOUT_CAPTURE              0x00000040u
471 #define CTL_CALLOUT_INFO                 0x00000080u
472 #define CTL_CALLOUT_NONE                 0x00000100u
473 #define CTL_DFA                          0x00000200u
474 #define CTL_EXPAND                       0x00000400u
475 #define CTL_FINDLIMITS                   0x00000800u
476 #define CTL_FRAMESIZE                    0x00001000u
477 #define CTL_FULLBINCODE                  0x00002000u
478 #define CTL_GETALL                       0x00004000u
479 #define CTL_GLOBAL                       0x00008000u
480 #define CTL_HEXPAT                       0x00010000u  /* Same word as USE_LENGTH */
481 #define CTL_INFO                         0x00020000u
482 #define CTL_JITFAST                      0x00040000u
483 #define CTL_JITVERIFY                    0x00080000u
484 #define CTL_MARK                         0x00100000u
485 #define CTL_MEMORY                       0x00200000u
486 #define CTL_NULLCONTEXT                  0x00400000u
487 #define CTL_POSIX                        0x00800000u
488 #define CTL_POSIX_NOSUB                  0x01000000u
489 #define CTL_PUSH                         0x02000000u  /* These three must be */
490 #define CTL_PUSHCOPY                     0x04000000u  /*   all in the same */
491 #define CTL_PUSHTABLESCOPY               0x08000000u  /*     word. */
492 #define CTL_STARTCHAR                    0x10000000u
493 #define CTL_USE_LENGTH                   0x20000000u  /* Same word as HEXPAT */
494 #define CTL_UTF8_INPUT                   0x40000000u
495 #define CTL_ZERO_TERMINATE               0x80000000u
496 
497 /* Combinations */
498 
499 #define CTL_DEBUG            (CTL_FULLBINCODE|CTL_INFO)  /* For setting */
500 #define CTL_ANYINFO          (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO)
501 #define CTL_ANYGLOB          (CTL_ALTGLOBAL|CTL_GLOBAL)
502 
503 /* Second control word */
504 
505 #define CTL2_SUBSTITUTE_CALLOUT          0x00000001u
506 #define CTL2_SUBSTITUTE_EXTENDED         0x00000002u
507 #define CTL2_SUBSTITUTE_LITERAL          0x00000004u
508 #define CTL2_SUBSTITUTE_MATCHED          0x00000008u
509 #define CTL2_SUBSTITUTE_OVERFLOW_LENGTH  0x00000010u
510 #define CTL2_SUBSTITUTE_REPLACEMENT_ONLY 0x00000020u
511 #define CTL2_SUBSTITUTE_UNKNOWN_UNSET    0x00000040u
512 #define CTL2_SUBSTITUTE_UNSET_EMPTY      0x00000080u
513 #define CTL2_SUBJECT_LITERAL             0x00000100u
514 #define CTL2_CALLOUT_NO_WHERE            0x00000200u
515 #define CTL2_CALLOUT_EXTRA               0x00000400u
516 #define CTL2_ALLVECTOR                   0x00000800u
517 
518 #define CTL2_NL_SET                      0x40000000u  /* Informational */
519 #define CTL2_BSR_SET                     0x80000000u  /* Informational */
520 
521 /* These are the matching controls that may be set either on a pattern or on a
522 data line. They are copied from the pattern controls as initial settings for
523 data line controls. Note that CTL_MEMORY is not included here, because it does
524 different things in the two cases. */
525 
526 #define CTL_ALLPD  (CTL_AFTERTEXT|\
527                     CTL_ALLAFTERTEXT|\
528                     CTL_ALLCAPTURES|\
529                     CTL_ALLUSEDTEXT|\
530                     CTL_ALTGLOBAL|\
531                     CTL_GLOBAL|\
532                     CTL_MARK|\
533                     CTL_STARTCHAR|\
534                     CTL_UTF8_INPUT)
535 
536 #define CTL2_ALLPD (CTL2_SUBSTITUTE_CALLOUT|\
537                     CTL2_SUBSTITUTE_EXTENDED|\
538                     CTL2_SUBSTITUTE_LITERAL|\
539                     CTL2_SUBSTITUTE_MATCHED|\
540                     CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
541                     CTL2_SUBSTITUTE_REPLACEMENT_ONLY|\
542                     CTL2_SUBSTITUTE_UNKNOWN_UNSET|\
543                     CTL2_SUBSTITUTE_UNSET_EMPTY|\
544                     CTL2_ALLVECTOR)
545 
546 /* Structures for holding modifier information for patterns and subject strings
547 (data). Fields containing modifiers that can be set either for a pattern or a
548 subject must be at the start and in the same order in both cases so that the
549 same offset in the big table below works for both. */
550 
551 typedef struct patctl {       /* Structure for pattern modifiers. */
552   uint32_t  options;          /* Must be in same position as datctl */
553   uint32_t  control;          /* Must be in same position as datctl */
554   uint32_t  control2;         /* Must be in same position as datctl */
555   uint32_t  jitstack;         /* Must be in same position as datctl */
556    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
557   uint32_t  substitute_skip;  /* Must be in same position as datctl */
558   uint32_t  substitute_stop;  /* Must be in same position as datctl */
559   uint32_t  jit;              /* "jit" modifier; 7 when given without a value */
560   uint32_t  stackguard_test;
561   uint32_t  tables_id;
562   uint32_t  convert_type;     /* "convert" modifier */
563   uint32_t  convert_length;   /* "convert_length" modifier */
564   uint32_t  convert_glob_escape;     /* "convert_glob_escape" character */
565   uint32_t  convert_glob_separator;  /* "convert_glob_separator" character */
566   uint32_t  regerror_buffsize;
567    uint8_t  locale[LOCALESIZE];      /* "locale" modifier string */
568 } patctl;
569 
570 #define MAXCPYGET 10          /* Entries in copy_numbers[] and get_numbers[] */
571 #define LENCPYGET 64          /* Bytes in copy_names[] and get_names[] */
572 
573 typedef struct datctl {       /* Structure for data line modifiers. */
574   uint32_t  options;          /* Must be in same position as patctl */
575   uint32_t  control;          /* Must be in same position as patctl */
576   uint32_t  control2;         /* Must be in same position as patctl */
577   uint32_t  jitstack;         /* Must be in same position as patctl */
578    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
579   uint32_t  substitute_skip;  /* Must be in same position as patctl */
580   uint32_t  substitute_stop;  /* Must be in same position as patctl */
581   uint32_t  startend[2];      /* Unset value is CFORE_UNSET */
582   uint32_t  cerror[2];        /* "callout_error" values; unset is CFORE_UNSET */
583   uint32_t  cfail[2];         /* "callout_fail" values; unset is CFORE_UNSET */
584    int32_t  callout_data;     /* "callout_data" signed value */
585    int32_t  copy_numbers[MAXCPYGET];  /* Numbers given to "copy" */
586    int32_t  get_numbers[MAXCPYGET];   /* Numbers given to "get" */
587   uint32_t  oveccount;
588   uint32_t  offset;
589   uint8_t   copy_names[LENCPYGET];    /* Names given to "copy" */
590   uint8_t   get_names[LENCPYGET];     /* Names given to "get" */
591 } datctl;
592 
593 /* Ids for which context to modify. */
594 
595 enum { CTX_PAT,            /* Active pattern context */
596        CTX_POPPAT,         /* Ditto, for a popped pattern */
597        CTX_DEFPAT,         /* Default pattern context */
598        CTX_DAT,            /* Active data (match) context */
599        CTX_DEFDAT };       /* Default data (match) context */
600 
601 /* Macros to simplify the big table below. */
602 /* Each yields the byte offset of a named field, for modstruct.offset. */
603 #define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)  /* Compile context field */
604 #define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)    /* Match context field */
605 #define PO(name) offsetof(patctl, name)                      /* patctl field */
606 #define PD(name) PO(name)             /* Alias of PO, used in the MOD_PD entries */
607 #define DO(name) offsetof(datctl, name)                      /* datctl field */
608 
609 /* Table of all long-form modifiers. Must be in collating sequence of modifier
610 name because it is searched by binary chop. */
611 /* Layout of one table entry: */
612 typedef struct modstruct {
613   const char   *name;      /* Long-form modifier name (the sort key) */
614   uint16_t      which;     /* Applicability: MOD_CTC ... MOD_PNDP */
615   uint16_t      type;      /* Value type: MOD_CHR ... MOD_STR */
616   uint32_t      value;     /* Type-dependent: bit, default, size, or an offset */
617   PCRE2_SIZE    offset;    /* Field offset built with CO/MO/PO/PD/DO */
618 } modstruct;
619 
620 static modstruct modlist[] = {
621   { "aftertext",                   MOD_PNDP, MOD_CTL, CTL_AFTERTEXT,              PO(control) },
622   { "allaftertext",                MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT,           PO(control) },
623   { "allcaptures",                 MOD_PND,  MOD_CTL, CTL_ALLCAPTURES,            PO(control) },
624   { "allow_empty_class",           MOD_PAT,  MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS,    PO(options) },
625   { "allow_surrogate_escapes",     MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
626   { "allusedtext",                 MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT,            PO(control) },
627   { "allvector",                   MOD_PND,  MOD_CTL, CTL2_ALLVECTOR,             PO(control2) },
628   { "alt_bsux",                    MOD_PAT,  MOD_OPT, PCRE2_ALT_BSUX,             PO(options) },
629   { "alt_circumflex",              MOD_PAT,  MOD_OPT, PCRE2_ALT_CIRCUMFLEX,       PO(options) },
630   { "alt_verbnames",               MOD_PAT,  MOD_OPT, PCRE2_ALT_VERBNAMES,        PO(options) },
631   { "altglobal",                   MOD_PND,  MOD_CTL, CTL_ALTGLOBAL,              PO(control) },
632   { "anchored",                    MOD_PD,   MOD_OPT, PCRE2_ANCHORED,             PD(options) },
633   { "auto_callout",                MOD_PAT,  MOD_OPT, PCRE2_AUTO_CALLOUT,         PO(options) },
634   { "bad_escape_is_literal",       MOD_CTC,  MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
635   { "bincode",                     MOD_PAT,  MOD_CTL, CTL_BINCODE,                PO(control) },
636   { "bsr",                         MOD_CTC,  MOD_BSR, 0,                          CO(bsr_convention) },
637   { "callout_capture",             MOD_DAT,  MOD_CTL, CTL_CALLOUT_CAPTURE,        DO(control) },
638   { "callout_data",                MOD_DAT,  MOD_INS, 0,                          DO(callout_data) },
639   { "callout_error",               MOD_DAT,  MOD_IN2, 0,                          DO(cerror) },
640   { "callout_extra",               MOD_DAT,  MOD_CTL, CTL2_CALLOUT_EXTRA,         DO(control2) },
641   { "callout_fail",                MOD_DAT,  MOD_IN2, 0,                          DO(cfail) },
642   { "callout_info",                MOD_PAT,  MOD_CTL, CTL_CALLOUT_INFO,           PO(control) },
643   { "callout_no_where",            MOD_DAT,  MOD_CTL, CTL2_CALLOUT_NO_WHERE,      DO(control2) },
644   { "callout_none",                MOD_DAT,  MOD_CTL, CTL_CALLOUT_NONE,           DO(control) },
645   { "caseless",                    MOD_PATP, MOD_OPT, PCRE2_CASELESS,             PO(options) },
646   { "convert",                     MOD_PAT,  MOD_CON, 0,                          PO(convert_type) },
647   { "convert_glob_escape",         MOD_PAT,  MOD_CHR, 0,                          PO(convert_glob_escape) },
648   { "convert_glob_separator",      MOD_PAT,  MOD_CHR, 0,                          PO(convert_glob_separator) },
649   { "convert_length",              MOD_PAT,  MOD_INT, 0,                          PO(convert_length) },
650   { "copy",                        MOD_DAT,  MOD_NN,  DO(copy_numbers),           DO(copy_names) },
651   { "copy_matched_subject",        MOD_DAT,  MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) },
652   { "debug",                       MOD_PAT,  MOD_CTL, CTL_DEBUG,                  PO(control) },
653   { "depth_limit",                 MOD_CTM,  MOD_INT, 0,                          MO(depth_limit) },
654   { "dfa",                         MOD_DAT,  MOD_CTL, CTL_DFA,                    DO(control) },
655   { "dfa_restart",                 MOD_DAT,  MOD_OPT, PCRE2_DFA_RESTART,          DO(options) },
656   { "dfa_shortest",                MOD_DAT,  MOD_OPT, PCRE2_DFA_SHORTEST,         DO(options) },
657   { "dollar_endonly",              MOD_PAT,  MOD_OPT, PCRE2_DOLLAR_ENDONLY,       PO(options) },
658   { "dotall",                      MOD_PATP, MOD_OPT, PCRE2_DOTALL,               PO(options) },
659   { "dupnames",                    MOD_PATP, MOD_OPT, PCRE2_DUPNAMES,             PO(options) },
660   { "endanchored",                 MOD_PD,   MOD_OPT, PCRE2_ENDANCHORED,          PD(options) },
661   { "escaped_cr_is_lf",            MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
662   { "expand",                      MOD_PAT,  MOD_CTL, CTL_EXPAND,                 PO(control) },
663   { "extended",                    MOD_PATP, MOD_OPT, PCRE2_EXTENDED,             PO(options) },
664   { "extended_more",               MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE,        PO(options) },
665   { "extra_alt_bsux",              MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALT_BSUX,       CO(extra_options) },
666   { "find_limits",                 MOD_DAT,  MOD_CTL, CTL_FINDLIMITS,             DO(control) },
667   { "firstline",                   MOD_PAT,  MOD_OPT, PCRE2_FIRSTLINE,            PO(options) },
668   { "framesize",                   MOD_PAT,  MOD_CTL, CTL_FRAMESIZE,              PO(control) },
669   { "fullbincode",                 MOD_PAT,  MOD_CTL, CTL_FULLBINCODE,            PO(control) },
670   { "get",                         MOD_DAT,  MOD_NN,  DO(get_numbers),            DO(get_names) },
671   { "getall",                      MOD_DAT,  MOD_CTL, CTL_GETALL,                 DO(control) },
672   { "global",                      MOD_PNDP, MOD_CTL, CTL_GLOBAL,                 PO(control) },
673   { "heap_limit",                  MOD_CTM,  MOD_INT, 0,                          MO(heap_limit) },
674   { "hex",                         MOD_PAT,  MOD_CTL, CTL_HEXPAT,                 PO(control) },
675   { "info",                        MOD_PAT,  MOD_CTL, CTL_INFO,                   PO(control) },
676   { "jit",                         MOD_PAT,  MOD_IND, 7,                          PO(jit) },
677   { "jitfast",                     MOD_PAT,  MOD_CTL, CTL_JITFAST,                PO(control) },
678   { "jitstack",                    MOD_PNDP, MOD_INT, 0,                          PO(jitstack) },
679   { "jitverify",                   MOD_PAT,  MOD_CTL, CTL_JITVERIFY,              PO(control) },
680   { "literal",                     MOD_PAT,  MOD_OPT, PCRE2_LITERAL,              PO(options) },
681   { "locale",                      MOD_PAT,  MOD_STR, LOCALESIZE,                 PO(locale) },
682   { "mark",                        MOD_PNDP, MOD_CTL, CTL_MARK,                   PO(control) },
683   { "match_invalid_utf",           MOD_PAT,  MOD_OPT, PCRE2_MATCH_INVALID_UTF,    PO(options) },
684   { "match_limit",                 MOD_CTM,  MOD_INT, 0,                          MO(match_limit) },
685   { "match_line",                  MOD_CTC,  MOD_OPT, PCRE2_EXTRA_MATCH_LINE,     CO(extra_options) },
686   { "match_unset_backref",         MOD_PAT,  MOD_OPT, PCRE2_MATCH_UNSET_BACKREF,  PO(options) },
687   { "match_word",                  MOD_CTC,  MOD_OPT, PCRE2_EXTRA_MATCH_WORD,     CO(extra_options) },
688   { "max_pattern_length",          MOD_CTC,  MOD_SIZ, 0,                          CO(max_pattern_length) },
689   { "memory",                      MOD_PD,   MOD_CTL, CTL_MEMORY,                 PD(control) },
690   { "multiline",                   MOD_PATP, MOD_OPT, PCRE2_MULTILINE,            PO(options) },
691   { "never_backslash_c",           MOD_PAT,  MOD_OPT, PCRE2_NEVER_BACKSLASH_C,    PO(options) },
692   { "never_ucp",                   MOD_PAT,  MOD_OPT, PCRE2_NEVER_UCP,            PO(options) },
693   { "never_utf",                   MOD_PAT,  MOD_OPT, PCRE2_NEVER_UTF,            PO(options) },
694   { "newline",                     MOD_CTC,  MOD_NL,  0,                          CO(newline_convention) },
695   { "no_auto_capture",             MOD_PAT,  MOD_OPT, PCRE2_NO_AUTO_CAPTURE,      PO(options) },
696   { "no_auto_possess",             MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS,      PO(options) },
697   { "no_dotstar_anchor",           MOD_PAT,  MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR,    PO(options) },
698   { "no_jit",                      MOD_DAT,  MOD_OPT, PCRE2_NO_JIT,               DO(options) },
699   { "no_start_optimize",           MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE,    PO(options) },
700   { "no_utf_check",                MOD_PD,   MOD_OPT, PCRE2_NO_UTF_CHECK,         PD(options) },
701   { "notbol",                      MOD_DAT,  MOD_OPT, PCRE2_NOTBOL,               DO(options) },
702   { "notempty",                    MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY,             DO(options) },
703   { "notempty_atstart",            MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY_ATSTART,     DO(options) },
704   { "noteol",                      MOD_DAT,  MOD_OPT, PCRE2_NOTEOL,               DO(options) },
705   { "null_context",                MOD_PD,   MOD_CTL, CTL_NULLCONTEXT,            PO(control) },
706   { "offset",                      MOD_DAT,  MOD_INT, 0,                          DO(offset) },
707   { "offset_limit",                MOD_CTM,  MOD_SIZ, 0,                          MO(offset_limit)},
708   { "ovector",                     MOD_DAT,  MOD_INT, 0,                          DO(oveccount) },
709   { "parens_nest_limit",           MOD_CTC,  MOD_INT, 0,                          CO(parens_nest_limit) },
710   { "partial_hard",                MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
711   { "partial_soft",                MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
712   { "ph",                          MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
713   { "posix",                       MOD_PAT,  MOD_CTL, CTL_POSIX,                  PO(control) },
714   { "posix_nosub",                 MOD_PAT,  MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB,  PO(control) },
715   { "posix_startend",              MOD_DAT,  MOD_IN2, 0,                          DO(startend) },
716   { "ps",                          MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
717   { "push",                        MOD_PAT,  MOD_CTL, CTL_PUSH,                   PO(control) },
718   { "pushcopy",                    MOD_PAT,  MOD_CTL, CTL_PUSHCOPY,               PO(control) },
719   { "pushtablescopy",              MOD_PAT,  MOD_CTL, CTL_PUSHTABLESCOPY,         PO(control) },
720   { "recursion_limit",             MOD_CTM,  MOD_INT, 0,                          MO(depth_limit) },  /* Obsolete synonym */
721   { "regerror_buffsize",           MOD_PAT,  MOD_INT, 0,                          PO(regerror_buffsize) },
722   { "replace",                     MOD_PND,  MOD_STR, REPLACE_MODSIZE,            PO(replacement) },
723   { "stackguard",                  MOD_PAT,  MOD_INT, 0,                          PO(stackguard_test) },
724   { "startchar",                   MOD_PND,  MOD_CTL, CTL_STARTCHAR,              PO(control) },
725   { "startoffset",                 MOD_DAT,  MOD_INT, 0,                          DO(offset) },
726   { "subject_literal",             MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL,       PO(control2) },
727   { "substitute_callout",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_CALLOUT,    PO(control2) },
728   { "substitute_extended",         MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_EXTENDED,   PO(control2) },
729   { "substitute_literal",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_LITERAL,    PO(control2) },
730   { "substitute_matched",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_MATCHED,    PO(control2) },
731   { "substitute_overflow_length",  MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
732   { "substitute_replacement_only", MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_REPLACEMENT_ONLY, PO(control2) },
733   { "substitute_skip",             MOD_PND,  MOD_INT, 0,                          PO(substitute_skip) },
734   { "substitute_stop",             MOD_PND,  MOD_INT, 0,                          PO(substitute_stop) },
735   { "substitute_unknown_unset",    MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
736   { "substitute_unset_empty",      MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
737   { "tables",                      MOD_PAT,  MOD_INT, 0,                          PO(tables_id) },
738   { "ucp",                         MOD_PATP, MOD_OPT, PCRE2_UCP,                  PO(options) },
739   { "ungreedy",                    MOD_PAT,  MOD_OPT, PCRE2_UNGREEDY,             PO(options) },
740   { "use_length",                  MOD_PAT,  MOD_CTL, CTL_USE_LENGTH,             PO(control) },
741   { "use_offset_limit",            MOD_PAT,  MOD_OPT, PCRE2_USE_OFFSET_LIMIT,     PO(options) },
742   { "utf",                         MOD_PATP, MOD_OPT, PCRE2_UTF,                  PO(options) },
743   { "utf8_input",                  MOD_PAT,  MOD_CTL, CTL_UTF8_INPUT,             PO(control) },
744   { "zero_terminate",              MOD_DAT,  MOD_CTL, CTL_ZERO_TERMINATE,         DO(control) }
745 };
746 
/* Number of entries in modlist. The replacement list is parenthesized so that
the macro acts as a single operand in any expression, for example on the right
of a division or remainder operator. */

#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
748 
/* Bit masks of the options and controls that are accepted when the POSIX
interface is in use. Each mask is the OR of the listed flags; a mask of (0)
means that nothing from that word is supported. */

#define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
  PCRE2_CASELESS  | \
  PCRE2_DOTALL    | \
  PCRE2_LITERAL   | \
  PCRE2_MULTILINE | \
  PCRE2_UCP       | \
  PCRE2_UTF       | \
  PCRE2_UNGREEDY)

#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)

#define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_AFTERTEXT    | \
  CTL_ALLAFTERTEXT | \
  CTL_EXPAND       | \
  CTL_HEXPAT       | \
  CTL_POSIX        | \
  CTL_POSIX_NOSUB  | \
  CTL_USE_LENGTH)

#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)

#define POSIX_SUPPORTED_MATCH_OPTIONS ( \
  PCRE2_NOTBOL   | \
  PCRE2_NOTEMPTY | \
  PCRE2_NOTEOL)

#define POSIX_SUPPORTED_MATCH_CONTROLS ( \
  CTL_AFTERTEXT | \
  CTL_ALLAFTERTEXT)

#define POSIX_SUPPORTED_MATCH_CONTROLS2 (0)
768 
/* Control bits that are not ignored with 'push'. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_BINCODE        | \
  CTL_CALLOUT_INFO   | \
  CTL_FULLBINCODE    | \
  CTL_HEXPAT         | \
  CTL_INFO           | \
  CTL_JITVERIFY      | \
  CTL_MEMORY         | \
  CTL_FRAMESIZE      | \
  CTL_PUSH           | \
  CTL_PUSHCOPY       | \
  CTL_PUSHTABLESCOPY | \
  CTL_USE_LENGTH)

#define PUSH_SUPPORTED_COMPILE_CONTROLS2 ( \
  CTL2_BSR_SET | \
  CTL2_NL_SET)

/* Controls that apply only at compile time with 'push'. */

#define PUSH_COMPILE_ONLY_CONTROLS   CTL_JITVERIFY
#define PUSH_COMPILE_ONLY_CONTROLS2  (0)

/* Controls that are forbidden with #pop or #popcopy. */

#define NOTPOP_CONTROLS ( \
  CTL_HEXPAT         | \
  CTL_POSIX          | \
  CTL_POSIX_NOSUB    | \
  CTL_PUSH           | \
  CTL_PUSHCOPY       | \
  CTL_PUSHTABLESCOPY | \
  CTL_USE_LENGTH)
787 
788 /* Pattern controls that are mutually exclusive. At present these are all in
789 the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
790 CTL_POSIX, so it doesn't need its own entries. */
791 
792 static uint32_t exclusive_pat_controls[] = {
793   CTL_POSIX    | CTL_PUSH,
794   CTL_POSIX    | CTL_PUSHCOPY,
795   CTL_POSIX    | CTL_PUSHTABLESCOPY,
796   CTL_PUSH     | CTL_PUSHCOPY,
797   CTL_PUSH     | CTL_PUSHTABLESCOPY,
798   CTL_PUSHCOPY | CTL_PUSHTABLESCOPY,
799   CTL_EXPAND   | CTL_HEXPAT };
800 
801 /* Data controls that are mutually exclusive. At present these are all in the
802 first control word. */
803 
804 static uint32_t exclusive_dat_controls[] = {
805   CTL_ALLUSEDTEXT | CTL_STARTCHAR,
806   CTL_FINDLIMITS  | CTL_NULLCONTEXT };
807 
/* Table of single-character abbreviated modifiers. The index field is
initialized to -1, but the first time the modifier is encountered, it is filled
in with the index of the full entry in modlist, to save repeated searching when
processing multiple test items. This short list is searched serially, so its
order does not matter. */

typedef struct c1modstruct {
  const char *fullname;   /* Full name of the modifier */
  uint32_t    onechar;    /* Its single-character abbreviation */
  int         index;      /* Index in modlist, or -1 if not yet looked up */
} c1modstruct;

static c1modstruct c1modlist[] = {
  { "bincode",         'B',           -1 },
  { "info",            'I',           -1 },
  { "global",          'g',           -1 },
  { "caseless",        'i',           -1 },
  { "multiline",       'm',           -1 },
  { "no_auto_capture", 'n',           -1 },
  { "dotall",          's',           -1 },
  { "extended",        'x',           -1 }
};

/* Number of entries in c1modlist. Parenthesized so that the macro acts as a
single operand wherever it is used in an expression. */

#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modstruct))
832 
/* Table of arguments for the -C command line option. Use macros to make the
table itself easier to read. Each helper macro below ends up defined as either
a non-zero value or 0. */

/* Code unit widths: 1 when the corresponding SUPPORT_PCRE2_n is defined,
otherwise 0 (unless the short name is already defined). */

#ifdef SUPPORT_PCRE2_8
#define SUPPORT_8 1
#endif
#if !defined SUPPORT_8
#define SUPPORT_8 0
#endif

#ifdef SUPPORT_PCRE2_16
#define SUPPORT_16 1
#endif
#if !defined SUPPORT_16
#define SUPPORT_16 0
#endif

#ifdef SUPPORT_PCRE2_32
#define SUPPORT_32 1
#endif
#if !defined SUPPORT_32
#define SUPPORT_32 0
#endif

/* EBCDIC support flag and the EBCDIC newline value (0 when not EBCDIC). */

#if defined EBCDIC
#define SUPPORT_EBCDIC 1
#define EBCDIC_NL CHAR_LF
#else
#define SUPPORT_EBCDIC 0
#define EBCDIC_NL 0
#endif

/* BACKSLASH_C is 0 only when NEVER_BACKSLASH_C is defined. */

#if defined NEVER_BACKSLASH_C
#define BACKSLASH_C 0
#else
#define BACKSLASH_C 1
#endif
869 
/* One entry in the table of -C arguments: the argument name, one of the
CONF_xxx type codes below, and a value whose interpretation depends on the
type code. */

typedef struct coptstruct {
  const char *name;     /* Argument name */
  uint32_t    type;     /* A CONF_xxx code */
  uint32_t    value;    /* Value associated with the entry */
} coptstruct;

/* Type codes used in the type field of coptstruct. */

enum {
  CONF_BSR = 0,
  CONF_FIX = 1,
  CONF_FIZ = 2,
  CONF_INT = 3,
  CONF_NL  = 4
};
882 
883 static coptstruct coptlist[] = {
884   { "backslash-C", CONF_FIX, BACKSLASH_C },
885   { "bsr",         CONF_BSR, PCRE2_CONFIG_BSR },
886   { "ebcdic",      CONF_FIX, SUPPORT_EBCDIC },
887   { "ebcdic-nl",   CONF_FIZ, EBCDIC_NL },
888   { "jit",         CONF_INT, PCRE2_CONFIG_JIT },
889   { "linksize",    CONF_INT, PCRE2_CONFIG_LINKSIZE },
890   { "newline",     CONF_NL,  PCRE2_CONFIG_NEWLINE },
891   { "pcre2-16",    CONF_FIX, SUPPORT_16 },
892   { "pcre2-32",    CONF_FIX, SUPPORT_32 },
893   { "pcre2-8",     CONF_FIX, SUPPORT_8 },
894   { "unicode",     CONF_INT, PCRE2_CONFIG_UNICODE }
895 };
896 
/* Number of entries in coptlist. Parenthesized so that the macro acts as a
single operand wherever it is used in an expression. */

#define COPTLISTCOUNT (sizeof(coptlist)/sizeof(coptstruct))

/* The helper macros were needed only while the table was being initialized. */

#undef SUPPORT_8
#undef SUPPORT_16
#undef SUPPORT_32
#undef SUPPORT_EBCDIC
903 
904 
905 /* ----------------------- Static variables ------------------------ */
906 
907 static FILE *infile;
908 static FILE *outfile;
909 
910 static const void *last_callout_mark;
911 static PCRE2_JIT_STACK *jit_stack = NULL;
912 static size_t jit_stack_size = 0;
913 
914 static BOOL first_callout;
915 static BOOL jit_was_used;
916 static BOOL restrict_for_perl_test = FALSE;
917 static BOOL show_memory = FALSE;
918 
919 static int code_unit_size;                    /* Bytes */
920 static int jitrc;                             /* Return from JIT compile */
921 static int test_mode = DEFAULT_TEST_MODE;
922 static int timeit = 0;
923 static int timeitm = 0;
924 
925 clock_t total_compile_time = 0;
926 clock_t total_jit_compile_time = 0;
927 clock_t total_match_time = 0;
928 
929 static uint32_t dfa_matched;
930 static uint32_t forbid_utf = 0;
931 static uint32_t maxlookbehind;
932 static uint32_t max_oveccount;
933 static uint32_t callout_count;
934 static uint32_t maxcapcount;
935 
936 static uint16_t local_newline_default = 0;
937 
938 static VERSION_TYPE jittarget[VERSION_SIZE];
939 static VERSION_TYPE version[VERSION_SIZE];
940 static VERSION_TYPE uversion[VERSION_SIZE];
941 
942 static patctl def_patctl;
943 static patctl pat_patctl;
944 static datctl def_datctl;
945 static datctl dat_datctl;
946 
947 static void *patstack[PATSTACKSIZE];
948 static int patstacknext = 0;
949 
950 static void *malloclist[MALLOCLISTSIZE];
951 static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE];
952 static uint32_t malloclistptr = 0;
953 
954 #ifdef SUPPORT_PCRE2_8
955 static regex_t preg = { NULL, NULL, 0, 0, 0, 0 };
956 #endif
957 
958 static int *dfa_workspace = NULL;
959 static const uint8_t *locale_tables = NULL;
960 static const uint8_t *use_tables = NULL;
961 static uint8_t locale_name[32];
962 static uint8_t *tables3 = NULL;         /* For binary-loaded tables */
963 static uint32_t loadtables_length = 0;
964 
/* Buffers are needed for building 16/32-bit strings; 8-bit strings don't need
rebuilding, but the same naming scheme is set up for use in macros. All input
lines are read into "buffer", whose size is the same as pbuffer8. Pattern lines
are always copied to pbuffer8 for use in callouts, even if they are actually
compiled from pbuffer16 or pbuffer32. */

static size_t pbuffer8_size = 50000;          /* Initial size, bytes */
static uint8_t *pbuffer8 = NULL, *buffer = NULL;

/* All processed data lines are put in dbuffer. In non-8-bit modes it is cast
as needed. For long data lines it grows as necessary. */

static size_t dbuffer_size = 1u << 14;        /* Initial size, bytes */
static uint8_t *dbuffer = NULL;
980 
981 
/* ---------------- Mode-dependent variables -------------------*/

/* One set of variables per supported code unit width. The names differ only in
their 8/16/32 suffix so that the mode-selecting macros can build them. */

#ifdef SUPPORT_PCRE2_8
static pcre2_code_8             *compiled_code8;
static pcre2_general_context_8  *general_context8;
static pcre2_general_context_8  *general_context_copy8;
static pcre2_compile_context_8  *pat_context8;
static pcre2_compile_context_8  *default_pat_context8;
static pcre2_convert_context_8  *con_context8;
static pcre2_convert_context_8  *default_con_context8;
static pcre2_match_context_8    *dat_context8;
static pcre2_match_context_8    *default_dat_context8;
static pcre2_match_data_8       *match_data8;
#endif

#ifdef SUPPORT_PCRE2_16
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_convert_context_16 *con_context16;
static pcre2_convert_context_16 *default_con_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static PCRE2_SIZE                pbuffer16_size = 0;   /* Set only when needed */
static uint16_t                 *pbuffer16 = NULL;
#endif

#ifdef SUPPORT_PCRE2_32
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_convert_context_32 *con_context32;
static pcre2_convert_context_32 *default_con_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static PCRE2_SIZE                pbuffer32_size = 0;   /* Set only when needed */
static uint32_t                 *pbuffer32 = NULL;
#endif
1014 
1015 
/* ---------------- Macros that work in all modes ----------------- */

/* CAST8VAR(x) is CASTVAR with the type fixed as uint8_t *. SET and SETPLUS are
SETOP with the operator fixed as = and += respectively. strlen8(x) is strlen()
applied to x viewed as a char pointer; the argument is parenthesized so that
the cast applies to the whole expression that is passed, not just to its first
operand. */

#define CAST8VAR(x) CASTVAR(uint8_t *, x)
#define SET(x,y) SETOP(x,y,=)
#define SETPLUS(x,y) SETOP(x,y,+=)
#define strlen8(x) strlen((char *)(x))
1022 
1023 
1024 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
1025 
1026 /* Define macros for variables and functions that must be selected dynamically
1027 depending on the mode setting (8, 16, 32). These are dependent on which modes
1028 are supported. */
1029 
1030 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
1031      defined (SUPPORT_PCRE2_32)) >= 2
1032 
1033 /* ----- All three modes supported ----- */
1034 
1035 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
1036 
/* CASTFLD(t,a,b): field b reached through G(a,8), G(a,16) or G(a,32),
according to test_mode, cast to type t. Any mode other than PCRE8_MODE or
PCRE16_MODE selects the 32-bit item. */

#define CASTFLD(t,a,b) \
  ((test_mode == PCRE8_MODE)? (t)(G(a,8)->b) : \
   ((test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : \
                                (t)(G(a,32)->b)))

/* CASTVAR(t,x): the variable G(x,8), G(x,16) or G(x,32), according to
test_mode, cast to type t. */

#define CASTVAR(t,x) \
  ((test_mode == PCRE8_MODE)? (t)G(x,8) : \
   ((test_mode == PCRE16_MODE)? (t)G(x,16) : \
                                (t)G(x,32)))

/* CODE_UNIT(a,b): code unit number b of the string at a, where a is treated as
an 8-, 16- or 32-bit string pointer according to test_mode. The result is a
uint32_t. */

#define CODE_UNIT(a,b) \
  ((test_mode == PCRE8_MODE)? (uint32_t)(((PCRE2_SPTR8)(a))[b]) : \
   ((test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \
                                (uint32_t)(((PCRE2_SPTR32)(a))[b])))
1048 
/* CONCTXCPY(a,b): copy one convert context over another with memcpy(), from
G(b,n) to G(a,n), where n is chosen by test_mode. Any mode other than
PCRE8_MODE or PCRE16_MODE selects the 32-bit items. */

#define CONCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) memcpy(G(a,8), G(b,8), \
    sizeof(pcre2_convert_context_8)); \
  else if (test_mode == PCRE16_MODE) memcpy(G(a,16), G(b,16), \
    sizeof(pcre2_convert_context_16)); \
  else memcpy(G(a,32), G(b,32), \
    sizeof(pcre2_convert_context_32))
1055 
/* CONVERT_COPY(a,b,c): copy c code units from b into G(a,8), G(a,16) or
G(a,32), according to test_mode; the byte count given to memcpy() is c, c*2 or
c*4 respectively. Nothing is copied if test_mode is none of the three modes.

Because the chain has no final "else", it is wrapped in do { } while (0) so
that an "else" written after the macro by a caller cannot attach itself to the
macro's own "if". As with the neighbouring macros, the caller supplies the
terminating semicolon. */

#define CONVERT_COPY(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      memcpy(G(a,8),(char *)(b),c); \
    else if (test_mode == PCRE16_MODE) \
      memcpy(G(a,16),(char *)(b),(c)*2); \
    else if (test_mode == PCRE32_MODE) \
      memcpy(G(a,32),(char *)(b),(c)*4); \
  } while (0)
1063 
/* DATCTXCPY(a,b): copy one match context over another with memcpy(), from
G(b,n) to G(a,n), where n is chosen by test_mode. */

#define DATCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) memcpy(G(a,8), G(b,8), \
    sizeof(pcre2_match_context_8)); \
  else if (test_mode == PCRE16_MODE) memcpy(G(a,16), G(b,16), \
    sizeof(pcre2_match_context_16)); \
  else memcpy(G(a,32), G(b,32), \
    sizeof(pcre2_match_context_32))

/* FLD(a,b): field b reached through G(a,8), G(a,16) or G(a,32), according to
test_mode. */

#define FLD(a,b) \
  ((test_mode == PCRE8_MODE)? G(a,8)->b : \
   ((test_mode == PCRE16_MODE)? G(a,16)->b : \
                                G(a,32)->b))

/* PATCTXCPY(a,b): copy one compile context over another with memcpy(), from
G(b,n) to G(a,n), where n is chosen by test_mode. */

#define PATCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) memcpy(G(a,8), G(b,8), \
    sizeof(pcre2_compile_context_8)); \
  else if (test_mode == PCRE16_MODE) memcpy(G(a,16), G(b,16), \
    sizeof(pcre2_compile_context_16)); \
  else memcpy(G(a,32), G(b,32), \
    sizeof(pcre2_compile_context_32))
1080 
/* PCHARS(lv,p,offset,len,utf,f): call pchars32(), pchars16() or pchars8() on
the string at p plus offset code units, and store the result in lv. Here the
32-bit mode is tested first and any mode other than PCRE32_MODE or PCRE16_MODE
selects the 8-bit function. */

#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) lv = \
    pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
  else if (test_mode == PCRE16_MODE) lv = \
    pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
  else lv = \
    pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)

/* PCHARSV is the same as PCHARS, but the result is discarded. */

#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) (void) \
    pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
  else if (test_mode == PCRE16_MODE) (void) \
    pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
  else (void) \
    pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1096 
/* PCRE2_CALLOUT_ENUMERATE(a,b,c): call pcre2_callout_enumerate_n() on the
compiled code for the current mode (compiled_code8/16/32), with b cast to the
matching callback type and c passed through; the result is stored in a. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == PCRE8_MODE) a = \
    pcre2_callout_enumerate_8(compiled_code8, \
      (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) a = \
    pcre2_callout_enumerate_16(compiled_code16, \
      (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \
  else a = \
    pcre2_callout_enumerate_32(compiled_code32, \
      (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
1107 
/* PCRE2_CODE_COPY_FROM_VOID(a,b): set G(a,n) to the result of
pcre2_code_copy_n(b), with n chosen by test_mode. */

#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  if (test_mode == PCRE8_MODE)       G(a,8) = pcre2_code_copy_8(b); \
  else if (test_mode == PCRE16_MODE) G(a,16) = pcre2_code_copy_16(b); \
  else                               G(a,32) = pcre2_code_copy_32(b)

/* PCRE2_CODE_COPY_TO_VOID(a,b): set a to the result of
pcre2_code_copy_n(G(b,n)), cast to void *. */

#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  if (test_mode == PCRE8_MODE)       a = (void *)pcre2_code_copy_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) a = (void *)pcre2_code_copy_16(G(b,16)); \
  else                               a = (void *)pcre2_code_copy_32(G(b,32))

/* PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b): as PCRE2_CODE_COPY_TO_VOID, but
calling pcre2_code_copy_with_tables_n(). */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  if (test_mode == PCRE8_MODE) a = \
    (void *)pcre2_code_copy_with_tables_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) a = \
    (void *)pcre2_code_copy_with_tables_16(G(b,16)); \
  else a = \
    (void *)pcre2_code_copy_with_tables_32(G(b,32))
1131 
/* PCRE2_COMPILE(a,b,c,d,e,f,g): call pcre2_compile_n() with G(b,n) as its
first argument and c..g passed through, storing the result in G(a,n). */

#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  if (test_mode == PCRE8_MODE) G(a,8) = \
    pcre2_compile_8(G(b,8),c,d,e,f,g); \
  else if (test_mode == PCRE16_MODE) G(a,16) = \
    pcre2_compile_16(G(b,16),c,d,e,f,g); \
  else G(a,32) = \
    pcre2_compile_32(G(b,32),c,d,e,f,g)

/* PCRE2_CONVERTED_PATTERN_FREE(a): pass a, cast to the code unit pointer type
of the current mode, to pcre2_converted_pattern_free_n(). */

#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a); \
  else \
    pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
1144 
/* PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j): call pcre2_dfa_match_n() with G(b,n),
c cast to the subject pointer type of the current mode, d..f, G(g,n) and h..j;
the result is stored in a. */

#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  if (test_mode == PCRE8_MODE) a = \
    pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \
  else if (test_mode == PCRE16_MODE) a = \
    pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \
  else a = \
    pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
1152 
/* PCRE2_GET_ERROR_MESSAGE(r,a,b): call pcre2_get_error_message_n() for error
code a with G(b,n) as the buffer; the result is stored in r. The length
argument is built from G(b,n) and the suffix _size, divided by 2 in 16-bit
mode and by 4 in 32-bit mode. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  if (test_mode == PCRE8_MODE) r = \
    pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \
  else if (test_mode == PCRE16_MODE) r = \
    pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2)); \
  else r = \
    pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))

/* PCRE2_GET_OVECTOR_COUNT(a,b): set a to pcre2_get_ovector_count_n(G(b,n)). */

#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  if (test_mode == PCRE8_MODE)       a = pcre2_get_ovector_count_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) a = pcre2_get_ovector_count_16(G(b,16)); \
  else                               a = pcre2_get_ovector_count_32(G(b,32))

/* PCRE2_GET_STARTCHAR(a,b): set a to pcre2_get_startchar_n(G(b,n)). */

#define PCRE2_GET_STARTCHAR(a,b) \
  if (test_mode == PCRE8_MODE)       a = pcre2_get_startchar_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) a = pcre2_get_startchar_16(G(b,16)); \
  else                               a = pcre2_get_startchar_32(G(b,32))
1176 
/* PCRE2_JIT_COMPILE(r,a,b): set r to pcre2_jit_compile_n(G(a,n),b). */

#define PCRE2_JIT_COMPILE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_jit_compile_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_jit_compile_16(G(a,16),b); \
  else \
    r = pcre2_jit_compile_32(G(a,32),b)

/* PCRE2_JIT_FREE_UNUSED_MEMORY(a): call
pcre2_jit_free_unused_memory_n(G(a,n)). */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_free_unused_memory_8(G(a,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_free_unused_memory_16(G(a,16)); \
  else \
    pcre2_jit_free_unused_memory_32(G(a,32))

/* PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h): call pcre2_jit_match_n() with G(b,n), c
cast to the subject pointer type of the current mode, d..f, G(g,n) and h; the
result is stored in a. */

#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == PCRE8_MODE) a = \
    pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
  else if (test_mode == PCRE16_MODE) a = \
    pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
  else a = \
    pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1194 
/* The three JIT stack macros below follow the same convention as the other
mode-selecting macros: the definition does not end with a semicolon, so the
caller supplies it. This lets an invocation be used as the body of an "if"
that has an "else". */

/* PCRE2_JIT_STACK_CREATE(a,b,c,d): set a to the result of
pcre2_jit_stack_create_n(b,c,d), cast to PCRE2_JIT_STACK *. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)

/* PCRE2_JIT_STACK_ASSIGN(a,b,c): call pcre2_jit_stack_assign_n() with G(a,n),
b cast to the callback type of the current mode, and c. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
  else \
    pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)

/* PCRE2_JIT_STACK_FREE(a): pass a, cast to the JIT stack pointer type of the
current mode, to pcre2_jit_stack_free_n(). */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
1218 
/* PCRE2_MAKETABLES(a): set a to the result of pcre2_maketables_n(NULL). */

#define PCRE2_MAKETABLES(a) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_maketables_8(NULL); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_maketables_16(NULL); \
  else \
    a = pcre2_maketables_32(NULL)

/* PCRE2_MATCH(a,b,c,d,e,f,g,h): call pcre2_match_n() with G(b,n), c cast to
the subject pointer type of the current mode, d..f, G(g,n) and h; the result
is stored in a. */

#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == PCRE8_MODE) a = \
    pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
  else if (test_mode == PCRE16_MODE) a = \
    pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
  else a = \
    pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1231 
/* PCRE2_MATCH_DATA_CREATE(a,b,c): set G(a,n) to
pcre2_match_data_create_n(b,c). */

#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  if (test_mode == PCRE8_MODE)       G(a,8) = pcre2_match_data_create_8(b,c); \
  else if (test_mode == PCRE16_MODE) G(a,16) = pcre2_match_data_create_16(b,c); \
  else                               G(a,32) = pcre2_match_data_create_32(b,c)

/* PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c): set G(a,n) to
pcre2_match_data_create_from_pattern_n(G(b,n),c). */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  if (test_mode == PCRE8_MODE) G(a,8) = \
    pcre2_match_data_create_from_pattern_8(G(b,8),c); \
  else if (test_mode == PCRE16_MODE) G(a,16) = \
    pcre2_match_data_create_from_pattern_16(G(b,16),c); \
  else G(a,32) = \
    pcre2_match_data_create_from_pattern_32(G(b,32),c)

/* PCRE2_MATCH_DATA_FREE(a): call pcre2_match_data_free_n(G(a,n)). */

#define PCRE2_MATCH_DATA_FREE(a) \
  if (test_mode == PCRE8_MODE)       pcre2_match_data_free_8(G(a,8)); \
  else if (test_mode == PCRE16_MODE) pcre2_match_data_free_16(G(a,16)); \
  else                               pcre2_match_data_free_32(G(a,32))
1255 
/* PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g): call pcre2_pattern_convert_n() with
G(b,n), c, d, e cast to a pointer to the code unit pointer type of the current
mode, f and G(g,n); the result is stored in a. */

#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  if (test_mode == PCRE8_MODE) a = \
    pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)); \
  else if (test_mode == PCRE16_MODE) a = \
    pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)); \
  else a = \
    pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))

/* PCRE2_PATTERN_INFO(a,b,c,d): set a to pcre2_pattern_info_n(G(b,n),c,d). */

#define PCRE2_PATTERN_INFO(a,b,c,d) \
  if (test_mode == PCRE8_MODE)       a = pcre2_pattern_info_8(G(b,8),c,d); \
  else if (test_mode == PCRE16_MODE) a = pcre2_pattern_info_16(G(b,16),c,d); \
  else                               a = pcre2_pattern_info_32(G(b,32),c,d)

/* PCRE2_PRINTINT(a): call pcre2_printint_n() on the compiled code for the
current mode (compiled_code8/16/32), with outfile and a. */

#define PCRE2_PRINTINT(a) \
  if (test_mode == PCRE8_MODE)       pcre2_printint_8(compiled_code8,outfile,a); \
  else if (test_mode == PCRE16_MODE) pcre2_printint_16(compiled_code16,outfile,a); \
  else                               pcre2_printint_32(compiled_code32,outfile,a)
1279 
/* PCRE2_SERIALIZE_DECODE(r,a,b,c,d): call pcre2_serialize_decode_n() with a
cast to a pointer to the code pointer type of the current mode, b, c and
G(d,n); the result is stored in r. */

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  if (test_mode == PCRE8_MODE) r = \
    pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \
  else if (test_mode == PCRE16_MODE) r = \
    pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \
  else r = \
    pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))

/* PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e): call pcre2_serialize_encode_n() with a
cast to a pointer to the const code pointer type of the current mode, b, c, d
and G(e,n); the result is stored in r. */

#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) r = \
    pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \
  else if (test_mode == PCRE16_MODE) r = \
    pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \
  else r = \
    pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))

/* PCRE2_SERIALIZE_FREE(a): call pcre2_serialize_free_n(a). */

#define PCRE2_SERIALIZE_FREE(a) \
  if (test_mode == PCRE8_MODE)       pcre2_serialize_free_8(a); \
  else if (test_mode == PCRE16_MODE) pcre2_serialize_free_16(a); \
  else                               pcre2_serialize_free_32(a)
1303 
/* PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a): set r to
pcre2_serialize_get_number_of_codes_n(a), with n chosen by test_mode. The
definition ends without a semicolon or a line continuation, like the other
mode-selecting macros, so the caller supplies the semicolon and the definition
does not depend on the following line being blank. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_get_number_of_codes_8(a); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_get_number_of_codes_16(a); \
  else \
    r = pcre2_serialize_get_number_of_codes_32(a)
1311 
/* PCRE2_SET_CALLOUT(a,b,c): call pcre2_set_callout_n() with G(a,n), b cast to
the callout function type of the current mode, and c. The definition ends
without a semicolon, like the other mode-selecting macros, so the caller
supplies it and an invocation can be the body of an "if" that has an "else". */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
1319 
/* Context-setting macros. Each one calls the pcre2_set_xxx_n() function for
the current mode, passing G(a,n) followed by the remaining arguments. Any mode
other than PCRE8_MODE or PCRE16_MODE selects the 32-bit function. */

/* PCRE2_SET_CHARACTER_TABLES(a,b): pcre2_set_character_tables_n(G(a,n),b). */

#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  if (test_mode == PCRE8_MODE)       pcre2_set_character_tables_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) pcre2_set_character_tables_16(G(a,16),b); \
  else                               pcre2_set_character_tables_32(G(a,32),b)

/* PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c):
pcre2_set_compile_recursion_guard_n(G(a,n),b,c). */

#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_compile_recursion_guard_8(G(a,8), b, c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_compile_recursion_guard_16(G(a,16), b, c); \
  else \
    pcre2_set_compile_recursion_guard_32(G(a,32), b, c)

/* PCRE2_SET_DEPTH_LIMIT(a,b): pcre2_set_depth_limit_n(G(a,n),b). */

#define PCRE2_SET_DEPTH_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE)       pcre2_set_depth_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) pcre2_set_depth_limit_16(G(a,16),b); \
  else                               pcre2_set_depth_limit_32(G(a,32),b)

/* PCRE2_SET_GLOB_SEPARATOR(r,a,b): set r to
pcre2_set_glob_separator_n(G(a,n),b). */

#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  if (test_mode == PCRE8_MODE)       r = pcre2_set_glob_separator_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) r = pcre2_set_glob_separator_16(G(a,16),b); \
  else                               r = pcre2_set_glob_separator_32(G(a,32),b)

/* PCRE2_SET_GLOB_ESCAPE(r,a,b): set r to pcre2_set_glob_escape_n(G(a,n),b). */

#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  if (test_mode == PCRE8_MODE)       r = pcre2_set_glob_escape_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) r = pcre2_set_glob_escape_16(G(a,16),b); \
  else                               r = pcre2_set_glob_escape_32(G(a,32),b)
1359 
/* Limit-setting macros. Each one calls the pcre2_set_xxx_n() function for the
current mode, passing G(a,n) and the value b. Any mode other than PCRE8_MODE or
PCRE16_MODE selects the 32-bit function. */

/* PCRE2_SET_HEAP_LIMIT(a,b): pcre2_set_heap_limit_n(G(a,n),b). */

#define PCRE2_SET_HEAP_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE)       pcre2_set_heap_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) pcre2_set_heap_limit_16(G(a,16),b); \
  else                               pcre2_set_heap_limit_32(G(a,32),b)

/* PCRE2_SET_MATCH_LIMIT(a,b): pcre2_set_match_limit_n(G(a,n),b). */

#define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE)       pcre2_set_match_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) pcre2_set_match_limit_16(G(a,16),b); \
  else                               pcre2_set_match_limit_32(G(a,32),b)

/* PCRE2_SET_MAX_PATTERN_LENGTH(a,b):
pcre2_set_max_pattern_length_n(G(a,n),b). */

#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  if (test_mode == PCRE8_MODE)       pcre2_set_max_pattern_length_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) pcre2_set_max_pattern_length_16(G(a,16),b); \
  else                               pcre2_set_max_pattern_length_32(G(a,32),b)

/* PCRE2_SET_OFFSET_LIMIT(a,b): pcre2_set_offset_limit_n(G(a,n),b). */

#define PCRE2_SET_OFFSET_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE)       pcre2_set_offset_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) pcre2_set_offset_limit_16(G(a,16),b); \
  else                               pcre2_set_offset_limit_32(G(a,32),b)

/* PCRE2_SET_PARENS_NEST_LIMIT(a,b):
pcre2_set_parens_nest_limit_n(G(a,n),b). */

#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE)       pcre2_set_parens_nest_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) pcre2_set_parens_nest_limit_16(G(a,16),b); \
  else                               pcre2_set_parens_nest_limit_32(G(a,32),b)
1399 
/* Three-mode substitution macros. PCRE2_SET_SUBSTITUTE_CALLOUT casts b to the
callout function pointer type of the selected width before passing it on, so
one callback can be registered whatever the mode. */

1400 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
1401   if (test_mode == PCRE8_MODE) \
1402     pcre2_set_substitute_callout_8(G(a,8), \
1403       (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c); \
1404   else if (test_mode == PCRE16_MODE) \
1405     pcre2_set_substitute_callout_16(G(a,16), \
1406       (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c); \
1407   else \
1408     pcre2_set_substitute_callout_32(G(a,32), \
1409       (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
1410 
/* PCRE2_SUBSTITUTE stores the function's result in a. Arguments b and g are
width-suffixed through G(); c and i are cast to the subject pointer type of
the selected width and k to a pointer to its code unit type; d, e, f, h, j and
l are passed through unchanged. */

1411 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
1412   if (test_mode == PCRE8_MODE) \
1413     a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \
1414       (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
1415   else if (test_mode == PCRE16_MODE) \
1416     a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
1417       (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
1418   else \
1419     a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
1420       (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
1421 
/* Three-mode substring copy/free macros. The copy macros store the function's
result in a; d is cast to a pointer to the code unit type of the selected
width and e is passed through. In the BYNAME form c is width-suffixed through
G(); in the BYNUMBER form c is passed as given. */

1422 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
1423   if (test_mode == PCRE8_MODE) \
1424     a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
1425   else if (test_mode == PCRE16_MODE) \
1426     a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \
1427   else \
1428     a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
1429 
1430 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
1431   if (test_mode == PCRE8_MODE) \
1432     a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e); \
1433   else if (test_mode == PCRE16_MODE) \
1434     a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); \
1435   else \
1436     a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
1437 
/* Here a is used as written (no G() suffixing) and is cast to the code unit
pointer type of the selected width. */

1438 #define PCRE2_SUBSTRING_FREE(a) \
1439   if (test_mode == PCRE8_MODE) pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \
1440   else if (test_mode == PCRE16_MODE) \
1441     pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \
1442   else pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
1443 
/* Three-mode substring get/length macros. Each stores the function's result
in a. The GET forms cast d to a pointer-to-pointer of the selected code unit
type and pass e through; the LENGTH forms pass d through. BYNAME forms
width-suffix c through G(), BYNUMBER forms pass c as given. */

1444 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
1445   if (test_mode == PCRE8_MODE) \
1446     a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \
1447   else if (test_mode == PCRE16_MODE) \
1448     a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \
1449   else \
1450     a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
1451 
1452 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
1453   if (test_mode == PCRE8_MODE) \
1454     a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e); \
1455   else if (test_mode == PCRE16_MODE) \
1456     a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e); \
1457   else \
1458     a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
1459 
1460 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
1461   if (test_mode == PCRE8_MODE) \
1462     a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d); \
1463   else if (test_mode == PCRE16_MODE) \
1464     a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d); \
1465   else \
1466     a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
1467 
1468 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
1469   if (test_mode == PCRE8_MODE) \
1470     a = pcre2_substring_length_bynumber_8(G(b,8),c,d); \
1471   else if (test_mode == PCRE16_MODE) \
1472     a = pcre2_substring_length_bynumber_16(G(b,16),c,d); \
1473   else \
1474     a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
1475 
/* Three-mode substring list macros and name-to-number lookup. LIST_GET and
NUMBER_FROM_NAME store the function's result in a. LIST_GET casts c to a
triple pointer of the selected code unit type; LIST_FREE casts its argument,
used as written, to a pointer to the selected subject pointer type. */

1476 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
1477   if (test_mode == PCRE8_MODE) \
1478     a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d); \
1479   else if (test_mode == PCRE16_MODE) \
1480     a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d); \
1481   else \
1482     a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
1483 
1484 #define PCRE2_SUBSTRING_LIST_FREE(a) \
1485   if (test_mode == PCRE8_MODE) \
1486     pcre2_substring_list_free_8((PCRE2_SPTR8 *)a); \
1487   else if (test_mode == PCRE16_MODE) \
1488     pcre2_substring_list_free_16((PCRE2_SPTR16 *)a); \
1489   else \
1490     pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
1491 
1492 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
1493   if (test_mode == PCRE8_MODE) \
1494     a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); \
1495   else if (test_mode == PCRE16_MODE) \
1496     a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); \
1497   else \
1498     a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
1499 
/* Three-mode access helpers that work on the width-suffixed form of a
variable name (produced by G(), defined elsewhere). */

/* PTR is an expression: the suffixed variable for the current mode, cast to
void *. Any mode other than 8-bit or 16-bit selects the 32-bit variable. */

1500 #define PTR(x) ( \
1501   (test_mode == PCRE8_MODE)? (void *)G(x,8) : \
1502   (test_mode == PCRE16_MODE)? (void *)G(x,16) : \
1503   (void *)G(x,32))
1504 
/* SETFLD assigns z to field y of the structure the suffixed variable points
to; SETFLDVEC assigns z to element v of field y. Both are statements without
a final semicolon. */

1505 #define SETFLD(x,y,z) \
1506   if (test_mode == PCRE8_MODE) G(x,8)->y = z; \
1507   else if (test_mode == PCRE16_MODE) G(x,16)->y = z; \
1508   else G(x,32)->y = z
1509 
1510 #define SETFLDVEC(x,y,v,z) \
1511   if (test_mode == PCRE8_MODE) G(x,8)->y[v] = z; \
1512   else if (test_mode == PCRE16_MODE) G(x,16)->y[v] = z; \
1513   else G(x,32)->y[v] = z
1514 
/* SETOP writes "variable z y", so z is an operator token (the argument order
is variable, operand, operator). */

1515 #define SETOP(x,y,z) \
1516   if (test_mode == PCRE8_MODE) G(x,8) z y; \
1517   else if (test_mode == PCRE16_MODE) G(x,16) z y; \
1518   else G(x,32) z y
1519 
/* SETCASTPTR assigns y to the suffixed variable after casting it to a pointer
to uint8_t, uint16_t or uint32_t according to the mode. */

1520 #define SETCASTPTR(x,y) \
1521   if (test_mode == PCRE8_MODE) \
1522     G(x,8) = (uint8_t *)(y); \
1523   else if (test_mode == PCRE16_MODE) \
1524     G(x,16) = (uint16_t *)(y); \
1525   else \
1526     G(x,32) = (uint32_t *)(y)
1527 
/* STRLEN is an expression of type int: the result of strlen(), strlen16() or
strlen32() applied to p after casting p to the pointer type of the current
mode. strlen16() and strlen32() are defined outside this section. p is not
parenthesised inside the casts, so an argument such as "q + 1" has the cast
applied to q alone. */

1528 #define STRLEN(p) ((test_mode == PCRE8_MODE)? ((int)strlen((char *)p)) : \
1529   (test_mode == PCRE16_MODE)? ((int)strlen16((PCRE2_SPTR16)p)) : \
1530   ((int)strlen32((PCRE2_SPTR32)p)))
1531 
/* SUB1 and SUB2 call the function whose name is a plus the width suffix,
passing the suffixed b (and c). They are statements without a final
semicolon. */

1532 #define SUB1(a,b) \
1533   if (test_mode == PCRE8_MODE) G(a,8)(G(b,8)); \
1534   else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16)); \
1535   else G(a,32)(G(b,32))
1536 
1537 #define SUB2(a,b,c) \
1538   if (test_mode == PCRE8_MODE) G(a,8)(G(b,8),G(c,8)); \
1539   else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16),G(c,16)); \
1540   else G(a,32)(G(b,32),G(c,32))
1541 
/* TEST and TESTFLD are expressions that apply the operator token r between
the suffixed variable (or its field f) and y. Unlike the statement macros
above, the third arm checks PCRE32_MODE explicitly, so the result is 0 when
test_mode matches none of the three modes. */

1542 #define TEST(x,r,y) ( \
1543   (test_mode == PCRE8_MODE && G(x,8) r (y)) || \
1544   (test_mode == PCRE16_MODE && G(x,16) r (y)) || \
1545   (test_mode == PCRE32_MODE && G(x,32) r (y)))
1546 
1547 #define TESTFLD(x,f,r,y) ( \
1548   (test_mode == PCRE8_MODE && G(x,8)->f r (y)) || \
1549   (test_mode == PCRE16_MODE && G(x,16)->f r (y)) || \
1550   (test_mode == PCRE32_MODE && G(x,32)->f r (y)))
1551 
1552 
1553 /* ----- Two out of three modes are supported ----- */
1554 
1555 #else
1556 
1557 /* We can use some macro trickery to make a single set of definitions work in
1558 the three different cases. */
1559 
1560 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
1561 
/* Select the two widths used by the two-mode macros below. In every branch
BITONE is the larger width and BITTWO the smaller. The final #else does not
test any SUPPORT_ symbol itself; it relies on the enclosing conditionals
(outside this section) to guarantee that exactly two modes are enabled. */

1562 #if defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_16)
1563 #define BITONE 32
1564 #define BITTWO 16
1565 
1566 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
1567 
1568 #elif defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_8)
1569 #define BITONE 32
1570 #define BITTWO 8
1571 
1572 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
1573 
1574 #else
1575 #define BITONE 16
1576 #define BITTWO 8
1577 #endif
1578 
1579 
1580 /* ----- Common macros for two-mode cases ----- */
1581 
/* The two selected widths expressed in bytes rather than bits. */

1582 #define BYTEONE (BITONE/8)
1583 #define BYTETWO (BITTWO/8)
1584 
/* Two-mode expression macros. test_mode is compared only with the mode
constant built from BITONE (G(G(PCRE,BITONE),_MODE), i.e. PCRE<BITONE>_MODE
assuming G() pastes its arguments); every other value selects the BITTWO
form. Each expansion is wrapped in parentheses. */

/* CASTFLD: field b of the width-suffixed a, cast to type t. */

1585 #define CASTFLD(t,a,b) \
1586   ((test_mode == G(G(PCRE,BITONE),_MODE))? (t)(G(a,BITONE)->b) : \
1587     (t)(G(a,BITTWO)->b))
1588 
/* CASTVAR: the width-suffixed x, cast to type t. */

1589 #define CASTVAR(t,x) ( \
1590   (test_mode == G(G(PCRE,BITONE),_MODE))? \
1591     (t)G(x,BITONE) : (t)G(x,BITTWO))
1592 
/* CODE_UNIT: element b of a, read through the subject pointer type of the
selected width and converted to uint32_t. */

1593 #define CODE_UNIT(a,b) ( \
1594   (test_mode == G(G(PCRE,BITONE),_MODE))? \
1595   (uint32_t)(((G(PCRE2_SPTR,BITONE))(a))[b]) : \
1596   (uint32_t)(((G(PCRE2_SPTR,BITTWO))(a))[b]))
1597 
/* Two-mode copy helpers. CONCTXCPY, DATCTXCPY and PATCTXCPY memcpy() one
convert, match or compile context structure of the selected width from the
suffixed b to the suffixed a. They are if/else statements without a final
semicolon. */

1598 #define CONCTXCPY(a,b) \
1599   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1600     memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_convert_context_,BITONE))); \
1601   else \
1602     memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_convert_context_,BITTWO)))
1603 
/* CONVERT_COPY copies c items of BYTEONE or BYTETWO bytes each from b to the
suffixed a. Unlike its neighbours it is a conditional expression, and the
expansion has no enclosing parentheses. */

1604 #define CONVERT_COPY(a,b,c) \
1605   (test_mode == G(G(PCRE,BITONE),_MODE))? \
1606   memcpy(G(a,BITONE),(char *)b,(c)*BYTEONE) : \
1607   memcpy(G(a,BITTWO),(char *)b,(c)*BYTETWO)
1608 
1609 #define DATCTXCPY(a,b) \
1610   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1611     memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_match_context_,BITONE))); \
1612   else \
1613     memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_match_context_,BITTWO)))
1614 
/* FLD: field b of the suffixed a, as a parenthesised conditional
expression. */

1615 #define FLD(a,b) \
1616   ((test_mode == G(G(PCRE,BITONE),_MODE))? G(a,BITONE)->b : G(a,BITTWO)->b)
1617 
1618 #define PATCTXCPY(a,b) \
1619   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1620     memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_compile_context_,BITONE))); \
1621   else \
1622     memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_compile_context_,BITTWO)))
1623 
/* Two-mode wrappers for the pchars<width>() functions (defined outside this
section). p is cast to the subject pointer type of the selected width before
offset is added, so offset counts items of that width. PCHARS stores the
result in lv; PCHARSV discards it with a (void) cast. */

1624 #define PCHARS(lv, p, offset, len, utf, f) \
1625   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1626     lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
1627   else \
1628     lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)
1629 
1630 #define PCHARSV(p, offset, len, utf, f) \
1631   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1632     (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
1633   else \
1634     (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)
1635 
/* Two-mode wrappers around compiled-pattern functions. As elsewhere in this
section, test_mode equal to PCRE<BITONE>_MODE selects the BITONE call and any
other value the BITTWO call; none of the expansions ends with a semicolon. */

/* PCRE2_CALLOUT_ENUMERATE always operates on the compiled_code<width>
variable rather than on a macro argument, and casts b to the enumerate
callback type of the selected width. The result is stored in a. */

1636 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
1637   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1638      a = G(pcre2_callout_enumerate,BITONE)(G(compiled_code,BITONE), \
1639        (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
1640   else \
1641      a = G(pcre2_callout_enumerate,BITTWO)(G(compiled_code,BITTWO), \
1642        (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
1643 
/* FROM_VOID copies from b as written into the suffixed a; the two TO_VOID
forms copy from the suffixed b and store the result in a as void *. */

1644 #define PCRE2_CODE_COPY_FROM_VOID(a,b) \
1645   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1646     G(a,BITONE) = G(pcre2_code_copy_,BITONE)(b); \
1647   else \
1648     G(a,BITTWO) = G(pcre2_code_copy_,BITTWO)(b)
1649 
1650 #define PCRE2_CODE_COPY_TO_VOID(a,b) \
1651   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1652     a = (void *)G(pcre2_code_copy_,BITONE)(G(b,BITONE)); \
1653   else \
1654     a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO))
1655 
1656 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
1657   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1658     a = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(b,BITONE)); \
1659   else \
1660     a = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(b,BITTWO))
1661 
/* PCRE2_COMPILE stores the result in the suffixed a; b is suffixed and the
remaining arguments are passed through. */

1662 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
1663   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1664     G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,g); \
1665   else \
1666     G(a,BITTWO) = G(pcre2_compile_,BITTWO)(G(b,BITTWO),c,d,e,f,g)
1667 
1668 #define PCRE2_CONVERTED_PATTERN_FREE(a) \
1669   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1670     G(pcre2_converted_pattern_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \
1671   else \
1672     G(pcre2_converted_pattern_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a)
1673 
/* Two-mode matching/query wrappers; each stores the function's result in its
first argument. */

/* PCRE2_DFA_MATCH: b and g are width-suffixed, c is cast to the subject
pointer type of the selected width, the rest are passed through. */

1674 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
1675   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1676     a = G(pcre2_dfa_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
1677       G(g,BITONE),h,i,j); \
1678   else \
1679     a = G(pcre2_dfa_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
1680       G(g,BITTWO),h,i,j)
1681 
/* PCRE2_GET_ERROR_MESSAGE: the second argument of the outer G() is
"_size/BYTEONE" (or BYTETWO), so - assuming G() pastes only the first token
of that argument - the third function argument is the variable named
<b><width>_size divided by the width in bytes. */

1682 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
1683   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1684     r = G(pcre2_get_error_message_,BITONE)(a,G(b,BITONE),G(G(b,BITONE),_size/BYTEONE)); \
1685   else \
1686     r = G(pcre2_get_error_message_,BITTWO)(a,G(b,BITTWO),G(G(b,BITTWO),_size/BYTETWO))
1687 
1688 #define PCRE2_GET_OVECTOR_COUNT(a,b) \
1689   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1690     a = G(pcre2_get_ovector_count_,BITONE)(G(b,BITONE)); \
1691   else \
1692     a = G(pcre2_get_ovector_count_,BITTWO)(G(b,BITTWO))
1693 
1694 #define PCRE2_GET_STARTCHAR(a,b) \
1695   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1696     a = G(pcre2_get_startchar_,BITONE)(G(b,BITONE)); \
1697   else \
1698     a = G(pcre2_get_startchar_,BITTWO)(G(b,BITTWO))
1699 
/* Two-mode JIT wrappers. PCRE2_JIT_COMPILE and PCRE2_JIT_MATCH store the
function's result in their first argument; PCRE2_JIT_FREE_UNUSED_MEMORY
discards it. In PCRE2_JIT_MATCH b and g are width-suffixed and c is cast to
the subject pointer type of the selected width. None of the three ends with a
semicolon. */

1700 #define PCRE2_JIT_COMPILE(r,a,b) \
1701   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1702     r = G(pcre2_jit_compile_,BITONE)(G(a,BITONE),b); \
1703   else \
1704     r = G(pcre2_jit_compile_,BITTWO)(G(a,BITTWO),b)
1705 
1706 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
1707   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1708     G(pcre2_jit_free_unused_memory_,BITONE)(G(a,BITONE)); \
1709   else \
1710     G(pcre2_jit_free_unused_memory_,BITTWO)(G(a,BITTWO))
1711 
1712 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
1713   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1714     a = G(pcre2_jit_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
1715       G(g,BITONE),h); \
1716   else \
1717     a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
1718       G(g,BITTWO),h)
1719 
/* Two-mode PCRE2_JIT_STACK_CREATE: call the JIT stack creation function of
the selected width with b, c and d, and store the result in a after casting it
to PCRE2_JIT_STACK *. test_mode equal to PCRE<BITONE>_MODE selects the BITONE
function; any other value selects the BITTWO one.

The expansion deliberately keeps its own terminating semicolon, so existing
invocations with or without a following ';' continue to compile. The last
line must not end with a line continuation: with one, the definition is
terminated only by whatever line happens to follow it. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d);
1725 
/* Two-mode JIT stack assign/free. Both expansions already end with a
semicolon, unlike most macros in this section, so an invocation followed by
';' produces an additional empty statement. */

/* b is cast to the JIT callback type of the selected width; a is
width-suffixed and c passed through. */

1726 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
1727   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1728     G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE),(G(pcre2_jit_callback_,BITONE))b,c); \
1729   else \
1730     G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO),(G(pcre2_jit_callback_,BITTWO))b,c);
1731 
/* a is used as written and cast to a pointer to the JIT stack type of the
selected width. */

1732 #define PCRE2_JIT_STACK_FREE(a) \
1733   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1734     G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \
1735   else \
1736     G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a);
1737 
/* Two-mode table, match and match-data wrappers. test_mode equal to
PCRE<BITONE>_MODE selects the BITONE call, anything else the BITTWO call. */

/* Always passes NULL as the function's only argument; result goes to a. */

1738 #define PCRE2_MAKETABLES(a) \
1739   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1740     a = G(pcre2_maketables_,BITONE)(NULL); \
1741   else \
1742     a = G(pcre2_maketables_,BITTWO)(NULL)
1743 
/* b and g are width-suffixed, c is cast to the subject pointer type of the
selected width; the result goes to a. */

1744 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
1745   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1746     a = G(pcre2_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
1747       G(g,BITONE),h); \
1748   else \
1749     a = G(pcre2_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
1750       G(g,BITTWO),h)
1751 
/* The two CREATE forms assign to the width-suffixed a; FREE passes the
suffixed a to the free function. */

1752 #define PCRE2_MATCH_DATA_CREATE(a,b,c) \
1753   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1754     G(a,BITONE) = G(pcre2_match_data_create_,BITONE)(b,c); \
1755   else \
1756     G(a,BITTWO) = G(pcre2_match_data_create_,BITTWO)(b,c)
1757 
1758 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
1759   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1760     G(a,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)(G(b,BITONE),c); \
1761   else \
1762     G(a,BITTWO) = G(pcre2_match_data_create_from_pattern_,BITTWO)(G(b,BITTWO),c)
1763 
1764 #define PCRE2_MATCH_DATA_FREE(a) \
1765   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1766     G(pcre2_match_data_free_,BITONE)(G(a,BITONE)); \
1767   else \
1768     G(pcre2_match_data_free_,BITTWO)(G(a,BITTWO))
1769 
/* Two-mode pattern conversion, pattern information and debug printing. */

/* b and g are width-suffixed; e is cast to a pointer-to-pointer of the
selected code unit type; the result goes to a. */

1770 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
1771   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1772     a = G(pcre2_pattern_convert_,BITONE)(G(b,BITONE),c,d,(G(PCRE2_UCHAR,BITONE) **)e,f,G(g,BITONE)); \
1773   else \
1774     a = G(pcre2_pattern_convert_,BITTWO)(G(b,BITTWO),c,d,(G(PCRE2_UCHAR,BITTWO) **)e,f,G(g,BITTWO))
1775 
1776 #define PCRE2_PATTERN_INFO(a,b,c,d) \
1777   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1778     a = G(pcre2_pattern_info_,BITONE)(G(b,BITONE),c,d); \
1779   else \
1780     a = G(pcre2_pattern_info_,BITTWO)(G(b,BITTWO),c,d)
1781 
/* Uses the compiled_code<width> variable and outfile directly rather than
taking them as arguments; only a comes from the invocation. */

1782 #define PCRE2_PRINTINT(a) \
1783  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1784     G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,a); \
1785   else \
1786     G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,a)
1787 
/* Two-mode serialization wrappers. DECODE, ENCODE and GET_NUMBER_OF_CODES
store the function's result in r; FREE discards it. */

/* a is cast to a pointer to a code pointer of the selected width; the last
argument is width-suffixed. */

1788 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
1789  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1790     r = G(pcre2_serialize_decode_,BITONE)((G(pcre2_code_,BITONE) **)a,b,c,G(d,BITONE)); \
1791   else \
1792     r = G(pcre2_serialize_decode_,BITTWO)((G(pcre2_code_,BITTWO) **)a,b,c,G(d,BITTWO))
1793 
/* The cast passes "const pcre2_code_" as the first argument of G();
presumably only the last token takes the suffix, giving
"const pcre2_code_<width> **" - confirm against G()'s definition. */

1794 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
1795  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1796     r = G(pcre2_serialize_encode_,BITONE)((G(const pcre2_code_,BITONE) **)a,b,c,d,G(e,BITONE)); \
1797   else \
1798     r = G(pcre2_serialize_encode_,BITTWO)((G(const pcre2_code_,BITTWO) **)a,b,c,d,G(e,BITTWO))
1799 
/* In the next two macros a is passed exactly as written. */

1800 #define PCRE2_SERIALIZE_FREE(a) \
1801  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1802     G(pcre2_serialize_free_,BITONE)(a); \
1803   else \
1804     G(pcre2_serialize_free_,BITTWO)(a)
1805 
1806 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
1807  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1808     r = G(pcre2_serialize_get_number_of_codes_,BITONE)(a); \
1809   else \
1810     r = G(pcre2_serialize_get_number_of_codes_,BITTWO)(a)
1811 
/* Two-mode context setters. The first argument is width-suffixed; the
functions' return values are discarded. */

/* b is cast to the callout function pointer type of the selected width. This
expansion already ends with a semicolon, unlike the setters that follow it,
so an invocation followed by ';' produces an additional empty statement. */

1812 #define PCRE2_SET_CALLOUT(a,b,c) \
1813   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1814     G(pcre2_set_callout_,BITONE)(G(a,BITONE), \
1815       (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \
1816   else \
1817     G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \
1818       (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c);
1819 
1820 #define PCRE2_SET_CHARACTER_TABLES(a,b) \
1821   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1822     G(pcre2_set_character_tables_,BITONE)(G(a,BITONE),b); \
1823   else \
1824     G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b)
1825 
1826 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
1827   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1828     G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b,c); \
1829   else \
1830     G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b,c)
1831 
1832 #define PCRE2_SET_DEPTH_LIMIT(a,b) \
1833   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1834     G(pcre2_set_depth_limit_,BITONE)(G(a,BITONE),b); \
1835   else \
1836     G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
1837 
/* Two-mode glob and limit setters. The two glob macros store the function's
result in r; the limit setters discard it. In all of them a is width-suffixed
and b is passed through unchanged, and no final semicolon is included. */

1838 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
1839   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1840     r = G(pcre2_set_glob_escape_,BITONE)(G(a,BITONE),b); \
1841   else \
1842     r = G(pcre2_set_glob_escape_,BITTWO)(G(a,BITTWO),b)
1843 
1844 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
1845   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1846     r = G(pcre2_set_glob_separator_,BITONE)(G(a,BITONE),b); \
1847   else \
1848     r = G(pcre2_set_glob_separator_,BITTWO)(G(a,BITTWO),b)
1849 
1850 #define PCRE2_SET_HEAP_LIMIT(a,b) \
1851   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1852     G(pcre2_set_heap_limit_,BITONE)(G(a,BITONE),b); \
1853   else \
1854     G(pcre2_set_heap_limit_,BITTWO)(G(a,BITTWO),b)
1855 
1856 #define PCRE2_SET_MATCH_LIMIT(a,b) \
1857   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1858     G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
1859   else \
1860     G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b)
1861 
1862 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
1863   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1864     G(pcre2_set_max_pattern_length_,BITONE)(G(a,BITONE),b); \
1865   else \
1866     G(pcre2_set_max_pattern_length_,BITTWO)(G(a,BITTWO),b)
1867 
1868 #define PCRE2_SET_OFFSET_LIMIT(a,b) \
1869   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1870     G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \
1871   else \
1872     G(pcre2_set_offset_limit_,BITTWO)(G(a,BITTWO),b)
1873 
1874 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
1875   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1876     G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \
1877   else \
1878     G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b)
1879 
/* Two-mode substitution macros. PCRE2_SET_SUBSTITUTE_CALLOUT casts b to the
substitute-callout function pointer type of the selected width. */

1880 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
1881   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1882     G(pcre2_set_substitute_callout_,BITONE)(G(a,BITONE), \
1883       (int (*)(G(pcre2_substitute_callout_block_,BITONE) *, void *))b,c); \
1884   else \
1885     G(pcre2_set_substitute_callout_,BITTWO)(G(a,BITTWO), \
1886       (int (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))b,c)
1887 
/* PCRE2_SUBSTITUTE stores the result in a. b and g are width-suffixed, c and
i are cast to the subject pointer type and k to a pointer to the code unit
type of the selected width; d, e, f, h, j and l are passed through. */

1888 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
1889   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1890     a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
1891       G(g,BITONE),h,(G(PCRE2_SPTR,BITONE))i,j, \
1892       (G(PCRE2_UCHAR,BITONE) *)k,l); \
1893   else \
1894     a = G(pcre2_substitute_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
1895       G(g,BITTWO),h,(G(PCRE2_SPTR,BITTWO))i,j, \
1896       (G(PCRE2_UCHAR,BITTWO) *)k,l)
1897 
/* Two-mode substring copy, free and get macros. The COPY and GET forms store
the function's result in a. COPY casts d to a pointer to the selected code
unit type, GET to a pointer-to-pointer of it. BYNAME forms width-suffix c,
BYNUMBER forms pass c as given. */

1898 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
1899   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1900     a = G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\
1901       (G(PCRE2_UCHAR,BITONE) *)d,e); \
1902   else \
1903     a = G(pcre2_substring_copy_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\
1904       (G(PCRE2_UCHAR,BITTWO) *)d,e)
1905 
1906 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
1907   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1908     a = G(pcre2_substring_copy_bynumber_,BITONE)(G(b,BITONE),c,\
1909       (G(PCRE2_UCHAR,BITONE) *)d,e); \
1910   else \
1911     a = G(pcre2_substring_copy_bynumber_,BITTWO)(G(b,BITTWO),c,\
1912       (G(PCRE2_UCHAR,BITTWO) *)d,e)
1913 
/* a is used as written and cast to the code unit pointer type of the
selected width. */

1914 #define PCRE2_SUBSTRING_FREE(a) \
1915   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1916     G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \
1917   else G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a)
1918 
1919 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
1920   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1921     a = G(pcre2_substring_get_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\
1922       (G(PCRE2_UCHAR,BITONE) **)d,e); \
1923   else \
1924     a = G(pcre2_substring_get_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\
1925       (G(PCRE2_UCHAR,BITTWO) **)d,e)
1926 
1927 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
1928   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1929     a = G(pcre2_substring_get_bynumber_,BITONE)(G(b,BITONE),c,\
1930       (G(PCRE2_UCHAR,BITONE) **)d,e); \
1931   else \
1932     a = G(pcre2_substring_get_bynumber_,BITTWO)(G(b,BITTWO),c,\
1933       (G(PCRE2_UCHAR,BITTWO) **)d,e)
1934 
/* Two-mode substring length, list and name lookup macros. All except
LIST_FREE store the function's result in a. LIST_GET casts c to a triple
pointer of the selected code unit type; LIST_FREE casts its argument, used as
written, to a pointer to the selected subject pointer type. */

1935 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
1936   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1937     a = G(pcre2_substring_length_byname_,BITONE)(G(b,BITONE),G(c,BITONE),d); \
1938   else \
1939     a = G(pcre2_substring_length_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),d)
1940 
1941 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
1942   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1943     a = G(pcre2_substring_length_bynumber_,BITONE)(G(b,BITONE),c,d); \
1944   else \
1945     a = G(pcre2_substring_length_bynumber_,BITTWO)(G(b,BITTWO),c,d)
1946 
1947 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
1948   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1949     a = G(pcre2_substring_list_get_,BITONE)(G(b,BITONE), \
1950       (G(PCRE2_UCHAR,BITONE) ***)c,d); \
1951   else \
1952     a = G(pcre2_substring_list_get_,BITTWO)(G(b,BITTWO), \
1953       (G(PCRE2_UCHAR,BITTWO) ***)c,d)
1954 
1955 #define PCRE2_SUBSTRING_LIST_FREE(a) \
1956   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1957     G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)a); \
1958   else \
1959     G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)a)
1960 
1961 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
1962   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1963     a = G(pcre2_substring_number_from_name_,BITONE)(G(b,BITONE),G(c,BITONE)); \
1964   else \
1965     a = G(pcre2_substring_number_from_name_,BITTWO)(G(b,BITTWO),G(c,BITTWO))
1966 
/* Two-mode access helpers working on width-suffixed variable names. */

/* PTR is an expression: the suffixed x for the current mode, as void *. */

1967 #define PTR(x) ( \
1968   (test_mode == G(G(PCRE,BITONE),_MODE))? (void *)G(x,BITONE) : \
1969   (void *)G(x,BITTWO))
1970 
/* SETFLD assigns z to field y; SETFLDVEC assigns z to element v of field y.
Statements without a final semicolon. */

1971 #define SETFLD(x,y,z) \
1972   if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y = z; \
1973   else G(x,BITTWO)->y = z
1974 
1975 #define SETFLDVEC(x,y,v,z) \
1976   if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y[v] = z; \
1977   else G(x,BITTWO)->y[v] = z
1978 
/* SETOP writes "variable z y": z is an operator token and y its operand. */

1979 #define SETOP(x,y,z) \
1980   if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE) z y; \
1981   else G(x,BITTWO) z y
1982 
/* The cast type is built as uint<width>_t by two nested G() calls. */

1983 #define SETCASTPTR(x,y) \
1984   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1985     G(x,BITONE) = (G(G(uint,BITONE),_t) *)(y); \
1986   else \
1987     G(x,BITTWO) = (G(G(uint,BITTWO),_t) *)(y)
1988 
/* STRLEN calls strlen<width>() on p cast to the matching subject pointer
type. Unlike the three-mode and 8-bit-only definitions in this file, the
result is not cast to int here. NOTE(review): when BITTWO is 8 this names
strlen8, which has to be provided elsewhere in the file - confirm. */

1989 #define STRLEN(p) ((test_mode == G(G(PCRE,BITONE),_MODE))? \
1990   G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))p) : \
1991   G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))p))
1992 
/* SUB1 calls the function named a plus the width suffix with the suffixed b.
Statement without a final semicolon. */

1993 #define SUB1(a,b) \
1994   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
1995     G(a,BITONE)(G(b,BITONE)); \
1996   else \
1997     G(a,BITTWO)(G(b,BITTWO))
1998 
/* Two-mode SUB2: call the function whose name is a plus the width suffix,
passing the width-suffixed b and c. test_mode equal to PCRE<BITONE>_MODE
selects the BITONE names; any other value selects the BITTWO names. The call
has the same shape as SUB1 and as the three-mode and single-mode SUB2
definitions: the function name is followed directly by its argument list, with
balanced parentheses. The expansion is an if/else statement without a final
semicolon, so the invocation supplies it. */

#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
2004 
/* Two-mode TEST and TESTFLD: expressions that apply the operator token r
between the width-suffixed x (or its field f) and y. Both modes are tested
explicitly, so the result is 0 when test_mode equals neither
PCRE<BITONE>_MODE nor PCRE<BITTWO>_MODE. */

2005 #define TEST(x,r,y) ( \
2006   (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE) r (y)) || \
2007   (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO) r (y)))
2008 
2009 #define TESTFLD(x,f,r,y) ( \
2010   (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE)->f r (y)) || \
2011   (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO)->f r (y)))
2012 
2013 
2014 #endif  /* Two out of three modes */
2015 
2016 /* ----- End of cases where more than one mode is supported ----- */
2017 
2018 
2019 /* ----- Only 8-bit mode is supported ----- */
2020 
2021 #elif defined SUPPORT_PCRE2_8
2022 #define CASTFLD(t,a,b) (t)(G(a,8)->b)
2023 #define CASTVAR(t,x) (t)G(x,8)
2024 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR8)(a))[b])
/* 8-bit-only forms of the copy, print, compile and DFA-match macros. With a
single mode there is no test_mode test: each macro expands directly to the _8
call or to an operation on the 8-suffixed variable (via G(), defined
elsewhere). None of the expansions in this group ends with a semicolon.
CONVERT_COPY copies c bytes, with no width multiplier. */

2025 #define CONCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8))
2026 #define CONVERT_COPY(a,b,c) memcpy(G(a,8),(char *)b, c)
2027 #define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8))
2028 #define FLD(a,b) G(a,8)->b
2029 #define PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8))
2030 #define PCHARS(lv, p, offset, len, utf, f) \
2031   lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
2032 #define PCHARSV(p, offset, len, utf, f) \
2033   (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
2034 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
2035    a = pcre2_callout_enumerate_8(compiled_code8, \
2036      (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c)
2037 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b)
2038 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8))
2039 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_8(G(b,8))
2040 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
2041   G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g)
2042 #define PCRE2_CONVERTED_PATTERN_FREE(a) \
2043   pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a)
2044 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
2045   a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j)
/* 8-bit-only forms of the error-message, JIT, match and pattern macros.
PCRE2_GET_ERROR_MESSAGE passes the variable named <b>8_size as the buffer
size with no division (assuming G() pastes its arguments). The three
PCRE2_JIT_STACK_* expansions already end with a semicolon, unlike the other
macros in this group, so an invocation followed by ';' produces an additional
empty statement. PCRE2_PRINTINT uses compiled_code8 and outfile directly. */

2046 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
2047   r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size))
2048 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_8(G(b,8))
2049 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b,8))
2050 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_8(G(a,8),b)
2051 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_8(G(a,8))
2052 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
2053   a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)
2054 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
2055   a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d);
2056 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
2057   pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c);
2058 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a);
2059 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_8(NULL)
2060 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
2061   a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)
2062 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c)
2063 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
2064   G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c)
2065 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8))
2066 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8))
2067 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
2068 #define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
/* 8-bit-only forms of the serialization macros and context setters. Macros
with an r parameter store the function's result in it; the others discard the
return value. In the setters a is 8-suffixed through G() and the remaining
arguments are passed through, except that PCRE2_SET_CALLOUT casts b to the
8-bit callout function pointer type. No expansion here ends with a
semicolon. */

2069 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
2070   r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8))
2071 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
2072   r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8))
2073 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_8(a)
2074 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
2075   r = pcre2_serialize_get_number_of_codes_8(a)
2076 #define PCRE2_SET_CALLOUT(a,b,c) \
2077   pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c)
2078 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
2079 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
2080   pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
2081 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
2082 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_8(G(a,8),b)
2083 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_8(G(a,8),b)
2084 #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b)
2085 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
2086 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
2087 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
2088 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
2089 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
2090   pcre2_set_substitute_callout_8(G(a,8), \
2091     (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c)
2092 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
2093   a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \
2094     (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
2095 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
2096   a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
2097 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
2098   a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e)
2099 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_8((PCRE2_UCHAR8 *)a)
2100 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2101   a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e)
2102 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2103   a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e)
2104 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2105     a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d)
2106 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2107     a = pcre2_substring_length_bynumber_8(G(b,8),c,d)
2108 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2109   a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d)
2110 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2111   pcre2_substring_list_free_8((PCRE2_SPTR8 *)a)
/* Store in a the result of pcre2_substring_number_from_name_8(), called with
the 8-bit variants of b and c. The definition has no trailing semicolon, so
that a use followed by ';' is exactly one statement and can safely be the
unbraced body of an if that has an else. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_8(G(b,8),G(c,8))
/* Generic helpers for the 8-bit-only build. Each applies G(.,8) to a
variable name and then performs the access shown. Apart from TEST and
TESTFLD, the expansions are not wrapped in parentheses. */

/* The 8-bit variable, cast to void *. */
#define PTR(x) (void *)G(x,8)
/* Assign z to field y of the structure the 8-bit variable points to. */
#define SETFLD(x,y,z) G(x,8)->y = z
/* Assign z to element v of the array field y. */
#define SETFLDVEC(x,y,v,z) G(x,8)->y[v] = z
/* Place operator token z between the 8-bit variable and operand y. */
#define SETOP(x,y,z) G(x,8) z y
/* Assign y, cast to uint8_t *, to the 8-bit variable. */
#define SETCASTPTR(x,y) G(x,8) = (uint8_t *)(y)
/* Length from strlen(), converted to int. */
#define STRLEN(p) (int)strlen((char *)p)
/* Call the 8-bit variant of function a with the 8-bit variant(s) of the
argument(s). */
#define SUB1(a,b) G(a,8)(G(b,8))
#define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8))
/* Compare the 8-bit variable (TEST) or its field f (TESTFLD) with y using
the operator token r. */
#define TEST(x,r,y) (G(x,8) r (y))
#define TESTFLD(x,f,r,y) (G(x,8)->f r (y))
2124 
2125 
2126 /* ----- Only 16-bit mode is supported ----- */
2127 
2128 #elif defined SUPPORT_PCRE2_16
/* 16-bit-only mode, first group: casts, context copying, character output
and the compile/match entry points. Each macro forwards to the _16 variant
of a library function or test-program helper. G(x,16) is defined earlier in
this file (outside this excerpt); it presumably names the 16-bit variant of
a test-program variable - confirm against its definition. Where the first
macro parameter appears to the left of '=', it receives the result. */

#define CASTFLD(t,a,b) (t)(G(a,16)->b)
#define CASTVAR(t,x) (t)G(x,16)
/* Element b of a, read through a PCRE2_SPTR16 and converted to uint32_t. */
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b])
#define CONCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16))
/* Copies (c)*2 bytes; c is presumably a count of 16-bit code units. */
#define CONVERT_COPY(a,b,c) memcpy(G(a,16),(char *)b, (c)*2)
#define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16))
#define FLD(a,b) G(a,16)->b
#define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16))
/* PCHARS stores the value returned by pchars16() in lv; PCHARSV discards
it. The offset is added after p has been cast to PCRE2_SPTR16. */
#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
/* Always operates on compiled_code16; b is cast to the 16-bit enumerate
callback type. */
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
   a = pcre2_callout_enumerate_16(compiled_code16, \
     (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j)
/* The third argument is built by applying G() to the 16-bit buffer name and
"_size/2" - presumably the buffer's byte size divided by 2 to give a length
in 16-bit code units (TODO confirm against the definition of G). */
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a,16),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
/* JIT stack handling for the 16-bit-only build.
  PCRE2_JIT_STACK_CREATE  stores in a the result of
                          pcre2_jit_stack_create_16(b,c,d), cast to
                          PCRE2_JIT_STACK *
  PCRE2_JIT_STACK_ASSIGN  calls pcre2_jit_stack_assign_16() on the 16-bit
                          variant of a, with b cast to the 16-bit JIT
                          callback type
  PCRE2_JIT_STACK_FREE    frees a after casting it to pcre2_jit_stack_16 *
Like most macros in this section, these have no trailing semicolon: a use
followed by ';' is then exactly one statement, which keeps
"if (x) MACRO(...); else ..." valid. */
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a)
/* 16-bit-only mode: table building, matching, match data, pattern
conversion/information and serialization. Each macro forwards to the
pcre2_..._16() function of the same name; G(x,16) is defined earlier in this
file (outside this excerpt). Where the first macro parameter appears to the
left of '=', it receives the function's return value. */

/* Always passes NULL as the function's only argument. */
#define PCRE2_MAKETABLES(a) a = pcre2_maketables_16(NULL)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
/* The two create macros assign to the 16-bit variant of a rather than to a
itself. */
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d)
/* Uses the compiled_code16 and outfile variables directly instead of taking
them as macro arguments. */
#define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_16(a)
/* Call pcre2_set_callout_16() on the 16-bit variant of a, passing b cast to
the 16-bit callout function-pointer type, and c unchanged. There is no
trailing semicolon, matching the 8-bit-only and 32-bit-only definitions of
this macro, so that a call site such as "if (x) PCRE2_SET_CALLOUT(...);
else ..." parses identically in every build mode. */
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c)
/* 16-bit-only mode: context setters, substitution and substring extraction.
Each macro forwards to the pcre2_..._16() function of the same name, applying
G(.,16) (defined earlier in this file, outside this excerpt) to the
variable-name arguments. Where the first macro parameter (a or r) appears to
the left of '=', it receives the function's return value. */

#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
/* The callback b is cast to the 16-bit substitute-callout function-pointer
type. */
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  pcre2_set_substitute_callout_16(G(a,16), \
    (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c)
/* Arguments c and i are cast to PCRE2_SPTR16 and k to PCRE2_UCHAR16 *; the
remaining arguments are passed through unchanged. */
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
    (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
/* Substring access. The BYNAME forms apply G() to the name argument as
well; the BYNUMBER forms pass the number c through unchanged. */
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
    a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
    a = pcre2_substring_length_bynumber_16(G(b,16),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_16((PCRE2_SPTR16 *)a)
/* Store in a the result of pcre2_substring_number_from_name_16(), called
with the 16-bit variants of b and c. The definition has no trailing
semicolon, so that a use followed by ';' is exactly one statement and can
safely be the unbraced body of an if that has an else. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_16(G(b,16),G(c,16))
/* Generic helpers for the 16-bit-only build. Each applies G(.,16) to a
variable name and then performs the access shown. Apart from TEST and
TESTFLD, the expansions are not wrapped in parentheses. */

/* The 16-bit variable, cast to void *. */
#define PTR(x) (void *)G(x,16)
/* Assign z to field y of the structure the 16-bit variable points to. */
#define SETFLD(x,y,z) G(x,16)->y = z
/* Assign z to element v of the array field y. */
#define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z
/* Place operator token z between the 16-bit variable and operand y. */
#define SETOP(x,y,z) G(x,16) z y
/* Assign y, cast to uint16_t *, to the 16-bit variable. */
#define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y)
/* Length from strlen16() (defined elsewhere in this file), converted to
int. */
#define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p)
/* Call the 16-bit variant of function a with the 16-bit variant(s) of the
argument(s). */
#define SUB1(a,b) G(a,16)(G(b,16))
#define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
/* Compare the 16-bit variable (TEST) or its field f (TESTFLD) with y using
the operator token r. */
#define TEST(x,r,y) (G(x,16) r (y))
#define TESTFLD(x,f,r,y) (G(x,16)->f r (y))
2231 
2232 
2233 /* ----- Only 32-bit mode is supported ----- */
2234 
2235 #elif defined SUPPORT_PCRE2_32
/* 32-bit-only mode, first group: casts, context copying, character output
and the compile/match entry points. Each macro forwards to the _32 variant
of a library function or test-program helper. G(x,32) is defined earlier in
this file (outside this excerpt); it presumably names the 32-bit variant of
a test-program variable - confirm against its definition. Where the first
macro parameter appears to the left of '=', it receives the result. */

#define CASTFLD(t,a,b) (t)(G(a,32)->b)
#define CASTVAR(t,x) (t)G(x,32)
/* Element b of a, read through a PCRE2_SPTR32 and converted to uint32_t. */
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
#define CONCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
/* Copies (c)*4 bytes; c is presumably a count of 32-bit code units. */
#define CONVERT_COPY(a,b,c) memcpy(G(a,32),(char *)b, (c)*4)
#define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
#define FLD(a,b) G(a,32)->b
#define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
/* PCHARS stores the value returned by pchars32() in lv; PCHARSV discards
it. The offset is added after p has been cast to PCRE2_SPTR32. */
#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
/* Always operates on compiled_code32; b is cast to the 32-bit enumerate
callback type. */
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
   a = pcre2_callout_enumerate_32(compiled_code32, \
     (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
/* The third argument is built by applying G() to the 32-bit buffer name and
"_size/4" - presumably the buffer's byte size divided by 4 to give a length
in 32-bit code units (TODO confirm against the definition of G). */
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
/* JIT stack handling for the 32-bit-only build.
  PCRE2_JIT_STACK_CREATE  stores in a the result of
                          pcre2_jit_stack_create_32(b,c,d), cast to
                          PCRE2_JIT_STACK *
  PCRE2_JIT_STACK_ASSIGN  calls pcre2_jit_stack_assign_32() on the 32-bit
                          variant of a, with b cast to the 32-bit JIT
                          callback type
  PCRE2_JIT_STACK_FREE    frees a after casting it to pcre2_jit_stack_32 *
Like most macros in this section, these have no trailing semicolon: a use
followed by ';' is then exactly one statement, which keeps
"if (x) MACRO(...); else ..." valid. */
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
/* 32-bit-only mode: table building, matching, match data, pattern
conversion/information, serialization, context setters and substitution.
Each macro forwards to the pcre2_..._32() function of the same name; G(x,32)
is defined earlier in this file (outside this excerpt). Where the first
macro parameter (a or r) appears to the left of '=', it receives the
function's return value. None of the definitions in this group ends with a
semicolon, so every use must supply its own. */

/* Always passes NULL as the function's only argument. */
#define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
/* The two create macros assign to the 32-bit variant of a rather than to a
itself. */
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
/* Uses the compiled_code32 and outfile variables directly instead of taking
them as macro arguments. */
#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_32(a)
/* The callback b is cast to the 32-bit callout function-pointer type. */
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
/* The callback b is cast to the 32-bit substitute-callout function-pointer
type. */
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  pcre2_set_substitute_callout_32(G(a,32), \
    (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
/* Arguments c and i are cast to PCRE2_SPTR32 and k to PCRE2_UCHAR32 *; the
remaining arguments are passed through unchanged. */
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
    (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
/* G() is applied to both the match-data argument b and the name argument
c. */
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
/* Store in a the result of pcre2_substring_copy_bynumber_32(), called with
the 32-bit variant of b, the number c, d cast to PCRE2_UCHAR32 *, and e
unchanged. There is no trailing semicolon, matching the 8-bit-only and
16-bit-only definitions of this macro, so that a call site such as
"if (x) PCRE2_SUBSTRING_COPY_BYNUMBER(...); else ..." parses identically in
every build mode. */
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
/* 32-bit-only mode: remaining substring macros. Each forwards to the
pcre2_substring_..._32() function of the same name, applying G(.,32)
(defined earlier in this file, outside this excerpt) to the variable-name
arguments. The BYNAME forms apply G() to the name argument as well; the
BYNUMBER forms pass the number c through unchanged. */

#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
    a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
    a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
/* Store in a the result of pcre2_substring_number_from_name_32(), called
with the 32-bit variants of b and c. The definition has no trailing
semicolon, so that a use followed by ';' is exactly one statement and can
safely be the unbraced body of an if that has an else. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
/* Generic helpers for the 32-bit-only build. Each applies G(.,32) to a
variable name and then performs the access shown. Apart from TEST and
TESTFLD, the expansions are not wrapped in parentheses. */

/* The 32-bit variable, cast to void *. */
#define PTR(x) (void *)G(x,32)
/* Assign z to field y of the structure the 32-bit variable points to. */
#define SETFLD(x,y,z) G(x,32)->y = z
/* Assign z to element v of the array field y. */
#define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
/* Place operator token z between the 32-bit variable and operand y. */
#define SETOP(x,y,z) G(x,32) z y
/* Assign y, cast to uint32_t *, to the 32-bit variable. */
#define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
/* Length from strlen32() (defined elsewhere in this file), converted to
int. */
#define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
/* Call the 32-bit variant of function a with the 32-bit variant(s) of the
argument(s). */
#define SUB1(a,b) G(a,32)(G(b,32))
#define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
/* Compare the 32-bit variable (TEST) or its field f (TESTFLD) with y using
the operator token r. */
#define TEST(x,r,y) (G(x,32) r (y))
#define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2338 
2339 #endif
2340 
2341 /* ----- End of mode-specific function call macros ----- */
2342 
2343 
2344 
2345 
2346 /*************************************************
2347 *         Alternate character tables             *
2348 *************************************************/
2349 
2350 /* By default, the "tables" pointer in the compile context when calling
2351 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2352 library. However, the tables modifier can be used to select alternate sets of
2353 tables, for different kinds of testing. Note that the locale modifier also
2354 adjusts the tables. */
2355 
2356 /* This is the set of tables distributed as default with PCRE2. It recognizes
2357 only ASCII characters. */
2358 
/* tables1 holds four consecutive sections: a 256-byte lower casing table, a
256-byte case flipping table, ten 32-byte character class bit maps, and a
256-byte table of per-character type bits. */

static const uint8_t tables1[] = {

/* This table is a lower casing table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */

  /* space: characters 9-13 and 32 */
  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* xdigit: 0-9, A-F, a-f */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* digit: 0-9 */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* upper: A-Z */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* lower: a-z */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* word: 0-9, A-Z, '_', a-z */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* graph: characters 33-126 */
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* print: characters 32-126 */
  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* punct */
  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* cntrl: characters 0-31 and 127 */
  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2527 
2528 /* This is a set of tables that came originally from a Windows user. It seems
2529 to be at least an approximation of ISO 8859. In particular, there are
2530 characters greater than 128 that are marked as spaces, letters, etc. */
2531 
/* tables2 contains 1088 bytes, the same total as tables1, and is annotated
below on the assumption that it uses the same four-section layout (256-byte
lower casing table, 256-byte case flipping table, ten 32-byte class bit maps,
256-byte character type table). The section boundaries are inferred from the
sizes and from the values themselves, not from any marker in the original
data. */

static const uint8_t tables2[] = {
/* Lower casing table: 65-90 map to 97-122 and 192-222 map to 224-254,
except that 215 maps to itself. */
0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* Case flipping table. */
0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,
/* Character class bit maps, 32 bytes (four lines) each; presumably in the
same order as in tables1: space, xdigit, digit, upper, lower, word, graph,
print, punct, cntrl. */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,
/* Character type table, one byte per character; presumably the same bit
assignments as the corresponding table in tables1. Unlike tables1, some
characters above 127 have non-zero entries. */
128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
2670 
2671 
2672 
2673 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
2674 /*************************************************
2675 *    Emulated memmove() for systems without it   *
2676 *************************************************/
2677 
2678 /* This function can make use of bcopy() if it is available. Otherwise do it by
2679 steam, as there are some non-Unix environments that lack both memmove() and
2680 bcopy(). */
2681 
/* Copy n bytes from s to d, allowing the two regions to overlap, and return
d. When HAVE_BCOPY is defined the work is handed to bcopy(); otherwise the
bytes are copied one at a time, in whichever direction is safe. */

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);   /* Note the argument order: source first */
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t k;

if (to > from)
  {
  /* The destination starts above the source, so work down from the last
  byte: each source byte is then read before it can be overwritten. */
  for (k = n; k > 0; k--) to[k-1] = from[k-1];
  }
else
  {
  /* The destination starts at or below the source: ascending order is
  safe. */
  for (k = 0; k < n; k++) to[k] = from[k];
  }

return d;
#endif   /* not HAVE_BCOPY */
}
#undef memmove
#define memmove(d,s,n) emulated_memmove(d,s,n)
2708 #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
2709 
2710 
2711 
2712 #ifndef HAVE_STRERROR
2713 /*************************************************
2714 *     Provide strerror() for non-ANSI libraries  *
2715 *************************************************/
2716 
2717 /* Some old-fashioned systems (e.g. SunOS4) didn't have strerror() in their
2718 libraries. They may no longer be around, but just in case, we can try to
2719 provide the same facility by this simple alternative function. */
2720 
/* The error count and message table are expected to come from the system
library. */

extern int   sys_nerr;
extern char *sys_errlist[];

/* Return the sys_errlist entry for error number n, or a fixed fallback
string when n is negative or not below sys_nerr. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
2730 #endif /* HAVE_STRERROR */
2731 
2732 
2733 
2734 /*************************************************
2735 *            Local memory functions              *
2736 *************************************************/
2737 
2738 /* Alternative memory functions, to test functionality. */
2739 
/* Allocate size bytes with malloc() and return the result (which may be
NULL). The data argument is not used. When show_memory is set, the outcome is
written to outfile and, while there is room in malloclist, a successful
allocation's address and length are remembered there. */

static void *my_malloc(PCRE2_SIZE size, void *data)
{
void *mem = malloc(size);
(void)data;

/* Nothing is reported or remembered unless show_memory is set. */

if (!show_memory) return mem;

if (mem == NULL)
  {
  fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", SIZ_CAST size);
  return mem;
  }

fprintf(outfile, "malloc  %5" SIZ_FORM, SIZ_CAST size);
#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
fprintf(outfile, " %p", mem);   /* Not portable */
#endif

if (malloclistptr >= MALLOCLISTSIZE)
  fprintf(outfile, " (not remembered)");
else
  {
  malloclist[malloclistptr] = mem;
  malloclistlength[malloclistptr] = size;
  malloclistptr++;
  }

fprintf(outfile, "\n");
return mem;
}
2768 
my_free(void * block,void * data)2769 static void my_free(void *block, void *data)
2770 {
2771 (void)data;
2772 if (show_memory)
2773   {
2774   uint32_t i, j;
2775   BOOL found = FALSE;
2776 
2777   fprintf(outfile, "free");
2778   for (i = 0; i < malloclistptr; i++)
2779     {
2780     if (block == malloclist[i])
2781       {
2782       fprintf(outfile, "    %5" SIZ_FORM, SIZ_CAST malloclistlength[i]);
2783       malloclistptr--;
2784       for (j = i; j < malloclistptr; j++)
2785         {
2786         malloclist[j] = malloclist[j+1];
2787         malloclistlength[j] = malloclistlength[j+1];
2788         }
2789       found = TRUE;
2790       break;
2791       }
2792     }
2793   if (!found) fprintf(outfile, " unremembered block");
2794 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2795   fprintf(outfile, " %p", block);  /* Not portable */
2796 #endif
2797   fprintf(outfile, "\n");
2798   }
2799 free(block);
2800 }
2801 
2802 
2803 
2804 /*************************************************
2805 *       Callback function for stack guard        *
2806 *************************************************/
2807 
2808 /* This is set up to be called from pcre2_compile() when the stackguard=n
2809 modifier sets a value greater than zero. The test we do is whether the
2810 parenthesis nesting depth is greater than the value set by the modifier.
2811 
Arguments:
  depth      the current parenthesis nesting depth
  user_data  not used
Returns:     non-zero to kill the compilation
2814 */
2815 
2816 static int
stack_guard(uint32_t depth,void * user_data)2817 stack_guard(uint32_t depth, void *user_data)
2818 {
2819 (void)user_data;
2820 return depth > pat_patctl.stackguard_test;
2821 }
2822 
2823 
2824 /*************************************************
2825 *         JIT memory callback                    *
2826 *************************************************/
2827 
2828 static PCRE2_JIT_STACK*
jit_callback(void * arg)2829 jit_callback(void *arg)
2830 {
2831 jit_was_used = TRUE;
2832 return (PCRE2_JIT_STACK *)arg;
2833 }
2834 
2835 
2836 /*************************************************
2837 *      Convert UTF-8 character to code point     *
2838 *************************************************/
2839 
2840 /* This function reads one or more bytes that represent a UTF-8 character,
2841 and returns the codepoint of that character. Note that the function supports
2842 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2843 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2844 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2845 checking, and also for generating 32-bit non-UTF data values above the UTF
2846 limit.
2847 
2848 Argument:
2849   utf8bytes   a pointer to the byte vector
2850   vptr        a pointer to an int to receive the value
2851 
2852 Returns:      >  0 => the number of bytes consumed
2853               -6 to 0 => malformed UTF-8 character at offset = (-return)
2854 */
2855 
2856 static int
utf82ord(PCRE2_SPTR8 utf8bytes,uint32_t * vptr)2857 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2858 {
2859 uint32_t c = *utf8bytes++;
2860 uint32_t d = c;
2861 int i, j, s;
2862 
2863 for (i = -1; i < 6; i++)               /* i is number of additional bytes */
2864   {
2865   if ((d & 0x80) == 0) break;
2866   d <<= 1;
2867   }
2868 
2869 if (i == -1) { *vptr = c; return 1; }  /* ascii character */
2870 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
2871 
2872 /* i now has a value in the range 1-5 */
2873 
2874 s = 6*i;
2875 d = (c & utf8_table3[i]) << s;
2876 
2877 for (j = 0; j < i; j++)
2878   {
2879   c = *utf8bytes++;
2880   if ((c & 0xc0) != 0x80) return -(j+1);
2881   s -= 6;
2882   d |= (c & 0x3f) << s;
2883   }
2884 
2885 /* Check that encoding was the correct unique one */
2886 
2887 for (j = 0; j < utf8_table1_size; j++)
2888   if (d <= (uint32_t)utf8_table1[j]) break;
2889 if (j != i) return -(i+1);
2890 
2891 /* Valid value */
2892 
2893 *vptr = d;
2894 return i+1;
2895 }
2896 
2897 
2898 
2899 /*************************************************
2900 *             Print one character                *
2901 *************************************************/
2902 
2903 /* Print a single character either literally, or as a hex escape, and count how
2904 many printed characters are used.
2905 
2906 Arguments:
2907   c            the character
2908   utf          TRUE in UTF mode
2909   f            the FILE to print to, or NULL just to count characters
2910 
2911 Returns:       number of characters written
2912 */
2913 
2914 static int
pchar(uint32_t c,BOOL utf,FILE * f)2915 pchar(uint32_t c, BOOL utf, FILE *f)
2916 {
2917 int n = 0;
2918 char tempbuffer[16];
2919 
2920 if (PRINTOK(c))
2921   {
2922   if (f != NULL) fprintf(f, "%c", c);
2923   return 1;
2924   }
2925 
2926 if (c < 0x100)
2927   {
2928   if (utf)
2929     {
2930     if (f != NULL) fprintf(f, "\\x{%02x}", c);
2931     return 6;
2932     }
2933   else
2934     {
2935     if (f != NULL) fprintf(f, "\\x%02x", c);
2936     return 4;
2937     }
2938   }
2939 
2940 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2941   else n = sprintf(tempbuffer, "\\x{%02x}", c);
2942 
2943 return n >= 0 ? n : 0;
2944 }
2945 
2946 
2947 
2948 #ifdef SUPPORT_PCRE2_16
2949 /*************************************************
2950 *    Find length of 0-terminated 16-bit string   *
2951 *************************************************/
2952 
static size_t strlen16(PCRE2_SPTR16 p)
{
/* Walk to the terminating zero code unit and yield the number of 16-bit code
units before it. The pointer difference is converted straight to size_t (the
return type); narrowing it through int would corrupt the length of very long
strings. */

PCRE2_SPTR16 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
2959 #endif  /* SUPPORT_PCRE2_16 */
2960 
2961 
2962 
2963 #ifdef SUPPORT_PCRE2_32
2964 /*************************************************
2965 *    Find length of 0-terminated 32-bit string   *
2966 *************************************************/
2967 
static size_t strlen32(PCRE2_SPTR32 p)
{
/* Walk to the terminating zero code unit and yield the number of 32-bit code
units before it. The pointer difference is converted straight to size_t (the
return type); narrowing it through int would corrupt the length of very long
strings. */

PCRE2_SPTR32 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
2974 #endif  /* SUPPORT_PCRE2_32 */
2975 
2976 
2977 #ifdef SUPPORT_PCRE2_8
2978 /*************************************************
2979 *         Print 8-bit character string           *
2980 *************************************************/
2981 
2982 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2983 For printing *MARK strings, a negative length is given, indicating that the
2984 length is in the first code unit. If handed a NULL file, this function just
2985 counts chars without printing (because pchar() does that). */
2986 
static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
{
uint32_t c = 0;
int total = 0;

/* A negative length means the real length is held in the first code unit. */

int remaining = (length < 0)? *p++ : length;

while (remaining > 0)
  {
  int used = 1;     /* Code units consumed by this character */

  /* In UTF mode try to decode a whole character, but accept it only if it is
  valid and lies entirely within the remaining data. Anything else is output
  as a single byte. */

  if (utf)
    {
    int rc = utf82ord(p, &c);
    if (rc > 0 && rc <= remaining) used = rc; else c = *p;
    }
  else
    {
    c = *p;
    }

  p += used;
  remaining -= used;
  total += pchar(c, utf, f);
  }

return total;
}
3011 #endif
3012 
3013 
3014 #ifdef SUPPORT_PCRE2_16
3015 /*************************************************
3016 *           Print 16-bit character string        *
3017 *************************************************/
3018 
3019 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
3020 For printing *MARK strings, a negative length is given, indicating that the
3021 length is in the first code unit. If handed a NULL file, just counts chars
3022 without printing. */
3023 
static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
{
int total = 0;

/* A negative length means the real length is held in the first code unit. */

int remaining = (length < 0)? *p++ : length;

while (remaining > 0)
  {
  uint32_t c = *p++ & 0xffff;
  remaining--;

  /* In UTF mode, a high surrogate that is followed (within the data) by a low
  surrogate is combined with it into a single code point. */

  if (utf && remaining > 0 && c >= 0xD800 && c <= 0xDBFF)
    {
    uint32_t low = *p & 0xffff;
    if (low >= 0xDC00 && low <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
      p++;
      remaining--;
      }
    }

  total += pchar(c, utf, f);
  }

return total;
}
3045 #endif  /* SUPPORT_PCRE2_16 */
3046 
3047 
3048 
3049 #ifdef SUPPORT_PCRE2_32
3050 /*************************************************
3051 *           Print 32-bit character string        *
3052 *************************************************/
3053 
3054 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
3055 For printing *MARK strings, a negative length is given, indicating that the
3056 length is in the first code unit. If handed a NULL file, just counts chars
3057 without printing. */
3058 
static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{
int total = 0;
int count = length;

/* A negative length means the real length is held in the first code unit. */

if (count < 0) count = (int)*p++;

/* Every code unit is a complete character, in UTF mode or otherwise. */

for (; count > 0; count--)
  total += pchar(*p++, utf, f);

return total;
}
3071 #endif  /* SUPPORT_PCRE2_32 */
3072 
3073 
3074 
3075 
3076 /*************************************************
3077 *       Convert character value to UTF-8         *
3078 *************************************************/
3079 
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. It is needed even when the
8-bit library is not supported, to generate UTF-8 output for non-ASCII
characters.

Arguments:
  cvalue     the character value
  utf8bytes  pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer, or -1 (with nothing
               written) if cvalue is greater than 0x7fffffff
*/
3091 
3092 static int
ord2utf8(uint32_t cvalue,uint8_t * utf8bytes)3093 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
3094 {
3095 int i, j;
3096 if (cvalue > 0x7fffffffu)
3097   return -1;
3098 for (i = 0; i < utf8_table1_size; i++)
3099   if (cvalue <= (uint32_t)utf8_table1[i]) break;
3100 utf8bytes += i;
3101 for (j = i; j > 0; j--)
3102  {
3103  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
3104  cvalue >>= 6;
3105  }
3106 *utf8bytes = utf8_table2[i] | cvalue;
3107 return i + 1;
3108 }
3109 
3110 
3111 
3112 #ifdef SUPPORT_PCRE2_16
3113 /*************************************************
3114 *           Convert string to 16-bit             *
3115 *************************************************/
3116 
3117 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3118 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3119 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3120 limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
3121 UTF-8 if the utf8_input modifier is set, but an error is generated for values
3122 greater than 0xffff.
3123 
3124 If all the input bytes are ASCII, the space needed for a 16-bit string is
3125 exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
3126 is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8
3127 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes
3128 in UTF-16. The result is always left in pbuffer16. Impose a minimum size to
3129 save repeated re-sizing.
3130 
3131 Note that this function does not object to surrogate values. This is
3132 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
3133 for the purpose of testing that they are correctly faulted.
3134 
3135 Arguments:
3136   p          points to a byte string
3137   utf        true in UTF mode
3138   lenptr     points to number of bytes in the string (excluding trailing zero)
3139 
3140 Returns:     0 on success, with the length updated to the number of 16-bit
3141                data items used (excluding the trailing zero)
3142              OR -1 if a UTF-8 string is malformed
3143              OR -2 if a value > 0x10ffff is encountered in UTF mode
3144              OR -3 if a value > 0xffff is encountered when not in UTF mode
3145 */
3146 
3147 static PCRE2_SIZE
to16(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3148 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3149 {
3150 uint16_t *pp;
3151 PCRE2_SIZE len = *lenptr;
3152 
3153 if (pbuffer16_size < 2*len + 2)
3154   {
3155   if (pbuffer16 != NULL) free(pbuffer16);
3156   pbuffer16_size = 2*len + 2;
3157   if (pbuffer16_size < 4096) pbuffer16_size = 4096;
3158   pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
3159   if (pbuffer16 == NULL)
3160     {
3161     fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
3162       SIZ_CAST pbuffer16_size);
3163     exit(1);
3164     }
3165   }
3166 
3167 pp = pbuffer16;
3168 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3169   {
3170   for (; len > 0; len--) *pp++ = *p++;
3171   }
3172 else while (len > 0)
3173   {
3174   uint32_t c;
3175   int chlen = utf82ord(p, &c);
3176   if (chlen <= 0) return -1;
3177   if (!utf && c > 0xffff) return -3;
3178   if (c > 0x10ffff) return -2;
3179   p += chlen;
3180   len -= chlen;
3181   if (c < 0x10000) *pp++ = c; else
3182     {
3183     c -= 0x10000;
3184     *pp++ = 0xD800 | (c >> 10);
3185     *pp++ = 0xDC00 | (c & 0x3ff);
3186     }
3187   }
3188 
3189 *pp = 0;
3190 *lenptr = pp - pbuffer16;
3191 return 0;
3192 }
3193 #endif
3194 
3195 
3196 
3197 #ifdef SUPPORT_PCRE2_32
3198 /*************************************************
3199 *           Convert string to 32-bit             *
3200 *************************************************/
3201 
3202 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3203 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3204 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3205 limit of 0x10ffff cause an error.
3206 
3207 In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
3208 is set, and no limit is imposed. There is special interpretation of the 0xff
3209 byte (which is illegal in UTF-8) in this case: it causes the top bit of the
3210 next character to be set. This provides a way of generating 32-bit characters
3211 greater than 0x7fffffff.
3212 
3213 If all the input bytes are ASCII, the space needed for a 32-bit string is
3214 exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
3215 string is no more than four times, because the number of characters must be
3216 less than the number of bytes. The result is always left in pbuffer32. Impose a
3217 minimum size to save repeated re-sizing.
3218 
3219 Note that this function does not object to surrogate values. This is
3220 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
3221 for the purpose of testing that they are correctly faulted.
3222 
3223 Arguments:
3224   p          points to a byte string
3225   utf        true in UTF mode
3226   lenptr     points to number of bytes in the string (excluding trailing zero)
3227 
3228 Returns:     0 on success, with the length updated to the number of 32-bit
3229                data items used (excluding the trailing zero)
3230              OR -1 if a UTF-8 string is malformed
3231              OR -2 if a value > 0x10ffff is encountered in UTF mode
3232 */
3233 
3234 static PCRE2_SIZE
to32(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3235 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3236 {
3237 uint32_t *pp;
3238 PCRE2_SIZE len = *lenptr;
3239 
3240 if (pbuffer32_size < 4*len + 4)
3241   {
3242   if (pbuffer32 != NULL) free(pbuffer32);
3243   pbuffer32_size = 4*len + 4;
3244   if (pbuffer32_size < 8192) pbuffer32_size = 8192;
3245   pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
3246   if (pbuffer32 == NULL)
3247     {
3248     fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
3249       SIZ_CAST pbuffer32_size);
3250     exit(1);
3251     }
3252   }
3253 
3254 pp = pbuffer32;
3255 
3256 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3257   {
3258   for (; len > 0; len--) *pp++ = *p++;
3259   }
3260 
3261 else while (len > 0)
3262   {
3263   int chlen;
3264   uint32_t c;
3265   uint32_t topbit = 0;
3266   if (!utf && *p == 0xff && len > 1)
3267     {
3268     topbit = 0x80000000u;
3269     p++;
3270     len--;
3271     }
3272   chlen = utf82ord(p, &c);
3273   if (chlen <= 0) return -1;
3274   if (utf && c > 0x10ffff) return -2;
3275   p += chlen;
3276   len -= chlen;
3277   *pp++ = c | topbit;
3278   }
3279 
3280 *pp = 0;
3281 *lenptr = pp - pbuffer32;
3282 return 0;
3283 }
3284 #endif /* SUPPORT_PCRE2_32 */
3285 
3286 
3287 
3288 /* This function is no longer used. Keep it around for a while, just in case it
3289 needs to be re-instated. */
3290 
3291 #ifdef NEVERNEVERNEVER
3292 
3293 /*************************************************
3294 *         Move back by so many characters        *
3295 *************************************************/
3296 
3297 /* Given a code unit offset in a subject string, move backwards by a number of
3298 characters, and return the resulting offset.
3299 
3300 Arguments:
3301   subject   pointer to the string
3302   offset    start offset
3303   count     count to move back by
3304   utf       TRUE if in UTF mode
3305 
3306 Returns:   a possibly changed offset
3307 */
3308 
3309 static PCRE2_SIZE
backchars(uint8_t * subject,PCRE2_SIZE offset,uint32_t count,BOOL utf)3310 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
3311 {
3312 if (!utf || test_mode == PCRE32_MODE)
3313   return (count >= offset)? 0 : (offset - count);
3314 
3315 else if (test_mode == PCRE8_MODE)
3316   {
3317   PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
3318   for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
3319     {
3320     pp--;
3321     while ((*pp & 0xc0) == 0x80) pp--;
3322     }
3323   return pp - (PCRE2_SPTR8)subject;
3324   }
3325 
3326 else  /* 16-bit mode */
3327   {
3328   PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
3329   for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
3330     {
3331     pp--;
3332     if ((*pp & 0xfc00) == 0xdc00) pp--;
3333     }
3334   return pp - (PCRE2_SPTR16)subject;
3335   }
3336 }
3337 #endif  /* NEVERNEVERNEVER */
3338 
3339 
3340 
3341 /*************************************************
3342 *           Expand input buffers                 *
3343 *************************************************/
3344 
3345 /* This function doubles the size of the input buffer and the buffer for
3346 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
3347 the new ones.
3348 
3349 Arguments: none
3350 Returns:   nothing (aborts if malloc() fails)
3351 */
3352 
3353 static void
expand_input_buffers(void)3354 expand_input_buffers(void)
3355 {
3356 int new_pbuffer8_size = 2*pbuffer8_size;
3357 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3358 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3359 
3360 if (new_buffer == NULL || new_pbuffer8 == NULL)
3361   {
3362   fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3363   exit(1);
3364   }
3365 
3366 memcpy(new_buffer, buffer, pbuffer8_size);
3367 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3368 
3369 pbuffer8_size = new_pbuffer8_size;
3370 
3371 free(buffer);
3372 free(pbuffer8);
3373 
3374 buffer = new_buffer;
3375 pbuffer8 = new_pbuffer8;
3376 }
3377 
3378 
3379 
3380 /*************************************************
3381 *        Read or extend an input line            *
3382 *************************************************/
3383 
3384 /* Input lines are read into buffer, but both patterns and data lines can be
3385 continued over multiple input lines. In addition, if the buffer fills up, we
3386 want to automatically expand it so as to be able to handle extremely large
3387 lines that are needed for certain stress tests, although this is less likely
3388 now that there are repetition features for both patterns and data. When the
input buffer is expanded, pbuffer8 must also be expanded likewise, and the
contents of pbuffer8, which are a copy of the input for callouts, must
3391 be preserved (for when expansion happens for a data line). This is not the most
3392 optimal way of handling this, but hey, this is just a test program!
3393 
3394 Arguments:
3395   f            the file to read
3396   start        where in buffer to start (this *must* be within buffer)
3397   prompt       for stdin or readline()
3398 
3399 Returns:       pointer to the start of new data
3400                could be a copy of start, or could be moved
3401                NULL if no data read and EOF reached
3402 */
3403 
3404 static uint8_t *
extend_inputline(FILE * f,uint8_t * start,const char * prompt)3405 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3406 {
3407 uint8_t *here = start;
3408 
3409 for (;;)
3410   {
3411   size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3412 
3413   if (rlen > 1000)
3414     {
3415     size_t dlen;
3416 
3417     /* If libreadline or libedit support is required, use readline() to read a
3418     line if the input is a terminal. Note that readline() removes the trailing
3419     newline, so we must put it back again, to be compatible with fgets(). */
3420 
3421 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3422     if (INTERACTIVE(f))
3423       {
3424       size_t len;
3425       char *s = readline(prompt);
3426       if (s == NULL) return (here == start)? NULL : start;
3427       len = strlen(s);
3428       if (len > 0) add_history(s);
3429       if (len > rlen - 1) len = rlen - 1;
3430       memcpy(here, s, len);
3431       here[len] = '\n';
3432       here[len+1] = 0;
3433       free(s);
3434       }
3435     else
3436 #endif
3437 
3438     /* Read the next line by normal means, prompting if the file is a tty. */
3439 
3440       {
3441       if (INTERACTIVE(f)) printf("%s", prompt);
3442       if (fgets((char *)here, rlen,  f) == NULL)
3443         return (here == start)? NULL : start;
3444       }
3445 
3446     dlen = strlen((char *)here);
3447     here += dlen;
3448 
3449     /* Check for end of line reached. Take care not to read data from before
3450     start (dlen will be zero for a file starting with a binary zero). */
3451 
3452     if (here > start && here[-1] == '\n') return start;
3453 
3454     /* If we have not read a newline when reading a file, we have either filled
3455     the buffer or reached the end of the file. We can detect the former by
3456     checking that the string fills the buffer, and the latter by feof(). If
3457     neither of these is true, it means we read a binary zero which has caused
3458     strlen() to give a short length. This is a hard error because pcre2test
3459     expects to work with C strings. */
3460 
3461     if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3462       {
3463       fprintf(outfile, "** Binary zero encountered in input\n");
3464       fprintf(outfile, "** pcre2test run abandoned\n");
3465       exit(1);
3466       }
3467     }
3468 
3469   else
3470     {
3471     size_t start_offset = start - buffer;
3472     size_t here_offset = here - buffer;
3473     expand_input_buffers();
3474     start = buffer + start_offset;
3475     here = buffer + here_offset;
3476     }
3477   }
3478 
3479 /* Control never gets here */
3480 }
3481 
3482 
3483 
3484 /*************************************************
3485 *         Case-independent strncmp() function    *
3486 *************************************************/
3487 
3488 /*
3489 Arguments:
3490   s         first string
3491   t         second string
3492   n         number of characters to compare
3493 
3494 Returns:    < 0, = 0, or > 0, according to the comparison
3495 */
3496 
static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
/* Compare at most n characters, ignoring case. The yield is the difference
between the lower-cased values at the first mismatch, or zero if the first n
characters all match. There is no check for a terminating zero, so both
strings must be at least n characters long. A zero or negative count compares
nothing and yields zero; the test is "n > 0" rather than "n != 0" so that a
negative count cannot send the loop running off the ends of the strings. */

for (; n > 0; n--)
  {
  int c = tolower(*s++) - tolower(*t++);
  if (c != 0) return c;
  }
return 0;
}
3507 
3508 
3509 
3510 /*************************************************
3511 *          Scan the main modifier list           *
3512 *************************************************/
3513 
3514 /* This function searches the modifier list for a long modifier name.
3515 
3516 Argument:
3517   p         start of the name
  len       length of the name
3519 
3520 Returns:    an index in the modifier list, or -1 on failure
3521 */
3522 
3523 static int
scan_modifiers(const uint8_t * p,unsigned int len)3524 scan_modifiers(const uint8_t *p, unsigned int len)
3525 {
3526 int bot = 0;
3527 int top = MODLISTCOUNT;
3528 
3529 while (top > bot)
3530   {
3531   int mid = (bot + top)/2;
3532   unsigned int mlen = strlen(modlist[mid].name);
3533   int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3534   if (c == 0)
3535     {
3536     if (len == mlen) return mid;
3537     c = (int)len - (int)mlen;
3538     }
3539   if (c > 0) bot = mid + 1; else top = mid;
3540   }
3541 
3542 return -1;
3543 
3544 }
3545 
3546 
3547 
3548 /*************************************************
*        Check a modifier and find its field     *
3550 *************************************************/
3551 
3552 /* This function is called when a modifier has been identified. We check that
3553 it is allowed here and find the field that is to be changed.
3554 
3555 Arguments:
3556   m          the modifier list entry
3557   ctx        CTX_PAT     => pattern context
3558              CTX_POPPAT  => pattern context for popped pattern
3559              CTX_DEFPAT  => default pattern context
3560              CTX_DAT     => data context
3561              CTX_DEFDAT  => default data context
3562   pctl       point to pattern control block
3563   dctl       point to data control block
3564   c          a single character or 0
3565 
3566 Returns:     a field pointer or NULL
3567 */
3568 
3569 static void *
check_modifier(modstruct * m,int ctx,patctl * pctl,datctl * dctl,uint32_t c)3570 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3571 {
3572 void *field = NULL;
3573 PCRE2_SIZE offset = m->offset;
3574 
3575 if (restrict_for_perl_test) switch(m->which)
3576   {
3577   case MOD_PNDP:
3578   case MOD_PATP:
3579   case MOD_PDP:
3580   break;
3581 
3582   default:
3583   fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3584     m->name);
3585   return NULL;
3586   }
3587 
3588 switch (m->which)
3589   {
3590   case MOD_CTC:  /* Compile context modifier */
3591   if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3592     else if (ctx == CTX_PAT) field = PTR(pat_context);
3593   break;
3594 
3595   case MOD_CTM:  /* Match context modifier */
3596   if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3597     else if (ctx == CTX_DAT) field = PTR(dat_context);
3598   break;
3599 
3600   case MOD_DAT:  /* Data line modifier */
3601   if (dctl != NULL) field = dctl;
3602   break;
3603 
3604   case MOD_PAT:    /* Pattern modifier */
3605   case MOD_PATP:   /* Allowed for Perl test */
3606   if (pctl != NULL) field = pctl;
3607   break;
3608 
3609   case MOD_PD:   /* Pattern or data line modifier */
3610   case MOD_PDP:  /* Ditto, allowed for Perl test */
3611   case MOD_PND:  /* Ditto, but not default pattern */
3612   case MOD_PNDP: /* Ditto, allowed for Perl test */
3613   if (dctl != NULL) field = dctl;
3614     else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
3615              ctx != CTX_DEFPAT))
3616       field = pctl;
3617   break;
3618   }
3619 
3620 if (field == NULL)
3621   {
3622   if (c == 0)
3623     fprintf(outfile, "** '%s' is not valid here\n", m->name);
3624   else
3625     fprintf(outfile, "** /%c is not valid here\n", c);
3626   return NULL;
3627   }
3628 
3629 return (char *)field + offset;
3630 }
3631 
3632 
3633 
3634 /*************************************************
3635 *            Decode a modifier list              *
3636 *************************************************/
3637 
3638 /* A pointer to a control block is NULL when called in cases when that block is
3639 not relevant. They are never all relevant in one call. At least one of patctl
3640 and datctl is NULL. The second argument specifies which context to use for
3641 modifiers that apply to contexts.
3642 
3643 Arguments:
3644   p          point to modifier string
3645   ctx        CTX_PAT     => pattern context
3646              CTX_POPPAT  => pattern context for popped pattern
3647              CTX_DEFPAT  => default pattern context
3648              CTX_DAT     => data context
3649              CTX_DEFDAT  => default data context
3650   pctl       point to pattern control block
3651   dctl       point to data control block
3652 
3653 Returns: TRUE if successful decode, FALSE otherwise
3654 */
3655 
3656 static BOOL
decode_modifiers(uint8_t * p,int ctx,patctl * pctl,datctl * dctl)3657 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3658 {
3659 uint8_t *ep, *pp;
3660 long li;
3661 unsigned long uli;
3662 BOOL first = TRUE;
3663 
3664 for (;;)
3665   {
3666   void *field;
3667   modstruct *m;
3668   BOOL off = FALSE;
3669   unsigned int i, len;
3670   int index;
3671   char *endptr;
3672 
3673   /* Skip white space and commas. */
3674 
3675   while (isspace(*p) || *p == ',') p++;
3676   if (*p == 0) break;
3677 
3678   /* Find the end of the item; lose trailing whitespace at end of line. */
3679 
3680   for (ep = p; *ep != 0 && *ep != ','; ep++);
3681   if (*ep == 0)
3682     {
3683     while (ep > p && isspace(ep[-1])) ep--;
3684     *ep = 0;
3685     }
3686 
3687   /* Remember if the first character is '-'. */
3688 
3689   if (*p == '-')
3690     {
3691     off = TRUE;
3692     p++;
3693     }
3694 
3695   /* Find the length of a full-length modifier name, and scan for it. */
3696 
3697   pp = p;
3698   while (pp < ep && *pp != '=') pp++;
3699   index = scan_modifiers(p, pp - p);
3700 
3701   /* If the first modifier is unrecognized, try to interpret it as a sequence
3702   of single-character abbreviated modifiers. None of these modifiers have any
3703   associated data. They just set options or control bits. */
3704 
3705   if (index < 0)
3706     {
3707     uint32_t cc;
3708     uint8_t *mp = p;
3709 
3710     if (!first)
3711       {
3712       fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3713       if (ep - p == 1)
3714         fprintf(outfile, "** Single-character modifiers must come first\n");
3715       return FALSE;
3716       }
3717 
3718     for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3719       {
3720       for (i = 0; i < C1MODLISTCOUNT; i++)
3721         if (cc == c1modlist[i].onechar) break;
3722 
3723       if (i >= C1MODLISTCOUNT)
3724         {
3725         fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3726           *p, (int)(ep-mp), mp);
3727         return FALSE;
3728         }
3729 
3730       if (c1modlist[i].index >= 0)
3731         {
3732         index = c1modlist[i].index;
3733         }
3734 
3735       else
3736         {
3737         index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3738           strlen(c1modlist[i].fullname));
3739         if (index < 0)
3740           {
3741           fprintf(outfile, "** Internal error: single-character equivalent "
3742             "modifier '%s' not found\n", c1modlist[i].fullname);
3743           return FALSE;
3744           }
3745         c1modlist[i].index = index;     /* Cache for next time */
3746         }
3747 
3748       field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3749       if (field == NULL) return FALSE;
3750 
3751       /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
3752       PCRE2_EXTENDED_MORE. */
3753 
3754       if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
3755         {
3756         *((uint32_t *)field) &= ~PCRE2_EXTENDED;
3757         *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
3758         }
3759       else
3760         *((uint32_t *)field) |= modlist[index].value;
3761       }
3762 
3763     continue;    /* With tne next (fullname) modifier */
3764     }
3765 
3766   /* We have a match on a full-name modifier. Check for the existence of data
3767   when needed. */
3768 
3769   m = modlist + index;      /* Save typing */
3770   if (m->type != MOD_CTL && m->type != MOD_OPT &&
3771       (m->type != MOD_IND || *pp == '='))
3772     {
3773     if (*pp++ != '=')
3774       {
3775       fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3776       return FALSE;
3777       }
3778     if (off)
3779       {
3780       fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3781       return FALSE;
3782       }
3783     }
3784 
3785   /* These on/off types have no data. */
3786 
3787   else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3788     {
3789     fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3790     return FALSE;
3791     }
3792 
3793   /* Set the data length for those types that have data. Then find the field
3794   that is to be set. If check_modifier() returns NULL, it has already output an
3795   error message. */
3796 
3797   len = ep - pp;
3798   field = check_modifier(m, ctx, pctl, dctl, 0);
3799   if (field == NULL) return FALSE;
3800 
3801   /* Process according to data type. */
3802 
3803   switch (m->type)
3804     {
3805     case MOD_CTL:
3806     case MOD_OPT:
3807     if (off) *((uint32_t *)field) &= ~m->value;
3808       else *((uint32_t *)field) |= m->value;
3809     break;
3810 
3811     case MOD_BSR:
3812     if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3813       {
3814 #ifdef BSR_ANYCRLF
3815       *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3816 #else
3817       *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3818 #endif
3819       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
3820         else dctl->control2 &= ~CTL2_BSR_SET;
3821       }
3822     else
3823       {
3824       if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3825         *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3826       else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3827         *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3828       else goto INVALID_VALUE;
3829       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
3830         else dctl->control2 |= CTL2_BSR_SET;
3831       }
3832     pp = ep;
3833     break;
3834 
3835     case MOD_CHR:  /* A single character */
3836     *((uint32_t *)field) = *pp++;
3837     break;
3838 
3839     case MOD_CON:  /* A convert type/options list */
3840     for (;; pp++)
3841       {
3842       uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
3843       len = ((colon != NULL && colon < ep)? colon:ep) - pp;
3844       for (i = 0; i < convertlistcount; i++)
3845         {
3846         if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
3847           {
3848           if (*((uint32_t *)field) == CONVERT_UNSET)
3849             *((uint32_t *)field) = convertlist[i].option;
3850           else
3851             *((uint32_t *)field) |= convertlist[i].option;
3852           break;
3853           }
3854         }
3855       if (i >= convertlistcount) goto INVALID_VALUE;
3856       pp += len;
3857       if (*pp != ':') break;
3858       }
3859     break;
3860 
3861     case MOD_IN2:    /* One or two unsigned integers */
3862     if (!isdigit(*pp)) goto INVALID_VALUE;
3863     uli = strtoul((const char *)pp, &endptr, 10);
3864     if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3865     ((uint32_t *)field)[0] = (uint32_t)uli;
3866     if (*endptr == ':')
3867       {
3868       uli = strtoul((const char *)endptr+1, &endptr, 10);
3869       if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3870       ((uint32_t *)field)[1] = (uint32_t)uli;
3871       }
3872     else ((uint32_t *)field)[1] = 0;
3873     pp = (uint8_t *)endptr;
3874     break;
3875 
3876     /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3877     less than ULONG_MAX. So first test for overflowing the long int, and then
3878     test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3879 
3880     case MOD_SIZ:    /* PCRE2_SIZE value */
3881     if (!isdigit(*pp)) goto INVALID_VALUE;
3882     uli = strtoul((const char *)pp, &endptr, 10);
3883     if (uli == ULONG_MAX) goto INVALID_VALUE;
3884 #if ULONG_MAX > PCRE2_SIZE_MAX
3885     if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3886 #endif
3887     *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
3888     pp = (uint8_t *)endptr;
3889     break;
3890 
3891     case MOD_IND:    /* Unsigned integer with default */
3892     if (len == 0)
3893       {
3894       *((uint32_t *)field) = (uint32_t)(m->value);
3895       break;
3896       }
3897     /* Fall through */
3898 
3899     case MOD_INT:    /* Unsigned integer */
3900     if (!isdigit(*pp)) goto INVALID_VALUE;
3901     uli = strtoul((const char *)pp, &endptr, 10);
3902     if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3903     *((uint32_t *)field) = (uint32_t)uli;
3904     pp = (uint8_t *)endptr;
3905     break;
3906 
3907     case MOD_INS:   /* Signed integer */
3908     if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
3909     li = strtol((const char *)pp, &endptr, 10);
3910     if (S32OVERFLOW(li)) goto INVALID_VALUE;
3911     *((int32_t *)field) = (int32_t)li;
3912     pp = (uint8_t *)endptr;
3913     break;
3914 
3915     case MOD_NL:
3916     for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
3917       if (len == strlen(newlines[i]) &&
3918         strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
3919     if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
3920     if (i == 0)
3921       {
3922       *((uint16_t *)field) = NEWLINE_DEFAULT;
3923       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
3924         else dctl->control2 &= ~CTL2_NL_SET;
3925       }
3926     else
3927       {
3928       *((uint16_t *)field) = i;
3929       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
3930         else dctl->control2 |= CTL2_NL_SET;
3931       }
3932     pp = ep;
3933     break;
3934 
3935     case MOD_NN:              /* Name or (signed) number; may be several */
3936     if (isdigit(*pp) || *pp == '-')
3937       {
3938       int ct = MAXCPYGET - 1;
3939       int32_t value;
3940       li = strtol((const char *)pp, &endptr, 10);
3941       if (S32OVERFLOW(li)) goto INVALID_VALUE;
3942       value = (int32_t)li;
3943       field = (char *)field - m->offset + m->value;      /* Adjust field ptr */
3944       if (value >= 0)                                    /* Add new number */
3945         {
3946         while (*((int32_t *)field) >= 0 && ct-- > 0)   /* Skip previous */
3947           field = (char *)field + sizeof(int32_t);
3948         if (ct <= 0)
3949           {
3950           fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
3951           return FALSE;
3952           }
3953         }
3954       *((int32_t *)field) = value;
3955       if (ct > 0) ((int32_t *)field)[1] = -1;
3956       pp = (uint8_t *)endptr;
3957       }
3958 
3959     /* Multiple strings are put end to end. */
3960 
3961     else
3962       {
3963       char *nn = (char *)field;
3964       if (len > 0)                    /* Add new name */
3965         {
3966         if (len > MAX_NAME_SIZE)
3967           {
3968           fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
3969           return FALSE;
3970           }
3971         while (*nn != 0) nn += strlen(nn) + 1;
3972         if (nn + len + 2 - (char *)field > LENCPYGET)
3973           {
3974           fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
3975             m->name);
3976           return FALSE;
3977           }
3978         memcpy(nn, pp, len);
3979         }
3980       nn[len] = 0 ;
3981       nn[len+1] = 0;
3982       pp = ep;
3983       }
3984     break;
3985 
3986     case MOD_STR:
3987     if (len + 1 > m->value)
3988       {
3989       fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
3990         m->name, m->value - 1);
3991       return FALSE;
3992       }
3993     memcpy(field, pp, len);
3994     ((uint8_t *)field)[len] = 0;
3995     pp = ep;
3996     break;
3997     }
3998 
3999   if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
4000     {
4001     fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
4002     return FALSE;
4003     }
4004 
4005   p = pp;
4006   first = FALSE;
4007 
4008   if (ctx == CTX_POPPAT &&
4009      (pctl->options != 0 ||
4010       pctl->tables_id != 0 ||
4011       pctl->locale[0] != 0 ||
4012       (pctl->control & NOTPOP_CONTROLS) != 0))
4013     {
4014     fprintf(outfile, "** '%s' is not valid here\n", m->name);
4015     return FALSE;
4016     }
4017   }
4018 
4019 return TRUE;
4020 
4021 INVALID_VALUE:
4022 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
4023 return FALSE;
4024 }
4025 
4026 
4027 /*************************************************
4028 *             Get info from a pattern            *
4029 *************************************************/
4030 
4031 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
4032 pattern.
4033 
4034 Arguments:
4035   what        code for the required information
4036   where       where to put the answer
4037   unsetok     PCRE2_ERROR_UNSET is an "expected" result
4038 
4039 Returns:      the return from pcre2_pattern_info()
4040 */
4041 
4042 static int
pattern_info(int what,void * where,BOOL unsetok)4043 pattern_info(int what, void *where, BOOL unsetok)
4044 {
4045 int rc;
4046 PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL);  /* Exercise the code */
4047 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
4048 if (rc >= 0) return 0;
4049 if (rc != PCRE2_ERROR_UNSET || !unsetok)
4050   {
4051   fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
4052     what);
4053   if (rc == PCRE2_ERROR_BADMODE)
4054     fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
4055       "%d-bit mode\n", test_mode,
4056       8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
4057   }
4058 return rc;
4059 }
4060 
4061 
4062 
4063 #ifdef SUPPORT_PCRE2_8
4064 /*************************************************
4065 *             Show something in a list           *
4066 *************************************************/
4067 
4068 /* This function just helps to keep the code that uses it tidier. It's used for
4069 various lists of things where there needs to be introductory text before the
4070 first item. As these calls are all in the POSIX-support code, they happen only
4071 when 8-bit mode is supported. */
4072 
4073 static void
prmsg(const char ** msg,const char * s)4074 prmsg(const char **msg, const char *s)
4075 {
4076 fprintf(outfile, "%s %s", *msg, s);
4077 *msg = "";
4078 }
4079 #endif  /* SUPPORT_PCRE2_8 */
4080 
4081 
4082 
4083 /*************************************************
4084 *                Show control bits               *
4085 *************************************************/
4086 
4087 /* Called for mutually exclusive controls and for unsupported POSIX controls.
4088 Because the bits are unique, this can be used for both pattern and data control
4089 words.
4090 
4091 Arguments:
4092   controls    control bits
4093   controls2   more control bits
4094   before      text to print before
4095 
4096 Returns:      nothing
4097 */
4098 
4099 static void
show_controls(uint32_t controls,uint32_t controls2,const char * before)4100 show_controls(uint32_t controls, uint32_t controls2, const char *before)
4101 {
4102 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4103   before,
4104   ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
4105   ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
4106   ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
4107   ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
4108   ((controls2 & CTL2_ALLVECTOR) != 0)? " allvector" : "",
4109   ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
4110   ((controls & CTL_BINCODE) != 0)? " bincode" : "",
4111   ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
4112   ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
4113   ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
4114   ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
4115   ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
4116   ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
4117   ((controls & CTL_DFA) != 0)? " dfa" : "",
4118   ((controls & CTL_EXPAND) != 0)? " expand" : "",
4119   ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
4120   ((controls & CTL_FRAMESIZE) != 0)? " framesize" : "",
4121   ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
4122   ((controls & CTL_GETALL) != 0)? " getall" : "",
4123   ((controls & CTL_GLOBAL) != 0)? " global" : "",
4124   ((controls & CTL_HEXPAT) != 0)? " hex" : "",
4125   ((controls & CTL_INFO) != 0)? " info" : "",
4126   ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
4127   ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
4128   ((controls & CTL_MARK) != 0)? " mark" : "",
4129   ((controls & CTL_MEMORY) != 0)? " memory" : "",
4130   ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
4131   ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
4132   ((controls & CTL_POSIX) != 0)? " posix" : "",
4133   ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
4134   ((controls & CTL_PUSH) != 0)? " push" : "",
4135   ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
4136   ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
4137   ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
4138   ((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
4139   ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
4140   ((controls2 & CTL2_SUBSTITUTE_LITERAL) != 0)? " substitute_literal" : "",
4141   ((controls2 & CTL2_SUBSTITUTE_MATCHED) != 0)? " substitute_matched" : "",
4142   ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
4143   ((controls2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) != 0)? " substitute_replacement_only" : "",
4144   ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
4145   ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
4146   ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
4147   ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
4148   ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
4149 }
4150 
4151 
4152 
4153 /*************************************************
4154 *                Show compile options            *
4155 *************************************************/
4156 
4157 /* Called from show_pattern_info() and for unsupported POSIX options.
4158 
4159 Arguments:
4160   options     an options word
4161   before      text to print before
4162   after       text to print after
4163 
4164 Returns:      nothing
4165 */
4166 
4167 static void
show_compile_options(uint32_t options,const char * before,const char * after)4168 show_compile_options(uint32_t options, const char *before, const char *after)
4169 {
4170 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4171 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4172   before,
4173   ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
4174   ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
4175   ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
4176   ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
4177   ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4178   ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
4179   ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
4180   ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4181   ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
4182   ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
4183   ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4184   ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
4185   ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
4186   ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
4187   ((options & PCRE2_LITERAL) != 0)? " literal" : "",
4188   ((options & PCRE2_MATCH_INVALID_UTF) != 0)? " match_invalid_utf" : "",
4189   ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
4190   ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
4191   ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
4192   ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
4193   ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
4194   ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4195   ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
4196   ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
4197   ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4198   ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4199   ((options & PCRE2_UCP) != 0)? " ucp" : "",
4200   ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
4201   ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
4202   ((options & PCRE2_UTF) != 0)? " utf" : "",
4203   after);
4204 }
4205 
4206 
4207 /*************************************************
4208 *           Show compile extra options           *
4209 *************************************************/
4210 
4211 /* Called from show_pattern_info() and for unsupported POSIX options.
4212 
4213 Arguments:
4214   options     an options word
4215   before      text to print before
4216   after       text to print after
4217 
4218 Returns:      nothing
4219 */
4220 
4221 static void
show_compile_extra_options(uint32_t options,const char * before,const char * after)4222 show_compile_extra_options(uint32_t options, const char *before,
4223   const char *after)
4224 {
4225 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4226 else fprintf(outfile, "%s%s%s%s%s%s%s%s",
4227   before,
4228   ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
4229   ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
4230   ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " extra_alt_bsux" : "",
4231   ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
4232   ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
4233   ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
4234   after);
4235 }
4236 
4237 
4238 
4239 #ifdef SUPPORT_PCRE2_8
4240 /*************************************************
4241 *                Show match options              *
4242 *************************************************/
4243 
4244 /* Called for unsupported POSIX options. */
4245 
4246 static void
show_match_options(uint32_t options)4247 show_match_options(uint32_t options)
4248 {
4249 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s",
4250   ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4251   ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)? " copy_matched_subject" : "",
4252   ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
4253   ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
4254   ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4255   ((options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
4256   ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4257   ((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
4258   ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
4259   ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
4260   ((options & PCRE2_NOTEOL) != 0)? " noteol" : "",
4261   ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
4262   ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
4263 }
4264 #endif  /* SUPPORT_PCRE2_8 */
4265 
4266 
4267 
4268 /*************************************************
4269 *      Show memory usage info for a pattern      *
4270 *************************************************/
4271 
4272 static void
show_memory_info(void)4273 show_memory_info(void)
4274 {
4275 uint32_t name_count, name_entry_size;
4276 size_t size, cblock_size;
4277 
4278 /* One of the test_mode values will always be true, but to stop a compiler
4279 warning we must initialize cblock_size. */
4280 
4281 cblock_size = 0;
4282 #ifdef SUPPORT_PCRE2_8
4283 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
4284 #endif
4285 #ifdef SUPPORT_PCRE2_16
4286 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
4287 #endif
4288 #ifdef SUPPORT_PCRE2_32
4289 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
4290 #endif
4291 
4292 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
4293 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
4294 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
4295 fprintf(outfile, "Memory allocation (code space): %d\n",
4296   (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
4297 if (pat_patctl.jit != 0)
4298   {
4299   (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
4300   fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
4301   }
4302 }
4303 
4304 
4305 
4306 /*************************************************
4307 *       Show frame size info for a pattern       *
4308 *************************************************/
4309 
4310 static void
show_framesize(void)4311 show_framesize(void)
4312 {
4313 size_t frame_size;
4314 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
4315 fprintf(outfile, "Frame size for pcre2_match(): %d\n", (int)frame_size);
4316 }
4317 
4318 
4319 
4320 /*************************************************
4321 *         Get and output an error message        *
4322 *************************************************/
4323 
4324 static BOOL
print_error_message(int errorcode,const char * before,const char * after)4325 print_error_message(int errorcode, const char *before, const char *after)
4326 {
4327 int len;
4328 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4329 if (len < 0)
4330   {
4331   fprintf(outfile, "\n** pcre2test internal error: cannot interpret error "
4332     "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len);
4333   }
4334 else
4335   {
4336   fprintf(outfile, "%s", before);
4337   PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4338   fprintf(outfile, "%s", after);
4339   }
4340 return len >= 0;
4341 }
4342 
4343 
4344 /*************************************************
4345 *     Callback function for callout enumeration  *
4346 *************************************************/
4347 
4348 /* The only differences in the callout emumeration block for different code
4349 unit widths are that the pointers to the subject, the most recent MARK, and a
4350 callout argument string point to strings of the appropriate width. Casts can be
4351 used to deal with this.
4352 
4353 Argument:
4354   cb            pointer to enumerate block
4355   callout_data  user data
4356 
4357 Returns:    0
4358 */
4359 
callout_callback(pcre2_callout_enumerate_block_8 * cb,void * callout_data)4360 static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
4361   void *callout_data)
4362 {
4363 uint32_t i;
4364 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4365 
4366 (void)callout_data;  /* Not currently displayed */
4367 
4368 fprintf(outfile, "Callout ");
4369 if (cb->callout_string != NULL)
4370   {
4371   uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
4372   fprintf(outfile, "%c", delimiter);
4373   PCHARSV(cb->callout_string, 0,
4374     cb->callout_string_length, utf, outfile);
4375   for (i = 0; callout_start_delims[i] != 0; i++)
4376     if (delimiter == callout_start_delims[i])
4377       {
4378       delimiter = callout_end_delims[i];
4379       break;
4380       }
4381   fprintf(outfile, "%c  ", delimiter);
4382   }
4383 else fprintf(outfile, "%d  ", cb->callout_number);
4384 
4385 fprintf(outfile, "%.*s\n",
4386   (int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
4387   pbuffer8 + cb->pattern_position);
4388 
4389 return 0;
4390 }
4391 
4392 
4393 
4394 /*************************************************
4395 *        Show information about a pattern        *
4396 *************************************************/
4397 
4398 /* This function is called after a pattern has been compiled if any of the
4399 information-requesting controls have been set.
4400 
4401 Arguments:  none
4402 
4403 Returns:    PR_OK     continue processing next line
4404             PR_SKIP   skip to a blank line
4405             PR_ABEND  abort the pcre2test run
4406 */
4407 
4408 static int
show_pattern_info(void)4409 show_pattern_info(void)
4410 {
4411 uint32_t compile_options, overall_options, extra_options;
4412 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4413 
4414 if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
4415   {
4416   fprintf(outfile, "------------------------------------------------------------------\n");
4417   PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
4418   }
4419 
4420 if ((pat_patctl.control & CTL_INFO) != 0)
4421   {
4422   int rc;
4423   void *nametable;
4424   uint8_t *start_bits;
4425   BOOL heap_limit_set, match_limit_set, depth_limit_set;
4426   uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
4427     hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
4428     depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
4429     newline_convention;
4430 
4431   /* Exercise the error route. */
4432 
4433   PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL);
4434   (void)rc;
4435 
4436   /* These info requests may return PCRE2_ERROR_UNSET. */
4437 
4438   switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
4439     {
4440     case 0:
4441     heap_limit_set = TRUE;
4442     break;
4443 
4444     case PCRE2_ERROR_UNSET:
4445     heap_limit_set = FALSE;
4446     break;
4447 
4448     default:
4449     return PR_ABEND;
4450     }
4451 
4452   switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
4453     {
4454     case 0:
4455     match_limit_set = TRUE;
4456     break;
4457 
4458     case PCRE2_ERROR_UNSET:
4459     match_limit_set = FALSE;
4460     break;
4461 
4462     default:
4463     return PR_ABEND;
4464     }
4465 
4466   switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
4467     {
4468     case 0:
4469     depth_limit_set = TRUE;
4470     break;
4471 
4472     case PCRE2_ERROR_UNSET:
4473     depth_limit_set = FALSE;
4474     break;
4475 
4476     default:
4477     return PR_ABEND;
4478     }
4479 
4480   /* These info requests should always succeed. */
4481 
4482   if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
4483       pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
4484       pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
4485       pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
4486       pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
4487       pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
4488       pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
4489       pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
4490       pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
4491       pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
4492       pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
4493       pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
4494       pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
4495       pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
4496       pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
4497       pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
4498       pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
4499       != 0)
4500     return PR_ABEND;
4501 
4502   fprintf(outfile, "Capture group count = %d\n", capture_count);
4503 
4504   if (backrefmax > 0)
4505     fprintf(outfile, "Max back reference = %d\n", backrefmax);
4506 
4507   if (maxlookbehind > 0)
4508     fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4509 
4510   if (heap_limit_set)
4511     fprintf(outfile, "Heap limit = %u\n", heap_limit);
4512 
4513   if (match_limit_set)
4514     fprintf(outfile, "Match limit = %u\n", match_limit);
4515 
4516   if (depth_limit_set)
4517     fprintf(outfile, "Depth limit = %u\n", depth_limit);
4518 
4519   if (namecount > 0)
4520     {
4521     fprintf(outfile, "Named capture groups:\n");
4522     for (; namecount > 0; namecount--)
4523       {
4524       int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
4525       uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
4526       fprintf(outfile, "  ");
4527 
4528       /* In UTF mode the name may be a UTF string containing non-ASCII
4529       letters and digits. We must output it as a UTF-8 string. In non-UTF mode,
4530       use the normal string printing functions, which use escapes for all
4531       non-ASCII characters. */
4532 
4533       if (utf)
4534         {
4535 #ifdef SUPPORT_PCRE2_32
4536         if (test_mode == PCRE32_MODE)
4537           {
4538           PCRE2_SPTR32 nameptr = (PCRE2_SPTR32)nametable + imm2_size;
4539           while (*nameptr != 0)
4540             {
4541             uint8_t u8buff[6];
4542             int len = ord2utf8(*nameptr++, u8buff);
4543             fprintf(outfile, "%.*s", len, u8buff);
4544             }
4545           }
4546 #endif
4547 #ifdef SUPPORT_PCRE2_16
4548         if (test_mode == PCRE16_MODE)
4549           {
4550           PCRE2_SPTR16 nameptr = (PCRE2_SPTR16)nametable + imm2_size;
4551           while (*nameptr != 0)
4552             {
4553             int len;
4554             uint8_t u8buff[6];
4555             uint32_t c = *nameptr++ & 0xffff;
4556             if (c >= 0xD800 && c < 0xDC00)
4557               c = ((c & 0x3ff) << 10) + (*nameptr++ & 0x3ff) + 0x10000;
4558             len = ord2utf8(c, u8buff);
4559             fprintf(outfile, "%.*s", len, u8buff);
4560             }
4561           }
4562 #endif
4563 #ifdef SUPPORT_PCRE2_8
4564         if (test_mode == PCRE8_MODE)
4565           fprintf(outfile, "%s", (PCRE2_SPTR8)nametable + imm2_size);
4566 #endif
4567         }
4568       else  /* Not UTF mode */
4569         {
4570         PCHARSV(nametable, imm2_size, length, FALSE, outfile);
4571         }
4572 
4573       while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4574 
4575 #ifdef SUPPORT_PCRE2_32
4576       if (test_mode == PCRE32_MODE)
4577         fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
4578 #endif
4579 #ifdef SUPPORT_PCRE2_16
4580       if (test_mode == PCRE16_MODE)
4581         fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
4582 #endif
4583 #ifdef SUPPORT_PCRE2_8
4584       if (test_mode == PCRE8_MODE)
4585         fprintf(outfile, "%3d\n", (int)(
4586         ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
4587 #endif
4588 
4589       nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
4590       }
4591     }
4592 
4593   if (hascrorlf)     fprintf(outfile, "Contains explicit CR or LF match\n");
4594   if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
4595   if (match_empty)   fprintf(outfile, "May match empty string\n");
4596 
4597   pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
4598   pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
4599   pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);
4600 
4601   /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
4602   cluttering up the verification output of non-UTF test files. */
4603 
4604   if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
4605     {
4606     compile_options &= ~PCRE2_NEVER_UTF;
4607     overall_options &= ~PCRE2_NEVER_UTF;
4608     }
4609 
4610   if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
4611     {
4612     compile_options &= ~PCRE2_NEVER_UCP;
4613     overall_options &= ~PCRE2_NEVER_UCP;
4614     }
4615 
4616   if ((compile_options|overall_options) != 0)
4617     {
4618     if (compile_options == overall_options)
4619       show_compile_options(compile_options, "Options:", "\n");
4620     else
4621       {
4622       show_compile_options(compile_options, "Compile options:", "\n");
4623       show_compile_options(overall_options, "Overall options:", "\n");
4624       }
4625     }
4626 
4627   if (extra_options != 0)
4628     show_compile_extra_options(extra_options, "Extra options:", "\n");
4629 
4630   if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4631 
4632   if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
4633       (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
4634     fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
4635       "any Unicode newline" : "CR, LF, or CRLF");
4636 
4637   if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
4638     {
4639     switch (newline_convention)
4640       {
4641       case PCRE2_NEWLINE_CR:
4642       fprintf(outfile, "Forced newline is CR\n");
4643       break;
4644 
4645       case PCRE2_NEWLINE_LF:
4646       fprintf(outfile, "Forced newline is LF\n");
4647       break;
4648 
4649       case PCRE2_NEWLINE_CRLF:
4650       fprintf(outfile, "Forced newline is CRLF\n");
4651       break;
4652 
4653       case PCRE2_NEWLINE_ANYCRLF:
4654       fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
4655       break;
4656 
4657       case PCRE2_NEWLINE_ANY:
4658       fprintf(outfile, "Forced newline is any Unicode newline\n");
4659       break;
4660 
4661       case PCRE2_NEWLINE_NUL:
4662       fprintf(outfile, "Forced newline is NUL\n");
4663       break;
4664 
4665       default:
4666       break;
4667       }
4668     }
4669 
4670   if (first_ctype == 2)
4671     {
4672     fprintf(outfile, "First code unit at start or follows newline\n");
4673     }
4674   else if (first_ctype == 1)
4675     {
4676     const char *caseless =
4677       ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
4678       "" : " (caseless)";
4679     if (PRINTOK(first_cunit))
4680       fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
4681     else
4682       {
4683       fprintf(outfile, "First code unit = ");
4684       pchar(first_cunit, FALSE, outfile);
4685       fprintf(outfile, "%s\n", caseless);
4686       }
4687     }
4688   else if (start_bits != NULL)
4689     {
4690     int i;
4691     int c = 24;
4692     fprintf(outfile, "Starting code units: ");
4693     for (i = 0; i < 256; i++)
4694       {
4695       if ((start_bits[i/8] & (1u << (i&7))) != 0)
4696         {
4697         if (c > 75)
4698           {
4699           fprintf(outfile, "\n  ");
4700           c = 2;
4701           }
4702         if (PRINTOK(i) && i != ' ')
4703           {
4704           fprintf(outfile, "%c ", i);
4705           c += 2;
4706           }
4707         else
4708           {
4709           fprintf(outfile, "\\x%02x ", i);
4710           c += 5;
4711           }
4712         }
4713       }
4714     fprintf(outfile, "\n");
4715     }
4716 
4717   if (last_ctype != 0)
4718     {
4719     const char *caseless =
4720       ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
4721       "" : " (caseless)";
4722     if (PRINTOK(last_cunit))
4723       fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
4724     else
4725       {
4726       fprintf(outfile, "Last code unit = ");
4727       pchar(last_cunit, FALSE, outfile);
4728       fprintf(outfile, "%s\n", caseless);
4729       }
4730     }
4731 
4732   if ((FLD(compiled_code, overall_options) & PCRE2_NO_START_OPTIMIZE) == 0)
4733     fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4734 
4735   if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
4736     {
4737     if (FLD(compiled_code, executable_jit) != NULL)
4738       fprintf(outfile, "JIT compilation was successful\n");
4739     else
4740       {
4741 #ifdef SUPPORT_JIT
4742       fprintf(outfile, "JIT compilation was not successful");
4743       if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
4744         return PR_ABEND;
4745       fprintf(outfile, "\n");
4746 #else
4747       fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
4748 #endif
4749       }
4750     }
4751   }
4752 
4753 if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
4754   {
4755   int errorcode;
4756   PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
4757   if (errorcode != 0)
4758     {
4759     fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
4760     if (errorcode < 0 && !print_error_message(errorcode, "", "\n"))
4761       return PR_ABEND;
4762     return PR_SKIP;
4763     }
4764   }
4765 
4766 return PR_OK;
4767 }
4768 
4769 
4770 
4771 /*************************************************
4772 *              Handle serialization error        *
4773 *************************************************/
4774 
4775 /* Print an error message after a serialization failure.
4776 
4777 Arguments:
4778   rc         the error code
4779   msg        an initial message for what failed
4780 
4781 Returns:     FALSE if print_error_message() fails
4782 */
4783 
4784 static BOOL
serial_error(int rc,const char * msg)4785 serial_error(int rc, const char *msg)
4786 {
4787 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4788 return print_error_message(rc, "", "\n");
4789 }
4790 
4791 
4792 
4793 /*************************************************
4794 *        Open file for save/load commands        *
4795 *************************************************/
4796 
4797 /* This function decodes the file name and opens the file.
4798 
4799 Arguments:
4800   buffptr     point after the #command
4801   mode        open mode
4802   fptr        points to the FILE variable
4803   name        name of # command
4804 
4805 Returns:      PR_OK or PR_ABEND
4806 */
4807 
4808 static int
open_file(uint8_t * buffptr,const char * mode,FILE ** fptr,const char * name)4809 open_file(uint8_t *buffptr, const char *mode, FILE **fptr, const char *name)
4810 {
4811 char *endf;
4812 char *filename = (char *)buffptr;
4813 while (isspace(*filename)) filename++;
4814 endf = filename + strlen8(filename);
4815 while (endf > filename && isspace(endf[-1])) endf--;
4816 
4817 if (endf == filename)
4818   {
4819   fprintf(outfile, "** File name expected after %s\n", name);
4820   return PR_ABEND;
4821   }
4822 
4823 *endf = 0;
4824 *fptr = fopen((const char *)filename, mode);
4825 if (*fptr == NULL)
4826   {
4827   fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4828   return PR_ABEND;
4829   }
4830 
4831 return PR_OK;
4832 }
4833 
4834 
4835 
4836 /*************************************************
4837 *               Process command line             *
4838 *************************************************/
4839 
4840 /* This function is called for lines beginning with # and a character that is
4841 not ! or whitespace, when encountered between tests, which means that there is
4842 no compiled pattern (compiled_code is NULL). The line is in buffer.
4843 
4844 Arguments:  none
4845 
4846 Returns:    PR_OK     continue processing next line
4847             PR_SKIP   skip to a blank line
4848             PR_ABEND  abort the pcre2test run
4849 */
4850 
4851 static int
process_command(void)4852 process_command(void)
4853 {
4854 FILE *f;
4855 PCRE2_SIZE serial_size;
4856 size_t i;
4857 int rc, cmd, cmdlen, yield;
4858 uint16_t first_listed_newline;
4859 const char *cmdname;
4860 uint8_t *argptr, *serial;
4861 
4862 yield = PR_OK;
4863 cmd = CMD_UNKNOWN;
4864 cmdlen = 0;
4865 
4866 for (i = 0; i < cmdlistcount; i++)
4867   {
4868   cmdname = cmdlist[i].name;
4869   cmdlen = strlen(cmdname);
4870   if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4871       isspace(buffer[cmdlen+1]))
4872     {
4873     cmd = cmdlist[i].value;
4874     break;
4875     }
4876   }
4877 
4878 argptr = buffer + cmdlen + 1;
4879 
4880 if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT)
4881   {
4882   fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
4883   return PR_ABEND;
4884   }
4885 
4886 switch(cmd)
4887   {
4888   case CMD_UNKNOWN:
4889   fprintf(outfile, "** Unknown command: %s", buffer);
4890   break;
4891 
4892   case CMD_FORBID_UTF:
4893   forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4894   break;
4895 
4896   case CMD_PERLTEST:
4897   restrict_for_perl_test = TRUE;
4898   break;
4899 
4900   /* Set default pattern modifiers */
4901 
4902   case CMD_PATTERN:
4903   (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4904   if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4905     def_patctl.jit = JIT_DEFAULT;
4906   break;
4907 
4908   /* Set default subject modifiers */
4909 
4910   case CMD_SUBJECT:
4911   (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4912   break;
4913 
4914   /* Check the default newline, and if not one of those listed, set up the
4915   first one to be forced. An empty list unsets. */
4916 
4917   case CMD_NEWLINE_DEFAULT:
4918   local_newline_default = 0;   /* Unset */
4919   first_listed_newline = 0;
4920   for (;;)
4921     {
4922     while (isspace(*argptr)) argptr++;
4923     if (*argptr == 0) break;
4924     for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4925       {
4926       size_t nlen = strlen(newlines[i]);
4927       if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4928           isspace(argptr[nlen]))
4929         {
4930         if (i == NEWLINE_DEFAULT) return PR_OK;  /* Default is valid */
4931         if (first_listed_newline == 0) first_listed_newline = i;
4932         }
4933       }
4934     while (*argptr != 0 && !isspace(*argptr)) argptr++;
4935     }
4936   local_newline_default = first_listed_newline;
4937   break;
4938 
4939   /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4940   the compiled pattern (e.g. to give information) are permitted. The default
4941   pattern modifiers are ignored. */
4942 
4943   case CMD_POP:
4944   case CMD_POPCOPY:
4945   if (patstacknext <= 0)
4946     {
4947     fprintf(outfile, "** Can't pop off an empty stack\n");
4948     return PR_SKIP;
4949     }
4950   memset(&pat_patctl, 0, sizeof(patctl));   /* Completely unset */
4951   if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4952     return PR_SKIP;
4953 
4954   if (cmd == CMD_POP)
4955     {
4956     SET(compiled_code, patstack[--patstacknext]);
4957     }
4958   else
4959     {
4960     PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
4961     }
4962 
4963   if (pat_patctl.jit != 0)
4964     {
4965     PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
4966     }
4967   if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
4968   if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
4969   if ((pat_patctl.control & CTL_ANYINFO) != 0)
4970     {
4971     rc = show_pattern_info();
4972     if (rc != PR_OK) return rc;
4973     }
4974   break;
4975 
4976   /* Save the stack of compiled patterns to a file, then empty the stack. */
4977 
4978   case CMD_SAVE:
4979   if (patstacknext <= 0)
4980     {
4981     fprintf(outfile, "** No stacked patterns to save\n");
4982     return PR_OK;
4983     }
4984 
4985   rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save");
4986   if (rc != PR_OK) return rc;
4987 
4988   PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
4989     general_context);
4990   if (rc < 0)
4991     {
4992     fclose(f);
4993     if (!serial_error(rc, "Serialization")) return PR_ABEND;
4994     break;
4995     }
4996 
4997   /* Write the length at the start of the file to make it straightforward to
4998   get the right memory when re-loading. This saves having to read the file size
4999   in different operating systems. To allow for different endianness (even
5000   though reloading with the opposite endianness does not work), write the
5001   length byte-by-byte. */
5002 
5003   for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
5004   if (fwrite(serial, 1, serial_size, f) != serial_size)
5005     {
5006     fprintf(outfile, "** Wrong return from fwrite()\n");
5007     fclose(f);
5008     return PR_ABEND;
5009     }
5010 
5011   fclose(f);
5012   PCRE2_SERIALIZE_FREE(serial);
5013   while(patstacknext > 0)
5014     {
5015     SET(compiled_code, patstack[--patstacknext]);
5016     SUB1(pcre2_code_free, compiled_code);
5017     }
5018   SET(compiled_code, NULL);
5019   break;
5020 
5021   /* Load a set of compiled patterns from a file onto the stack */
5022 
5023   case CMD_LOAD:
5024   rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load");
5025   if (rc != PR_OK) return rc;
5026 
5027   serial_size = 0;
5028   for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
5029 
5030   serial = malloc(serial_size);
5031   if (serial == NULL)
5032     {
5033     fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
5034       SIZ_CAST serial_size);
5035     fclose(f);
5036     return PR_ABEND;
5037     }
5038 
5039   i = fread(serial, 1, serial_size, f);
5040   fclose(f);
5041 
5042   if (i != serial_size)
5043     {
5044     fprintf(outfile, "** Wrong return from fread()\n");
5045     yield = PR_ABEND;
5046     }
5047   else
5048     {
5049     PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
5050     if (rc < 0)
5051       {
5052       if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
5053       }
5054     else
5055       {
5056       if (rc + patstacknext > PATSTACKSIZE)
5057         {
5058         fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
5059           rc, (rc == 1)? "" : "s");
5060         rc = PATSTACKSIZE - patstacknext;
5061         fprintf(outfile, "** Decoding %d pattern%s\n", rc,
5062           (rc == 1)? "" : "s");
5063         }
5064       PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
5065         general_context);
5066       if (rc < 0)
5067         {
5068         if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
5069         }
5070       else patstacknext += rc;
5071       }
5072     }
5073 
5074   free(serial);
5075   break;
5076 
5077   /* Load a set of binary tables into tables3. */
5078 
5079   case CMD_LOADTABLES:
5080   rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables");
5081   if (rc != PR_OK) return rc;
5082 
5083   if (tables3 == NULL)
5084     {
5085     (void)PCRE2_CONFIG(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length);
5086     tables3 = malloc(loadtables_length);
5087     }
5088 
5089   if (tables3 == NULL)
5090     {
5091     fprintf(outfile, "** Failed: malloc failed for #loadtables\n");
5092     yield = PR_ABEND;
5093     }
5094   else if (fread(tables3, 1, loadtables_length, f) != loadtables_length)
5095     {
5096     fprintf(outfile, "** Wrong return from fread()\n");
5097     yield = PR_ABEND;
5098     }
5099 
5100   fclose(f);
5101   break;
5102   }
5103 
5104 return yield;
5105 }
5106 
5107 
5108 
5109 /*************************************************
5110 *               Process pattern line             *
5111 *************************************************/
5112 
5113 /* This function is called when the input buffer contains the start of a
5114 pattern. The first character is known to be a valid delimiter. The pattern is
5115 read, modifiers are interpreted, and a suitable local context is set up for
5116 this test. The pattern is then compiled.
5117 
5118 Arguments:  none
5119 
5120 Returns:    PR_OK     continue processing next line
5121             PR_SKIP   skip to a blank line
5122             PR_ABEND  abort the pcre2test run
5123 */
5124 
5125 static int
process_pattern(void)5126 process_pattern(void)
5127 {
5128 BOOL utf;
5129 uint32_t k;
5130 uint8_t *p = buffer;
5131 unsigned int delimiter = *p++;
5132 int errorcode;
5133 void *use_pat_context;
5134 uint32_t use_forbid_utf = forbid_utf;
5135 PCRE2_SIZE patlen;
5136 PCRE2_SIZE valgrind_access_length;
5137 PCRE2_SIZE erroroffset;
5138 
5139 /* The perltest.sh script supports only / as a delimiter. */
5140 
5141 if (restrict_for_perl_test && delimiter != '/')
5142   {
5143   fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n");
5144   return PR_ABEND;
5145   }
5146 
5147 /* Initialize the context and pattern/data controls for this test from the
5148 defaults. */
5149 
5150 PATCTXCPY(pat_context, default_pat_context);
5151 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
5152 
5153 /* Find the end of the pattern, reading more lines if necessary. */
5154 
5155 for(;;)
5156   {
5157   while (*p != 0)
5158     {
5159     if (*p == '\\' && p[1] != 0) p++;
5160       else if (*p == delimiter) break;
5161     p++;
5162     }
5163   if (*p != 0) break;
5164   if ((p = extend_inputline(infile, p, "    > ")) == NULL)
5165     {
5166     fprintf(outfile, "** Unexpected EOF\n");
5167     return PR_ABEND;
5168     }
5169   if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
5170   }
5171 
5172 /* If the first character after the delimiter is backslash, make the pattern
5173 end with backslash. This is purely to provide a way of testing for the error
5174 message when a pattern ends with backslash. */
5175 
5176 if (p[1] == '\\') *p++ = '\\';
5177 
5178 /* Terminate the pattern at the delimiter, and compute the length. */
5179 
5180 *p++ = 0;
5181 patlen = p - buffer - 2;
5182 
5183 /* Look for modifiers and options after the final delimiter. */
5184 
5185 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
5186 
5187 /* Note that the match_invalid_utf option also sets utf when passed to
5188 pcre2_compile(). */
5189 
5190 utf = (pat_patctl.options & (PCRE2_UTF|PCRE2_MATCH_INVALID_UTF)) != 0;
5191 
5192 /* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
5193 exclusive with the utf modifier. */
5194 
5195 if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
5196   {
5197   if (test_mode == PCRE8_MODE)
5198     {
5199     fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
5200     return PR_SKIP;
5201     }
5202   if (utf)
5203     {
5204     fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
5205     return PR_SKIP;
5206     }
5207   }
5208 
5209 /* The convert and posix modifiers are mutually exclusive. */
5210 
5211 if (pat_patctl.convert_type != CONVERT_UNSET &&
5212     (pat_patctl.control & CTL_POSIX) != 0)
5213   {
5214   fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
5215   return PR_SKIP;
5216   }
5217 
5218 /* Check for mutually exclusive control modifiers. At present, these are all in
5219 the first control word. */
5220 
5221 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
5222   {
5223   uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
5224   if (c != 0 && c != (c & (~c+1)))
5225     {
5226     show_controls(c, 0, "** Not allowed together:");
5227     fprintf(outfile, "\n");
5228     return PR_SKIP;
5229     }
5230   }
5231 
5232 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
5233 specified. */
5234 
5235 if (pat_patctl.jit == 0 &&
5236     (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
5237   pat_patctl.jit = JIT_DEFAULT;
5238 
5239 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
5240 in callouts. Convert from hex if requested (literal strings in quotes may be
5241 present within the hexadecimal pairs). The result must necessarily be fewer
5242 characters so will always fit in pbuffer8. */
5243 
5244 if ((pat_patctl.control & CTL_HEXPAT) != 0)
5245   {
5246   uint8_t *pp, *pt;
5247   uint32_t c, d;
5248 
5249   pt = pbuffer8;
5250   for (pp = buffer + 1; *pp != 0; pp++)
5251     {
5252     if (isspace(*pp)) continue;
5253     c = *pp++;
5254 
5255     /* Handle a literal substring */
5256 
5257     if (c == '\'' || c == '"')
5258       {
5259       uint8_t *pq = pp;
5260       for (;; pp++)
5261         {
5262         d = *pp;
5263         if (d == 0)
5264           {
5265           fprintf(outfile, "** Missing closing quote in hex pattern: "
5266             "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
5267           return PR_SKIP;
5268           }
5269         if (d == c) break;
5270         *pt++ = d;
5271         }
5272       }
5273 
5274     /* Expect a hex pair */
5275 
5276     else
5277       {
5278       if (!isxdigit(c))
5279         {
5280         fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5281           PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
5282         return PR_SKIP;
5283         }
5284       if (*pp == 0)
5285         {
5286         fprintf(outfile, "** Odd number of digits in hex pattern\n");
5287         return PR_SKIP;
5288         }
5289       d = *pp;
5290       if (!isxdigit(d))
5291         {
5292         fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5293           PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
5294         return PR_SKIP;
5295         }
5296       c = toupper(c);
5297       d = toupper(d);
5298       *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
5299                (isdigit(d)? (d - '0') : (d - 'A' + 10));
5300       }
5301     }
5302   *pt = 0;
5303   patlen = pt - pbuffer8;
5304   }
5305 
5306 /* If not a hex string, process for repetition expansion if requested. */
5307 
5308 else if ((pat_patctl.control & CTL_EXPAND) != 0)
5309   {
5310   uint8_t *pp, *pt;
5311 
5312   pt = pbuffer8;
5313   for (pp = buffer + 1; *pp != 0; pp++)
5314     {
5315     uint8_t *pc = pp;
5316     uint32_t count = 1;
5317     size_t length = 1;
5318 
5319     /* Check for replication syntax; if not found, the defaults just set will
5320     prevail and one character will be copied. */
5321 
5322     if (pp[0] == '\\' && pp[1] == '[')
5323       {
5324       uint8_t *pe;
5325       for (pe = pp + 2; *pe != 0; pe++)
5326         {
5327         if (pe[0] == ']' && pe[1] == '{')
5328           {
5329           uint32_t clen = pe - pc - 2;
5330           uint32_t i = 0;
5331           unsigned long uli;
5332           char *endptr;
5333 
5334           pe += 2;
5335           uli = strtoul((const char *)pe, &endptr, 10);
5336           if (U32OVERFLOW(uli))
5337             {
5338             fprintf(outfile, "** Pattern repeat count too large\n");
5339             return PR_SKIP;
5340             }
5341 
5342           i = (uint32_t)uli;
5343           pe = (uint8_t *)endptr;
5344           if (*pe == '}')
5345             {
5346             if (i == 0)
5347               {
5348               fprintf(outfile, "** Zero repeat not allowed\n");
5349               return PR_SKIP;
5350               }
5351             pc += 2;
5352             count = i;
5353             length = clen;
5354             pp = pe;
5355             break;
5356             }
5357           }
5358         }
5359       }
5360 
5361     /* Add to output. If the buffer is too small expand it. The function for
5362     expanding buffers always keeps buffer and pbuffer8 in step as far as their
5363     size goes. */
5364 
5365     while (pt + count * length > pbuffer8 + pbuffer8_size)
5366       {
5367       size_t pc_offset = pc - buffer;
5368       size_t pp_offset = pp - buffer;
5369       size_t pt_offset = pt - pbuffer8;
5370       expand_input_buffers();
5371       pc = buffer + pc_offset;
5372       pp = buffer + pp_offset;
5373       pt = pbuffer8 + pt_offset;
5374       }
5375 
5376     for (; count > 0; count--)
5377       {
5378       memcpy(pt, pc, length);
5379       pt += length;
5380       }
5381     }
5382 
5383   *pt = 0;
5384   patlen = pt - pbuffer8;
5385 
5386   if ((pat_patctl.control & CTL_INFO) != 0)
5387     fprintf(outfile, "Expanded: %s\n", pbuffer8);
5388   }
5389 
5390 /* Neither hex nor expanded, just copy the input verbatim. */
5391 
5392 else
5393   {
5394   strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
5395   }
5396 
5397 /* Sort out character tables */
5398 
5399 if (pat_patctl.locale[0] != 0)
5400   {
5401   if (pat_patctl.tables_id != 0)
5402     {
5403     fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
5404     return PR_SKIP;
5405     }
5406   if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
5407     {
5408     fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
5409     return PR_SKIP;
5410     }
5411   if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
5412     {
5413     strcpy((char *)locale_name, (char *)pat_patctl.locale);
5414     if (locale_tables != NULL) free((void *)locale_tables);
5415     PCRE2_MAKETABLES(locale_tables);
5416     }
5417   use_tables = locale_tables;
5418   }
5419 
5420 else switch (pat_patctl.tables_id)
5421   {
5422   case 0: use_tables = NULL; break;
5423   case 1: use_tables = tables1; break;
5424   case 2: use_tables = tables2; break;
5425 
5426   case 3:
5427   if (tables3 == NULL)
5428     {
5429     fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not "
5430       "been loaded\n");
5431     return PR_SKIP;
5432     }
5433   use_tables = tables3;
5434   break;
5435 
5436   default:
5437   fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n");
5438   return PR_SKIP;
5439   }
5440 
5441 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
5442 
5443 /* Set up for the stackguard test. */
5444 
5445 if (pat_patctl.stackguard_test != 0)
5446   {
5447   PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
5448   }
5449 
5450 /* Handle compiling via the POSIX interface, which doesn't support the
5451 timing, showing, or debugging options, nor the ability to pass over
5452 local character tables. Neither does it have 16-bit or 32-bit support. */
5453 
5454 if ((pat_patctl.control & CTL_POSIX) != 0)
5455   {
5456 #ifdef SUPPORT_PCRE2_8
5457   int rc;
5458   int cflags = 0;
5459   const char *msg = "** Ignored with POSIX interface:";
5460 #endif
5461 
5462   if (test_mode != PCRE8_MODE)
5463     {
5464     fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
5465     return PR_SKIP;
5466     }
5467 
5468 #ifdef SUPPORT_PCRE2_8
5469   /* Check for features that the POSIX interface does not support. */
5470 
5471   if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
5472   if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
5473   if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
5474   if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
5475   if (timeit > 0) prmsg(&msg, "timing");
5476   if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
5477 
5478   if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
5479     {
5480     show_compile_options(
5481       pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
5482     msg = "";
5483     }
5484 
5485   if ((FLD(pat_context, extra_options) &
5486        ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0)
5487     {
5488     show_compile_extra_options(
5489       FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS,
5490         msg, "");
5491     msg = "";
5492     }
5493 
5494   if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5495       (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
5496     {
5497     show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
5498       pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
5499     msg = "";
5500     }
5501 
5502   if (local_newline_default != 0) prmsg(&msg, "#newline_default");
5503   if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
5504     prmsg(&msg, "max_pattern_length");
5505   if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
5506     prmsg(&msg, "parens_nest_limit");
5507 
5508   if (msg[0] == 0) fprintf(outfile, "\n");
5509 
5510   /* Translate PCRE2 options to POSIX options and then compile. */
5511 
5512   if (utf) cflags |= REG_UTF;
5513   if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
5514   if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
5515   if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
5516   if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
5517   if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
5518   if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
5519   if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
5520 
5521   if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
5522     {
5523     preg.re_endp = (char *)pbuffer8 + patlen;
5524     cflags |= REG_PEND;
5525     }
5526 
5527   rc = regcomp(&preg, (char *)pbuffer8, cflags);
5528 
5529   /* Compiling failed */
5530 
5531   if (rc != 0)
5532     {
5533     size_t bsize, usize;
5534     int psize;
5535 
5536     preg.re_pcre2_code = NULL;     /* In case something was left in there */
5537     preg.re_match_data = NULL;
5538 
5539     bsize = (pat_patctl.regerror_buffsize != 0)?
5540       pat_patctl.regerror_buffsize : pbuffer8_size;
5541     if (bsize + 8 < pbuffer8_size)
5542       memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
5543     usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
5544 
5545     /* Inside regerror(), snprintf() is used. If the buffer is too small, some
5546     versions of snprintf() put a zero byte at the end, but others do not.
5547     Therefore, we print a maximum of one less than the size of the buffer. */
5548 
5549     psize = (int)bsize - 1;
5550     fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
5551     if (usize > bsize)
5552       {
5553       fprintf(outfile, "** regerror() message truncated\n");
5554       if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
5555         fprintf(outfile, "** regerror() buffer overflow\n");
5556       }
5557     return PR_SKIP;
5558     }
5559 
5560   /* Compiling succeeded. Check that the values in the preg block are sensible.
5561   It can happen that pcre2test is accidentally linked with a different POSIX
5562   library which succeeds, but of course puts different things into preg. In
5563   this situation, calling regfree() may cause a segfault (or invalid free() in
5564   valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
5565   calling of regfree() on exit. */
5566 
5567   if (preg.re_pcre2_code == NULL ||
5568       ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
5569       ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
5570       preg.re_match_data == NULL ||
5571       preg.re_cflags != cflags)
5572     {
5573     fprintf(outfile,
5574       "** The regcomp() function returned zero (success), but the values set\n"
5575       "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
5576       "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
5577       "** some other POSIX regex library.\n**\n");
5578     preg.re_pcre2_code = NULL;
5579     return PR_ABEND;
5580     }
5581 
5582   return PR_OK;
5583 #endif  /* SUPPORT_PCRE2_8 */
5584   }
5585 
5586 /* Handle compiling via the native interface. Controls that act later are
5587 ignored with "push". Replacements are locked out. */
5588 
5589 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5590   {
5591   if (pat_patctl.replacement[0] != 0)
5592     {
5593     fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
5594     return PR_OK;
5595     }
5596   if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5597       (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
5598     {
5599     show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
5600                   pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
5601       "** Ignored when compiled pattern is stacked with 'push':");
5602     fprintf(outfile, "\n");
5603     }
5604   if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
5605       (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
5606     {
5607     show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
5608                   pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
5609       "** Applies only to compile when pattern is stacked with 'push':");
5610     fprintf(outfile, "\n");
5611     }
5612   }
5613 
5614 /* Convert the input in non-8-bit modes. */
5615 
5616 errorcode = 0;
5617 
5618 #ifdef SUPPORT_PCRE2_16
5619 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
5620 #endif
5621 
5622 #ifdef SUPPORT_PCRE2_32
5623 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
5624 #endif
5625 
5626 switch(errorcode)
5627   {
5628   case -1:
5629   fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
5630     "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
5631   return PR_SKIP;
5632 
5633   case -2:
5634   fprintf(outfile, "** Failed: character value greater than 0x10ffff "
5635     "cannot be converted to UTF\n");
5636   return PR_SKIP;
5637 
5638   case -3:
5639   fprintf(outfile, "** Failed: character value greater than 0xffff "
5640     "cannot be converted to 16-bit in non-UTF mode\n");
5641   return PR_SKIP;
5642 
5643   default:
5644   break;
5645   }
5646 
5647 /* The pattern is now in pbuffer[8|16|32], with the length in code units in
5648 patlen. If it is to be converted, copy the result back afterwards so that it
5649 ends up back in the usual place. */
5650 
5651 if (pat_patctl.convert_type != CONVERT_UNSET)
5652   {
5653   int rc;
5654   int convert_return = PR_OK;
5655   uint32_t convert_options = pat_patctl.convert_type;
5656   void *converted_pattern;
5657   PCRE2_SIZE converted_length;
5658 
5659   if (pat_patctl.convert_length != 0)
5660     {
5661     converted_length = pat_patctl.convert_length;
5662     converted_pattern = malloc(converted_length * code_unit_size);
5663     if (converted_pattern == NULL)
5664       {
5665       fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
5666       return PR_SKIP;
5667       }
5668     }
5669   else converted_pattern = NULL;  /* Let the library allocate */
5670 
5671   if (utf) convert_options |= PCRE2_CONVERT_UTF;
5672   if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
5673     convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
5674 
5675   CONCTXCPY(con_context, default_con_context);
5676 
5677   if (pat_patctl.convert_glob_escape != 0)
5678     {
5679     uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
5680       pat_patctl.convert_glob_escape;
5681     PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
5682     if (rc != 0)
5683       {
5684       fprintf(outfile, "** Invalid glob escape '%c'\n",
5685         pat_patctl.convert_glob_escape);
5686       convert_return = PR_SKIP;
5687       goto CONVERT_FINISH;
5688       }
5689     }
5690 
5691   if (pat_patctl.convert_glob_separator != 0)
5692     {
5693     PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
5694     if (rc != 0)
5695       {
5696       fprintf(outfile, "** Invalid glob separator '%c'\n",
5697         pat_patctl.convert_glob_separator);
5698       convert_return = PR_SKIP;
5699       goto CONVERT_FINISH;
5700       }
5701     }
5702 
5703   PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
5704     &converted_pattern, &converted_length, con_context);
5705 
5706   if (rc != 0)
5707     {
5708     fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
5709       SIZ_CAST converted_length);
5710     convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
5711     }
5712 
5713   /* Output the converted pattern, then copy it. */
5714 
5715   else
5716     {
5717     PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
5718     fprintf(outfile, "\n");
5719     patlen = converted_length;
5720     CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);
5721     }
5722 
5723   /* Free the converted pattern. */
5724 
5725   CONVERT_FINISH:
5726   if (pat_patctl.convert_length != 0)
5727     free(converted_pattern);
5728   else
5729     PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
5730 
5731   /* Return if conversion was unsuccessful. */
5732 
5733   if (convert_return != PR_OK) return convert_return;
5734   }
5735 
5736 /* By default we pass a zero-terminated pattern, but a length is passed if
5737 "use_length" was specified or this is a hex pattern (which might contain binary
5738 zeros). When valgrind is supported, arrange for the unused part of the buffer
5739 to be marked as no access. */
5740 
5741 valgrind_access_length = patlen;
5742 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
5743   {
5744   patlen = PCRE2_ZERO_TERMINATED;
5745   valgrind_access_length += 1;  /* For the terminating zero */
5746   }
5747 
5748 #ifdef SUPPORT_VALGRIND
5749 #ifdef SUPPORT_PCRE2_8
5750 if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
5751   {
5752   VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
5753     pbuffer8_size - valgrind_access_length);
5754   }
5755 #endif
5756 #ifdef SUPPORT_PCRE2_16
5757 if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
5758   {
5759   VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
5760     pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
5761   }
5762 #endif
5763 #ifdef SUPPORT_PCRE2_32
5764 if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
5765   {
5766   VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
5767     pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
5768   }
5769 #endif
5770 #else  /* Valgrind not supported */
5771 (void)valgrind_access_length;  /* Avoid compiler warning */
5772 #endif
5773 
5774 /* If #newline_default has been used and the library was not compiled with an
5775 appropriate default newline setting, local_newline_default will be non-zero. We
5776 use this if there is no explicit newline modifier. */
5777 
5778 if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
5779   {
5780   SETFLD(pat_context, newline_convention, local_newline_default);
5781   }
5782 
5783 /* The null_context modifier is used to test calling pcre2_compile() with a
5784 NULL context. */
5785 
5786 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
5787   NULL : PTR(pat_context);
5788 
5789 /* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
5790 and PCRE2_NEVER_UCP are invalid with it. */
5791 
5792 if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
5793 
5794 /* Compile many times when timing. */
5795 
5796 if (timeit > 0)
5797   {
5798   int i;
5799   clock_t time_taken = 0;
5800   for (i = 0; i < timeit; i++)
5801     {
5802     clock_t start_time = clock();
5803     PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5804       pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5805         use_pat_context);
5806     time_taken += clock() - start_time;
5807     if (TEST(compiled_code, !=, NULL))
5808       { SUB1(pcre2_code_free, compiled_code); }
5809     }
5810   total_compile_time += time_taken;
5811   fprintf(outfile, "Compile time %.4f milliseconds\n",
5812     (((double)time_taken * 1000.0) / (double)timeit) /
5813       (double)CLOCKS_PER_SEC);
5814   }
5815 
5816 /* A final compile that is used "for real". */
5817 
5818 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|use_forbid_utf,
5819   &errorcode, &erroroffset, use_pat_context);
5820 
5821 /* Call the JIT compiler if requested. When timing, we must free and recompile
5822 the pattern each time because that is the only way to free the JIT compiled
5823 code. We know that compilation will always succeed. */
5824 
5825 if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
5826   {
5827   if (timeit > 0)
5828     {
5829     int i;
5830     clock_t time_taken = 0;
5831 
5832     for (i = 0; i < timeit; i++)
5833       {
5834       clock_t start_time;
5835       SUB1(pcre2_code_free, compiled_code);
5836       PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5837         pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5838         use_pat_context);
5839       start_time = clock();
5840       PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5841       time_taken += clock() - start_time;
5842       }
5843     total_jit_compile_time += time_taken;
5844     fprintf(outfile, "JIT compile  %.4f milliseconds\n",
5845       (((double)time_taken * 1000.0) / (double)timeit) /
5846         (double)CLOCKS_PER_SEC);
5847     }
5848   else
5849     {
5850     PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5851     }
5852   }
5853 
5854 /* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
5855 and 32-bit buffers can be marked completely undefined, but we must leave the
5856 pattern in the 8-bit buffer defined because it may be read from a callout
5857 during matching. */
5858 
5859 #ifdef SUPPORT_VALGRIND
5860 #ifdef SUPPORT_PCRE2_8
5861 if (test_mode == PCRE8_MODE)
5862   {
5863   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
5864     pbuffer8_size - valgrind_access_length);
5865   }
5866 #endif
5867 #ifdef SUPPORT_PCRE2_16
5868 if (test_mode == PCRE16_MODE)
5869   {
5870   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
5871   }
5872 #endif
5873 #ifdef SUPPORT_PCRE2_32
5874 if (test_mode == PCRE32_MODE)
5875   {
5876   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
5877   }
5878 #endif
5879 #endif
5880 
5881 /* Compilation failed; go back for another re, skipping to blank line
5882 if non-interactive. */
5883 
5884 if (TEST(compiled_code, ==, NULL))
5885   {
5886   fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
5887     (int)erroroffset);
5888   if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
5889   return PR_SKIP;
5890   }
5891 
5892 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
5893 locked out at compile time, but we must also check for occurrences of \P, \p,
5894 and \X, which are only supported when Unicode is supported. */
5895 
5896 if (forbid_utf != 0)
5897   {
5898   if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
5899     {
5900     fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
5901       "#forbid_utf command\n");
5902     return PR_SKIP;
5903     }
5904   }
5905 
5906 /* Remember the maximum lookbehind, for partial matching. */
5907 
5908 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
5909   return PR_ABEND;
5910 
5911 /* Remember the number of captures. */
5912 
5913 if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
5914   return PR_ABEND;
5915 
5916 /* If an explicit newline modifier was given, set the information flag in the
5917 pattern so that it is preserved over push/pop. */
5918 
5919 if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
5920   {
5921   SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
5922   }
5923 
5924 /* Output code size and other information if requested. */
5925 
5926 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5927 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
5928 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5929   {
5930   int rc = show_pattern_info();
5931   if (rc != PR_OK) return rc;
5932   }
5933 
5934 /* The "push" control requests that the compiled pattern be remembered on a
5935 stack. This is mainly for testing the serialization functionality. */
5936 
5937 if ((pat_patctl.control & CTL_PUSH) != 0)
5938   {
5939   if (patstacknext >= PATSTACKSIZE)
5940     {
5941     fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5942     return PR_ABEND;
5943     }
5944   patstack[patstacknext++] = PTR(compiled_code);
5945   SET(compiled_code, NULL);
5946   }
5947 
5948 /* The "pushcopy" and "pushtablescopy" controls are similar, but push a
5949 copy of the pattern, the latter with a copy of its character tables. This tests
5950 the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
5951 
5952 if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5953   {
5954   if (patstacknext >= PATSTACKSIZE)
5955     {
5956     fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5957     return PR_ABEND;
5958     }
5959   if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
5960     {
5961     PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
5962     }
5963   else
5964     {
5965     PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
5966       compiled_code); }
5967   }
5968 
5969 return PR_OK;
5970 }
5971 
5972 
5973 
5974 /*************************************************
5975 *          Check heap, match or depth limit      *
5976 *************************************************/
5977 
5978 /* This is used for DFA, normal, and JIT fast matching. For DFA matching it
5979 should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
5980 
5981 Arguments:
5982   pp        the subject string
5983   ulen      length of subject or PCRE2_ZERO_TERMINATED
5984   errnumber defines which limit to test
5985   msg       string to include in final message
5986 
5987 Returns:    the return from the final match function call
5988 */
5989 
5990 static int
check_match_limit(uint8_t * pp,PCRE2_SIZE ulen,int errnumber,const char * msg)5991 check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
5992 {
5993 int capcount;
5994 uint32_t min = 0;
5995 uint32_t mid = 64;
5996 uint32_t max = UINT32_MAX;
5997 
5998 PCRE2_SET_MATCH_LIMIT(dat_context, max);
5999 PCRE2_SET_DEPTH_LIMIT(dat_context, max);
6000 PCRE2_SET_HEAP_LIMIT(dat_context, max);
6001 
6002 for (;;)
6003   {
6004   uint32_t stack_start = 0;
6005 
6006   if (errnumber == PCRE2_ERROR_HEAPLIMIT)
6007     {
6008     PCRE2_SET_HEAP_LIMIT(dat_context, mid);
6009     }
6010   else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
6011     {
6012     PCRE2_SET_MATCH_LIMIT(dat_context, mid);
6013     }
6014   else
6015     {
6016     PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
6017     }
6018 
6019   if ((dat_datctl.control & CTL_DFA) != 0)
6020     {
6021     stack_start = DFA_START_RWS_SIZE/1024;
6022     if (dfa_workspace == NULL)
6023       dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6024     if (dfa_matched++ == 0)
6025       dfa_workspace[0] = -1;  /* To catch bad restart */
6026     PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6027       dat_datctl.options, match_data,
6028       PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
6029     }
6030 
6031   else if ((pat_patctl.control & CTL_JITFAST) != 0)
6032     PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6033       dat_datctl.options, match_data, PTR(dat_context));
6034 
6035   else
6036     {
6037     stack_start = START_FRAMES_SIZE/1024;
6038     PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6039       dat_datctl.options, match_data, PTR(dat_context));
6040     }
6041 
6042   if (capcount == errnumber)
6043     {
6044     if ((mid & 0x80000000u) != 0)
6045       {
6046       fprintf(outfile, "Can't find minimum %s limit: check pattern for "
6047         "restriction\n", msg);
6048       break;
6049       }
6050 
6051     min = mid;
6052     mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
6053     }
6054   else if (capcount >= 0 ||
6055            capcount == PCRE2_ERROR_NOMATCH ||
6056            capcount == PCRE2_ERROR_PARTIAL)
6057     {
6058     /* If we've not hit the error with a heap limit less than the size of the
6059     initial stack frame vector (for pcre2_match()) or the initial stack
6060     workspace vector (for pcre2_dfa_match()), the heap is not being used, so
6061     the minimum limit is zero; there's no need to go on. The other limits are
6062     always greater than zero. */
6063 
6064     if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
6065       {
6066       fprintf(outfile, "Minimum %s limit = 0\n", msg);
6067       break;
6068       }
6069     if (mid == min + 1)
6070       {
6071       fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
6072       break;
6073       }
6074     max = mid;
6075     mid = (min + max)/2;
6076     }
6077   else break;    /* Some other error */
6078   }
6079 
6080 return capcount;
6081 }
6082 
6083 
6084 
6085 /*************************************************
6086 *        Substitute callout function             *
6087 *************************************************/
6088 
6089 /* Called from pcre2_substitute() when the substitute_callout modifier is set.
6090 Print out the data that is passed back. The substitute callout block is
6091 identical for all code unit widths, so we just pick one.
6092 
6093 Arguments:
6094   scb         pointer to substitute callout block
6095   data_ptr    callout data
6096 
Returns:      0, or -1 (substitute_stop) or +1 (substitute_skip)
6098 */
6099 
6100 static int
substitute_callout_function(pcre2_substitute_callout_block_8 * scb,void * data_ptr)6101 substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
6102   void *data_ptr)
6103 {
6104 int yield = 0;
6105 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6106 (void)data_ptr;   /* Not used */
6107 
6108 fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"",
6109   scb->subscount, scb->oveccount,
6110   SIZ_CAST scb->ovector[0], SIZ_CAST scb->ovector[1]);
6111 
6112 PCHARSV(scb->input, scb->ovector[0], scb->ovector[1] - scb->ovector[0],
6113   utf, outfile);
6114 
6115 fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"",
6116   SIZ_CAST scb->output_offsets[0], SIZ_CAST scb->output_offsets[1]);
6117 
6118 PCHARSV(scb->output, scb->output_offsets[0],
6119   scb->output_offsets[1] - scb->output_offsets[0], utf, outfile);
6120 
6121 if (scb->subscount == dat_datctl.substitute_stop)
6122   {
6123   yield = -1;
6124   fprintf(outfile, " STOPPED");
6125   }
6126 else if (scb->subscount == dat_datctl.substitute_skip)
6127   {
6128   yield = +1;
6129   fprintf(outfile, " SKIPPED");
6130   }
6131 
6132 fprintf(outfile, "\"\n");
6133 return yield;
6134 }
6135 
6136 
6137 /*************************************************
6138 *              Callout function                  *
6139 *************************************************/
6140 
6141 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
6142 we are in the match (unless suppressed). Yield zero unless more callouts than
6143 the fail count, or the callout data is not zero. The only differences in the
6144 callout block for different code unit widths are that the pointers to the
6145 subject, the most recent MARK, and a callout argument string point to strings
6146 of the appropriate width. Casts can be used to deal with this.
6147 
6148 Arguments:
6149   cb                a pointer to a callout block
6150   callout_data_ptr  the provided callout data
6151 
6152 Returns:            0 or 1 or an error, as determined by settings
6153 */
6154 
6155 static int
callout_function(pcre2_callout_block_8 * cb,void * callout_data_ptr)6156 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
6157 {
6158 FILE *f, *fdefault;
6159 uint32_t i, pre_start, post_start, subject_length;
6160 PCRE2_SIZE current_position;
6161 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6162 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
6163 BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
6164 
6165 /* The FILE f is used for echoing the subject string if it is non-NULL. This
6166 happens only once in simple cases, but we want to repeat after any additional
6167 output caused by CALLOUT_EXTRA. */
6168 
6169 fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
6170   NULL : outfile;
6171 
6172 if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
6173   {
6174   f = outfile;
6175   switch (cb->callout_flags)
6176     {
6177     case PCRE2_CALLOUT_BACKTRACK:
6178     fprintf(f, "Backtrack\n");
6179     break;
6180 
6181     case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
6182     fprintf(f, "Backtrack\nNo other matching paths\n");
6183     /* Fall through */
6184 
6185     case PCRE2_CALLOUT_STARTMATCH:
6186     fprintf(f, "New match attempt\n");
6187     break;
6188 
6189     default:
6190     f = fdefault;
6191     break;
6192     }
6193   }
6194 else f = fdefault;
6195 
6196 /* For a callout with a string argument, show the string first because there
6197 isn't a tidy way to fit it in the rest of the data. */
6198 
6199 if (cb->callout_string != NULL)
6200   {
6201   uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
6202   fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
6203     SIZ_CAST cb->callout_string_offset, delimiter);
6204   PCHARSV(cb->callout_string, 0,
6205     cb->callout_string_length, utf, outfile);
6206   for (i = 0; callout_start_delims[i] != 0; i++)
6207     if (delimiter == callout_start_delims[i])
6208       {
6209       delimiter = callout_end_delims[i];
6210       break;
6211       }
6212   fprintf(outfile, "%c", delimiter);
6213   if (!callout_capture) fprintf(outfile, "\n");
6214   }
6215 
6216 /* Show captured strings if required */
6217 
6218 if (callout_capture)
6219   {
6220   if (cb->callout_string == NULL)
6221     fprintf(outfile, "Callout %d:", cb->callout_number);
6222   fprintf(outfile, " last capture = %d\n", cb->capture_last);
6223   for (i = 2; i < cb->capture_top * 2; i += 2)
6224     {
6225     fprintf(outfile, "%2d: ", i/2);
6226     if (cb->offset_vector[i] == PCRE2_UNSET)
6227       fprintf(outfile, "<unset>");
6228     else
6229       {
6230       PCHARSV(cb->subject, cb->offset_vector[i],
6231         cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
6232       }
6233     fprintf(outfile, "\n");
6234     }
6235   }
6236 
6237 /* Unless suppressed, re-print the subject in canonical form (with escapes for
6238 non-printing characters), the first time, or if giving full details. On
6239 subsequent calls in the same match, we use PCHARS() just to find the printed
6240 lengths of the substrings. */
6241 
6242 if (callout_where)
6243   {
6244   if (f != NULL) fprintf(f, "--->");
6245 
6246   /* The subject before the match start. */
6247 
6248   PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
6249 
6250   /* If a lookbehind is involved, the current position may be earlier than the
6251   match start. If so, use the match start instead. */
6252 
6253   current_position = (cb->current_position >= cb->start_match)?
6254     cb->current_position : cb->start_match;
6255 
6256   /* The subject between the match start and the current position. */
6257 
6258   PCHARS(post_start, cb->subject, cb->start_match,
6259     current_position - cb->start_match, utf, f);
6260 
6261   /* Print from the current position to the end. */
6262 
6263   PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
6264     utf, f);
6265 
6266   /* Calculate the total subject printed length (no print). */
6267 
6268   PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
6269 
6270   if (f != NULL) fprintf(f, "\n");
6271 
6272   /* For automatic callouts, show the pattern offset. Otherwise, for a
6273   numerical callout whose number has not already been shown with captured
6274   strings, show the number here. A callout with a string argument has been
6275   displayed above. */
6276 
6277   if (cb->callout_number == 255)
6278     {
6279     fprintf(outfile, "%+3d ", (int)cb->pattern_position);
6280     if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
6281     }
6282   else
6283     {
6284     if (callout_capture || cb->callout_string != NULL) fprintf(outfile, "    ");
6285       else fprintf(outfile, "%3d ", cb->callout_number);
6286     }
6287 
6288   /* Now show position indicators */
6289 
6290   for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
6291   fprintf(outfile, "^");
6292 
6293   if (post_start > 0)
6294     {
6295     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
6296     fprintf(outfile, "^");
6297     }
6298 
6299   for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
6300     fprintf(outfile, " ");
6301 
6302   if (cb->next_item_length != 0)
6303     fprintf(outfile, "%.*s", (int)(cb->next_item_length),
6304       pbuffer8 + cb->pattern_position);
6305   else
6306     fprintf(outfile, "End of pattern");
6307 
6308   fprintf(outfile, "\n");
6309   }
6310 
6311 first_callout = FALSE;
6312 
6313 /* Show any mark info */
6314 
6315 if (cb->mark != last_callout_mark)
6316   {
6317   if (cb->mark == NULL)
6318     fprintf(outfile, "Latest Mark: <unset>\n");
6319   else
6320     {
6321     fprintf(outfile, "Latest Mark: ");
6322     PCHARSV(cb->mark, -1, -1, utf, outfile);
6323     putc('\n', outfile);
6324     }
6325   last_callout_mark = cb->mark;
6326   }
6327 
6328 /* Show callout data */
6329 
6330 if (callout_data_ptr != NULL)
6331   {
6332   int callout_data = *((int32_t *)callout_data_ptr);
6333   if (callout_data != 0)
6334     {
6335     fprintf(outfile, "Callout data = %d\n", callout_data);
6336     return callout_data;
6337     }
6338   }
6339 
6340 /* Keep count and give the appropriate return code */
6341 
6342 callout_count++;
6343 
6344 if (cb->callout_number == dat_datctl.cerror[0] &&
6345     callout_count >= dat_datctl.cerror[1])
6346   return PCRE2_ERROR_CALLOUT;
6347 
6348 if (cb->callout_number == dat_datctl.cfail[0] &&
6349     callout_count >= dat_datctl.cfail[1])
6350   return 1;
6351 
6352 return 0;
6353 }
6354 
6355 
6356 
6357 /*************************************************
6358 *       Handle *MARK and copy/get tests          *
6359 *************************************************/
6360 
6361 /* This function is called after complete and partial matches. It runs the
6362 tests for substring extraction.
6363 
6364 Arguments:
6365   utf       TRUE for utf
6366   capcount  return from pcre2_match()
6367 
6368 Returns:    FALSE if print_error_message() fails
6369 */
6370 
6371 static BOOL
copy_and_get(BOOL utf,int capcount)6372 copy_and_get(BOOL utf, int capcount)
6373 {
6374 int i;
6375 uint8_t *nptr;
6376 
6377 /* Test copy strings by number */
6378 
6379 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
6380   {
6381   int rc;
6382   PCRE2_SIZE length, length2;
6383   uint32_t copybuffer[256];
6384   uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
6385   length = sizeof(copybuffer)/code_unit_size;
6386   PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
6387   if (rc < 0)
6388     {
6389     fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
6390     if (!print_error_message(rc, "", "\n")) return FALSE;
6391     }
6392   else
6393     {
6394     PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
6395     if (rc < 0)
6396       {
6397       fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
6398       if (!print_error_message(rc, "", "\n")) return FALSE;
6399       }
6400     else if (length2 != length)
6401       {
6402       fprintf(outfile, "Mismatched substring lengths: %"
6403         SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2);
6404       }
6405     fprintf(outfile, "%2dC ", n);
6406     PCHARSV(copybuffer, 0, length, utf, outfile);
6407     fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length);
6408     }
6409   }
6410 
6411 /* Test copy strings by name */
6412 
6413 nptr = dat_datctl.copy_names;
6414 for (;;)
6415   {
6416   int rc;
6417   int groupnumber;
6418   PCRE2_SIZE length, length2;
6419   uint32_t copybuffer[256];
6420   int namelen = strlen((const char *)nptr);
6421 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6422   PCRE2_SIZE cnl = namelen;
6423 #endif
6424   if (namelen == 0) break;
6425 
6426 #ifdef SUPPORT_PCRE2_8
6427   if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6428 #endif
6429 #ifdef SUPPORT_PCRE2_16
6430   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6431 #endif
6432 #ifdef SUPPORT_PCRE2_32
6433   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6434 #endif
6435 
6436   PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6437   if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6438     fprintf(outfile, "Number not found for group '%s'\n", nptr);
6439 
6440   length = sizeof(copybuffer)/code_unit_size;
6441   PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
6442   if (rc < 0)
6443     {
6444     fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
6445     if (!print_error_message(rc, "", "\n")) return FALSE;
6446     }
6447   else
6448     {
6449     PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
6450     if (rc < 0)
6451       {
6452       fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
6453       if (!print_error_message(rc, "", "\n")) return FALSE;
6454       }
6455     else if (length2 != length)
6456       {
6457       fprintf(outfile, "Mismatched substring lengths: %"
6458         SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2);
6459       }
6460     fprintf(outfile, "  C ");
6461     PCHARSV(copybuffer, 0, length, utf, outfile);
6462     fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr);
6463     if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6464       else fprintf(outfile, " (non-unique)\n");
6465     }
6466   nptr += namelen + 1;
6467   }
6468 
6469 /* Test get strings by number */
6470 
6471 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
6472   {
6473   int rc;
6474   PCRE2_SIZE length;
6475   void *gotbuffer;
6476   uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
6477   PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
6478   if (rc < 0)
6479     {
6480     fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
6481     if (!print_error_message(rc, "", "\n")) return FALSE;
6482     }
6483   else
6484     {
6485     fprintf(outfile, "%2dG ", n);
6486     PCHARSV(gotbuffer, 0, length, utf, outfile);
6487     fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length);
6488     PCRE2_SUBSTRING_FREE(gotbuffer);
6489     }
6490   }
6491 
6492 /* Test get strings by name */
6493 
6494 nptr = dat_datctl.get_names;
6495 for (;;)
6496   {
6497   PCRE2_SIZE length;
6498   void *gotbuffer;
6499   int rc;
6500   int groupnumber;
6501   int namelen = strlen((const char *)nptr);
6502 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6503   PCRE2_SIZE cnl = namelen;
6504 #endif
6505   if (namelen == 0) break;
6506 
6507 #ifdef SUPPORT_PCRE2_8
6508   if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6509 #endif
6510 #ifdef SUPPORT_PCRE2_16
6511   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6512 #endif
6513 #ifdef SUPPORT_PCRE2_32
6514   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6515 #endif
6516 
6517   PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6518   if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6519     fprintf(outfile, "Number not found for group '%s'\n", nptr);
6520 
6521   PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
6522   if (rc < 0)
6523     {
6524     fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
6525     if (!print_error_message(rc, "", "\n")) return FALSE;
6526     }
6527   else
6528     {
6529     fprintf(outfile, "  G ");
6530     PCHARSV(gotbuffer, 0, length, utf, outfile);
6531     fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr);
6532     if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6533       else fprintf(outfile, " (non-unique)\n");
6534     PCRE2_SUBSTRING_FREE(gotbuffer);
6535     }
6536   nptr += namelen + 1;
6537   }
6538 
6539 /* Test getting the complete list of captured strings. */
6540 
6541 if ((dat_datctl.control & CTL_GETALL) != 0)
6542   {
6543   int rc;
6544   void **stringlist;
6545   PCRE2_SIZE *lengths;
6546   PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
6547   if (rc < 0)
6548     {
6549     fprintf(outfile, "get substring list failed (%d): ", rc);
6550     if (!print_error_message(rc, "", "\n")) return FALSE;
6551     }
6552   else
6553     {
6554     for (i = 0; i < capcount; i++)
6555       {
6556       fprintf(outfile, "%2dL ", i);
6557       PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
6558       putc('\n', outfile);
6559       }
6560     if (stringlist[i] != NULL)
6561       fprintf(outfile, "string list not terminated by NULL\n");
6562     PCRE2_SUBSTRING_LIST_FREE(stringlist);
6563     }
6564   }
6565 
6566 return TRUE;
6567 }
6568 
6569 
6570 
6571 /*************************************************
6572 *            Show an entire ovector              *
6573 *************************************************/
6574 
6575 /* This function is called after partial matching or match failure, when the
6576 "allvector" modifier is set. It is a means of checking the contents of the
6577 entire ovector, to ensure no modification of fields that should be unchanged.
6578 
6579 Arguments:
6580   ovector      points to the ovector
6581   oveccount    number of pairs
6582 
6583 Returns:       nothing
6584 */
6585 
6586 static void
show_ovector(PCRE2_SIZE * ovector,uint32_t oveccount)6587 show_ovector(PCRE2_SIZE *ovector, uint32_t oveccount)
6588 {
6589 uint32_t i;
6590 for (i = 0; i < 2*oveccount; i += 2)
6591   {
6592   PCRE2_SIZE start = ovector[i];
6593   PCRE2_SIZE end = ovector[i+1];
6594 
6595   fprintf(outfile, "%2d: ", i/2);
6596   if (start == PCRE2_UNSET && end == PCRE2_UNSET)
6597     fprintf(outfile, "<unset>\n");
6598   else if (start == JUNK_OFFSET && end == JUNK_OFFSET)
6599     fprintf(outfile, "<unchanged>\n");
6600   else
6601     fprintf(outfile, "%ld %ld\n", (unsigned long int)start,
6602       (unsigned long int)end);
6603   }
6604 }
6605 
6606 
6607 /*************************************************
6608 *               Process a data line              *
6609 *************************************************/
6610 
6611 /* The line is in buffer; it will not be empty.
6612 
6613 Arguments:  none
6614 
6615 Returns:    PR_OK     continue processing next line
6616             PR_SKIP   skip to a blank line
6617             PR_ABEND  abort the pcre2test run
6618 */
6619 
6620 static int
process_data(void)6621 process_data(void)
6622 {
6623 PCRE2_SIZE len, ulen, arg_ulen;
6624 uint32_t gmatched;
6625 uint32_t c, k;
6626 uint32_t g_notempty = 0;
6627 uint8_t *p, *pp, *start_rep;
6628 size_t needlen;
6629 void *use_dat_context;
6630 BOOL utf;
6631 BOOL subject_literal;
6632 
6633 PCRE2_SIZE *ovector;
6634 PCRE2_SIZE ovecsave[3];
6635 uint32_t oveccount;
6636 
6637 #ifdef SUPPORT_PCRE2_8
6638 uint8_t *q8 = NULL;
6639 #endif
6640 #ifdef SUPPORT_PCRE2_16
6641 uint16_t *q16 = NULL;
6642 #endif
6643 #ifdef SUPPORT_PCRE2_32
6644 uint32_t *q32 = NULL;
6645 #endif
6646 
6647 subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
6648 
6649 /* Copy the default context and data control blocks to the active ones. Then
6650 copy from the pattern the controls that can be set in either the pattern or the
6651 data. This allows them to be overridden in the data line. We do not do this for
6652 options because those that are common apply separately to compiling and
6653 matching. */
6654 
6655 DATCTXCPY(dat_context, default_dat_context);
6656 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
6657 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
6658 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
6659 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
6660 if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
6661 
6662 if (dat_datctl.substitute_skip == 0)
6663     dat_datctl.substitute_skip = pat_patctl.substitute_skip;
6664 if (dat_datctl.substitute_stop == 0)
6665     dat_datctl.substitute_stop = pat_patctl.substitute_stop;
6666 
6667 /* Initialize for scanning the data line. */
6668 
6669 #ifdef SUPPORT_PCRE2_8
6670 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
6671   ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
6672   FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
6673 #else
6674 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6675 #endif
6676 
6677 start_rep = NULL;
6678 len = strlen((const char *)buffer);
6679 while (len > 0 && isspace(buffer[len-1])) len--;
6680 buffer[len] = 0;
6681 p = buffer;
6682 while (isspace(*p)) p++;
6683 
6684 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
6685 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
6686 
6687 if (utf)
6688   {
6689   uint8_t *q;
6690   uint32_t cc;
6691   int n = 1;
6692   for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
6693   if (n <= 0)
6694     {
6695     fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
6696       "in UTF mode\n");
6697     return PR_OK;
6698     }
6699   }
6700 
6701 #ifdef SUPPORT_VALGRIND
6702 /* Mark the dbuffer as addressable but undefined again. */
6703 if (dbuffer != NULL)
6704   {
6705   VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
6706   }
6707 #endif
6708 
6709 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
6710 the number of code units that will be needed (though the buffer may have to be
6711 extended if replication is involved). */
6712 
6713 needlen = (size_t)((len+1) * code_unit_size);
6714 if (dbuffer == NULL || needlen >= dbuffer_size)
6715   {
6716   while (needlen >= dbuffer_size) dbuffer_size *= 2;
6717   dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6718   if (dbuffer == NULL)
6719     {
6720     fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6721     exit(1);
6722     }
6723   }
6724 SETCASTPTR(q, dbuffer);  /* Sets q8, q16, or q32, as appropriate. */
6725 
6726 /* Scan the data line, interpreting data escapes, and put the result into a
6727 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
6728 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
6729 */
6730 
6731 while ((c = *p++) != 0)
6732   {
6733   int32_t i = 0;
6734   size_t replen;
6735 
6736   /* ] may mark the end of a replicated sequence */
6737 
6738   if (c == ']' && start_rep != NULL)
6739     {
6740     long li;
6741     char *endptr;
6742     size_t qoffset = CAST8VAR(q) - dbuffer;
6743     size_t rep_offset = start_rep - dbuffer;
6744 
6745     if (*p++ != '{')
6746       {
6747       fprintf(outfile, "** Expected '{' after \\[....]\n");
6748       return PR_OK;
6749       }
6750 
6751     li = strtol((const char *)p, &endptr, 10);
6752     if (S32OVERFLOW(li))
6753       {
6754       fprintf(outfile, "** Repeat count too large\n");
6755       return PR_OK;
6756       }
6757 
6758     p = (uint8_t *)endptr;
6759     if (*p++ != '}')
6760       {
6761       fprintf(outfile, "** Expected '}' after \\[...]{...\n");
6762       return PR_OK;
6763       }
6764 
6765     i = (int32_t)li;
6766     if (i-- == 0)
6767       {
6768       fprintf(outfile, "** Zero repeat not allowed\n");
6769       return PR_OK;
6770       }
6771 
6772     replen = CAST8VAR(q) - start_rep;
6773     needlen += replen * i;
6774 
6775     if (needlen >= dbuffer_size)
6776       {
6777       while (needlen >= dbuffer_size) dbuffer_size *= 2;
6778       dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6779       if (dbuffer == NULL)
6780         {
6781         fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6782         exit(1);
6783         }
6784       SETCASTPTR(q, dbuffer + qoffset);
6785       start_rep = dbuffer + rep_offset;
6786       }
6787 
6788     while (i-- > 0)
6789       {
6790       memcpy(CAST8VAR(q), start_rep, replen);
6791       SETPLUS(q, replen/code_unit_size);
6792       }
6793 
6794     start_rep = NULL;
6795     continue;
6796     }
6797 
6798   /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
6799   set, do the fudge for setting the top bit. */
6800 
6801   if (c != '\\' || subject_literal)
6802     {
6803     uint32_t topbit = 0;
6804     if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
6805       {
6806       topbit = 0x80000000;
6807       c = *p++;
6808       }
6809     if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
6810       HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
6811     c |= topbit;
6812     }
6813 
6814   /* Handle backslash escapes */
6815 
6816   else switch ((c = *p++))
6817     {
6818     case '\\': break;
6819     case 'a': c = CHAR_BEL; break;
6820     case 'b': c = '\b'; break;
6821     case 'e': c = CHAR_ESC; break;
6822     case 'f': c = '\f'; break;
6823     case 'n': c = '\n'; break;
6824     case 'r': c = '\r'; break;
6825     case 't': c = '\t'; break;
6826     case 'v': c = '\v'; break;
6827 
6828     case '0': case '1': case '2': case '3':
6829     case '4': case '5': case '6': case '7':
6830     c -= '0';
6831     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
6832       c = c * 8 + *p++ - '0';
6833     break;
6834 
6835     case 'o':
6836     if (*p == '{')
6837       {
6838       uint8_t *pt = p;
6839       c = 0;
6840       for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
6841         {
6842         if (++i == 12)
6843           fprintf(outfile, "** Too many octal digits in \\o{...} item; "
6844                            "using only the first twelve.\n");
6845         else c = c * 8 + *pt - '0';
6846         }
6847       if (*pt == '}') p = pt + 1;
6848         else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
6849       }
6850     break;
6851 
6852     case 'x':
6853     if (*p == '{')
6854       {
6855       uint8_t *pt = p;
6856       c = 0;
6857 
6858       /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
6859       when isxdigit() is a macro that refers to its argument more than
6860       once. This is banned by the C Standard, but apparently happens in at
6861       least one MacOS environment. */
6862 
6863       for (pt++; isxdigit(*pt); pt++)
6864         {
6865         if (++i == 9)
6866           fprintf(outfile, "** Too many hex digits in \\x{...} item; "
6867                            "using only the first eight.\n");
6868         else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
6869         }
6870       if (*pt == '}')
6871         {
6872         p = pt + 1;
6873         break;
6874         }
6875       /* Not correct form for \x{...}; fall through */
6876       }
6877 
6878     /* \x without {} always defines just one byte in 8-bit mode. This
6879     allows UTF-8 characters to be constructed byte by byte, and also allows
6880     invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
6881     Otherwise, pass it down as data. */
6882 
6883     c = 0;
6884     while (i++ < 2 && isxdigit(*p))
6885       {
6886       c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
6887       p++;
6888       }
6889 #if defined SUPPORT_PCRE2_8
6890     if (utf && (test_mode == PCRE8_MODE))
6891       {
6892       *q8++ = c;
6893       continue;
6894       }
6895 #endif
6896     break;
6897 
6898     case 0:     /* \ followed by EOF allows for an empty line */
6899     p--;
6900     continue;
6901 
6902     case '=':   /* \= terminates the data, starts modifiers */
6903     goto ENDSTRING;
6904 
6905     case '[':   /* \[ introduces a replicated character sequence */
6906     if (start_rep != NULL)
6907       {
6908       fprintf(outfile, "** Nested replication is not supported\n");
6909       return PR_OK;
6910       }
6911     start_rep = CAST8VAR(q);
6912     continue;
6913 
6914     default:
6915     if (isalnum(c))
6916       {
6917       fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
6918       return PR_OK;
6919       }
6920     }
6921 
6922   /* We now have a character value in c that may be greater than 255.
6923   In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
6924   than 127 in UTF mode must have come from \x{...} or octal constructs
6925   because values from \x.. get this far only in non-UTF mode. */
6926 
6927 #ifdef SUPPORT_PCRE2_8
6928   if (test_mode == PCRE8_MODE)
6929     {
6930     if (utf)
6931       {
6932       if (c > 0x7fffffff)
6933         {
6934         fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
6935           "and so cannot be converted to UTF-8\n", c);
6936         return PR_OK;
6937         }
6938       q8 += ord2utf8(c, q8);
6939       }
6940     else
6941       {
6942       if (c > 0xffu)
6943         {
6944         fprintf(outfile, "** Character \\x{%x} is greater than 255 "
6945           "and UTF-8 mode is not enabled.\n", c);
6946         fprintf(outfile, "** Truncation will probably give the wrong "
6947           "result.\n");
6948         }
6949       *q8++ = (uint8_t)c;
6950       }
6951     }
6952 #endif
6953 #ifdef SUPPORT_PCRE2_16
6954   if (test_mode == PCRE16_MODE)
6955     {
6956     if (utf)
6957       {
6958       if (c > 0x10ffffu)
6959         {
6960         fprintf(outfile, "** Failed: character \\x{%x} is greater than "
6961           "0x10ffff and so cannot be converted to UTF-16\n", c);
6962         return PR_OK;
6963         }
6964       else if (c >= 0x10000u)
6965         {
6966         c-= 0x10000u;
6967         *q16++ = 0xD800 | (c >> 10);
6968         *q16++ = 0xDC00 | (c & 0x3ff);
6969         }
6970       else
6971         *q16++ = c;
6972       }
6973     else
6974       {
6975       if (c > 0xffffu)
6976         {
6977         fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
6978           "and UTF-16 mode is not enabled.\n", c);
6979         fprintf(outfile, "** Truncation will probably give the wrong "
6980           "result.\n");
6981         }
6982 
6983       *q16++ = (uint16_t)c;
6984       }
6985     }
6986 #endif
6987 #ifdef SUPPORT_PCRE2_32
6988   if (test_mode == PCRE32_MODE)
6989     {
6990     *q32++ = c;
6991     }
6992 #endif
6993   }
6994 
6995 ENDSTRING:
6996 SET(*q, 0);
6997 len = CASTVAR(uint8_t *, q) - dbuffer;    /* Length in bytes */
6998 ulen = len/code_unit_size;                /* Length in code units */
6999 arg_ulen = ulen;                          /* Value to use in match arg */
7000 
7001 /* If the string was terminated by \= we must now interpret modifiers. */
7002 
7003 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
7004   return PR_OK;
7005 
/* Setting substitute_{skip,stop} implies a substitute callout. */
7007 
7008 if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0)
7009   dat_datctl.control2 |= CTL2_SUBSTITUTE_CALLOUT;
7010 
7011 /* Check for mutually exclusive modifiers. At present, these are all in the
7012 first control word. */
7013 
7014 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
7015   {
7016   c = dat_datctl.control & exclusive_dat_controls[k];
7017   if (c != 0 && c != (c & (~c+1)))
7018     {
7019     show_controls(c, 0, "** Not allowed together:");
7020     fprintf(outfile, "\n");
7021     return PR_OK;
7022     }
7023   }
7024 
7025 if (pat_patctl.replacement[0] != 0)
7026   {
7027   if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 &&
7028       (dat_datctl.control & CTL_NULLCONTEXT) != 0)
7029     {
7030     fprintf(outfile, "** Replacement callouts are not supported with null_context.\n");
7031     return PR_OK;
7032     }
7033 
7034   if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7035     fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
7036   }
7037 
7038 /* Warn for modifiers that are ignored for DFA. */
7039 
7040 if ((dat_datctl.control & CTL_DFA) != 0)
7041   {
7042   if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7043     fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
7044   }
7045 
7046 /* We now have the subject in dbuffer, with len containing the byte length, and
7047 ulen containing the code unit length, with a copy in arg_ulen for use in match
7048 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
7049 zero_terminate modifier is present).
7050 
7051 Move the data to the end of the buffer so that a read over the end can be
7052 caught by valgrind or other means. If we have explicit valgrind support, mark
7053 the unused start of the buffer unaddressable. If we are using the POSIX
7054 interface, or testing zero-termination, we must include the terminating zero in
7055 the usable data. */
7056 
7057 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
7058                        (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
7059 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
7060 #ifdef SUPPORT_VALGRIND
7061   VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
7062 #endif
7063 
7064 /* Now pp points to the subject string. POSIX matching is only possible in
7065 8-bit mode, and it does not support timing or other fancy features. Some were
7066 checked at compile time, but we need to check the match-time settings here. */
7067 
7068 #ifdef SUPPORT_PCRE2_8
7069 if ((pat_patctl.control & CTL_POSIX) != 0)
7070   {
7071   int rc;
7072   int eflags = 0;
7073   regmatch_t *pmatch = NULL;
7074   const char *msg = "** Ignored with POSIX interface:";
7075 
7076   if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
7077     prmsg(&msg, "callout_error");
7078   if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
7079     prmsg(&msg, "callout_fail");
7080   if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
7081     prmsg(&msg, "copy");
7082   if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
7083     prmsg(&msg, "get");
7084   if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
7085   if (dat_datctl.offset != 0) prmsg(&msg, "offset");
7086 
7087   if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
7088     {
7089     fprintf(outfile, "%s", msg);
7090     show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
7091     msg = "";
7092     }
7093   if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
7094       (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
7095     {
7096     show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
7097                   dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
7098     msg = "";
7099     }
7100 
7101   if (msg[0] == 0) fprintf(outfile, "\n");
7102 
7103   if (dat_datctl.oveccount > 0)
7104     {
7105     pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
7106     if (pmatch == NULL)
7107       {
7108       fprintf(outfile, "** Failed to get memory for recording matching "
7109         "information (size set = %du)\n", dat_datctl.oveccount);
7110       return PR_OK;
7111       }
7112     }
7113 
7114   if (dat_datctl.startend[0] != CFORE_UNSET)
7115     {
7116     pmatch[0].rm_so = dat_datctl.startend[0];
7117     pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
7118       dat_datctl.startend[1] : len;
7119     eflags |= REG_STARTEND;
7120     }
7121 
7122   if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
7123   if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
7124   if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
7125 
7126   rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
7127   if (rc != 0)
7128     {
7129     (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
7130     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
7131     }
7132   else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
7133     fprintf(outfile, "Matched with REG_NOSUB\n");
7134   else if (dat_datctl.oveccount == 0)
7135     fprintf(outfile, "Matched without capture\n");
7136   else
7137     {
7138     size_t i, j;
7139     size_t last_printed = (size_t)dat_datctl.oveccount;
7140     for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
7141       {
7142       if (pmatch[i].rm_so >= 0)
7143         {
7144         PCRE2_SIZE start = pmatch[i].rm_so;
7145         PCRE2_SIZE end = pmatch[i].rm_eo;
7146         for (j = last_printed + 1; j < i; j++)
7147           fprintf(outfile, "%2d: <unset>\n", (int)j);
7148         last_printed = i;
7149         if (start > end)
7150           {
7151           start = pmatch[i].rm_eo;
7152           end = pmatch[i].rm_so;
7153           fprintf(outfile, "Start of matched string is beyond its end - "
7154             "displaying from end to start.\n");
7155           }
7156         fprintf(outfile, "%2d: ", (int)i);
7157         PCHARSV(pp, start, end - start, utf, outfile);
7158         fprintf(outfile, "\n");
7159 
7160         if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
7161             (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
7162           {
7163           fprintf(outfile, "%2d+ ", (int)i);
7164           /* Note: don't use the start/end variables here because we want to
7165           show the text from what is reported as the end. */
7166           PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
7167           fprintf(outfile, "\n"); }
7168         }
7169       }
7170     }
7171   free(pmatch);
7172   return PR_OK;
7173   }
7174 #endif  /* SUPPORT_PCRE2_8 */
7175 
7176  /* Handle matching via the native interface. Check for consistency of
7177 modifiers. */
7178 
7179 if (dat_datctl.startend[0] != CFORE_UNSET)
7180   fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
7181 
7182 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
7183 matching, even if the JIT compiler was used. */
7184 
7185 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
7186     FLD(compiled_code, executable_jit) != NULL)
7187   {
7188   fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
7189   dat_datctl.control &= ~CTL_ALLUSEDTEXT;
7190   }
7191 
7192 /* Handle passing the subject as zero-terminated. */
7193 
7194 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7195   arg_ulen = PCRE2_ZERO_TERMINATED;
7196 
7197 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
7198 NULL context. */
7199 
7200 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
7201   NULL : PTR(dat_context);
7202 
7203 /* Enable display of malloc/free if wanted. We can do this only if either the
7204 pattern or the subject is processed with a context. */
7205 
7206 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
7207 
7208 if (show_memory &&
7209     (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
7210   fprintf(outfile, "** \\=memory requires either a pattern or a subject "
7211     "context: ignored\n");
7212 
7213 /* Create and assign a JIT stack if requested. */
7214 
7215 if (dat_datctl.jitstack != 0)
7216   {
7217   if (dat_datctl.jitstack != jit_stack_size)
7218     {
7219     PCRE2_JIT_STACK_FREE(jit_stack);
7220     PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
7221     jit_stack_size = dat_datctl.jitstack;
7222     }
7223   PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
7224   }
7225 
7226 /* Or de-assign */
7227 
7228 else if (jit_stack != NULL)
7229   {
7230   PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
7231   PCRE2_JIT_STACK_FREE(jit_stack);
7232   jit_stack = NULL;
7233   jit_stack_size = 0;
7234   }
7235 
7236 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
7237 if we want to verify that JIT was actually used. */
7238 
7239 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
7240    {
7241    PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
7242    }
7243 
7244 /* Adjust match_data according to size of offsets required. A size of zero
7245 causes a new match data block to be obtained that exactly fits the pattern. */
7246 
7247 if (dat_datctl.oveccount == 0)
7248   {
7249   PCRE2_MATCH_DATA_FREE(match_data);
7250   PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL);
7251   PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
7252   }
7253 else if (dat_datctl.oveccount <= max_oveccount)
7254   {
7255   SETFLD(match_data, oveccount, dat_datctl.oveccount);
7256   }
7257 else
7258   {
7259   max_oveccount = dat_datctl.oveccount;
7260   PCRE2_MATCH_DATA_FREE(match_data);
7261   PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
7262   }
7263 
7264 if (CASTVAR(void *, match_data) == NULL)
7265   {
7266   fprintf(outfile, "** Failed to get memory for recording matching "
7267     "information (size requested: %d)\n", dat_datctl.oveccount);
7268   max_oveccount = 0;
7269   return PR_OK;
7270   }
7271 
7272 ovector = FLD(match_data, ovector);
7273 PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
7274 
7275 /* Replacement processing is ignored for DFA matching. */
7276 
7277 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
7278   {
7279   fprintf(outfile, "** Ignored for DFA matching: replace\n");
7280   dat_datctl.replacement[0] = 0;
7281   }
7282 
7283 /* If a replacement string is provided, call pcre2_substitute() instead of one
7284 of the matching functions. First we have to convert the replacement string to
7285 the appropriate width. */
7286 
7287 if (dat_datctl.replacement[0] != 0)
7288   {
7289   int rc;
7290   uint8_t *pr;
7291   uint8_t rbuffer[REPLACE_BUFFSIZE];
7292   uint8_t nbuffer[REPLACE_BUFFSIZE];
7293   uint32_t xoptions;
7294   uint32_t emoption;  /* External match option */
7295   PCRE2_SIZE j, rlen, nsize, erroroffset;
7296   BOOL badutf = FALSE;
7297 
7298 #ifdef SUPPORT_PCRE2_8
7299   uint8_t *r8 = NULL;
7300 #endif
7301 #ifdef SUPPORT_PCRE2_16
7302   uint16_t *r16 = NULL;
7303 #endif
7304 #ifdef SUPPORT_PCRE2_32
7305   uint32_t *r32 = NULL;
7306 #endif
7307 
7308   /* Fill the ovector with junk to detect elements that do not get set
7309   when they should be (relevant only when "allvector" is specified). */
7310 
7311   for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7312 
7313   if (timeitm)
7314     fprintf(outfile, "** Timing is not supported with replace: ignored\n");
7315 
7316   if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
7317     fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
7318 
7319   /* Check for a test that does substitution after an initial external match.
7320   If this is set, we run the external match, but leave the interpretation of
7321   its output to pcre2_substitute(). */
7322 
7323   emoption = ((dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0)? 0 :
7324     PCRE2_SUBSTITUTE_MATCHED;
7325 
7326   if (emoption != 0)
7327     {
7328     PCRE2_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7329       dat_datctl.options, match_data, use_dat_context);
7330     }
7331 
7332   xoptions = emoption |
7333              (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
7334                 PCRE2_SUBSTITUTE_GLOBAL) |
7335              (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
7336                 PCRE2_SUBSTITUTE_EXTENDED) |
7337              (((dat_datctl.control2 & CTL2_SUBSTITUTE_LITERAL) == 0)? 0 :
7338                 PCRE2_SUBSTITUTE_LITERAL) |
7339              (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
7340                 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
7341              (((dat_datctl.control2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) == 0)? 0 :
7342                 PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) |
7343              (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
7344                 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
7345              (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
7346                 PCRE2_SUBSTITUTE_UNSET_EMPTY);
7347 
7348   SETCASTPTR(r, rbuffer);  /* Sets r8, r16, or r32, as appropriate. */
7349   pr = dat_datctl.replacement;
7350 
7351   /* If the replacement starts with '[<number>]' we interpret that as length
7352   value for the replacement buffer. */
7353 
7354   nsize = REPLACE_BUFFSIZE/code_unit_size;
7355   if (*pr == '[')
7356     {
7357     PCRE2_SIZE n = 0;
7358     while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
7359     if (*pr++ != ']')
7360       {
7361       fprintf(outfile, "Bad buffer size in replacement string\n");
7362       return PR_OK;
7363       }
7364     if (n > nsize)
7365       {
7366       fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too "
7367         "large (max %" SIZ_FORM ")\n", SIZ_CAST n, SIZ_CAST nsize);
7368       return PR_OK;
7369       }
7370     nsize = n;
7371     }
7372 
7373   /* Now copy the replacement string to a buffer of the appropriate width. No
7374   escape processing is done for replacements. In UTF mode, check for an invalid
7375   UTF-8 input string, and if it is invalid, just copy its code units without
7376   UTF interpretation. This provides a means of checking that an invalid string
7377   is detected. Otherwise, UTF-8 can be used to include wide characters in a
7378   replacement. */
7379 
7380   if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
7381 
7382   /* Not UTF or invalid UTF-8: just copy the code units. */
7383 
7384   if (!utf || badutf)
7385     {
7386     while ((c = *pr++) != 0)
7387       {
7388 #ifdef SUPPORT_PCRE2_8
7389       if (test_mode == PCRE8_MODE) *r8++ = c;
7390 #endif
7391 #ifdef SUPPORT_PCRE2_16
7392       if (test_mode == PCRE16_MODE) *r16++ = c;
7393 #endif
7394 #ifdef SUPPORT_PCRE2_32
7395       if (test_mode == PCRE32_MODE) *r32++ = c;
7396 #endif
7397       }
7398     }
7399 
7400   /* Valid UTF-8 replacement string */
7401 
7402   else while ((c = *pr++) != 0)
7403     {
7404     if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
7405 
7406 #ifdef SUPPORT_PCRE2_8
7407     if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
7408 #endif
7409 
7410 #ifdef SUPPORT_PCRE2_16
7411     if (test_mode == PCRE16_MODE)
7412       {
7413       if (c >= 0x10000u)
7414         {
7415         c-= 0x10000u;
7416         *r16++ = 0xD800 | (c >> 10);
7417         *r16++ = 0xDC00 | (c & 0x3ff);
7418         }
7419       else *r16++ = c;
7420       }
7421 #endif
7422 
7423 #ifdef SUPPORT_PCRE2_32
7424     if (test_mode == PCRE32_MODE) *r32++ = c;
7425 #endif
7426     }
7427 
7428   SET(*r, 0);
7429   if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7430     rlen = PCRE2_ZERO_TERMINATED;
7431   else
7432     rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
7433 
7434   if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
7435     {
7436     PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL);
7437     }
7438   else
7439     {
7440     PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL);  /* No callout */
7441     }
7442 
7443   PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7444     dat_datctl.options|xoptions, match_data, use_dat_context,
7445     rbuffer, rlen, nbuffer, &nsize);
7446 
7447   if (rc < 0)
7448     {
7449     fprintf(outfile, "Failed: error %d", rc);
7450     if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
7451       fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
7452     fprintf(outfile, ": ");
7453     if (!print_error_message(rc, "", "")) return PR_ABEND;
7454     if (rc == PCRE2_ERROR_NOMEMORY &&
7455         (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
7456       fprintf(outfile, ": %ld code units are needed", (long int)nsize);
7457     }
7458   else
7459     {
7460     fprintf(outfile, "%2d: ", rc);
7461     PCHARSV(nbuffer, 0, nsize, utf, outfile);
7462     }
7463 
7464   fprintf(outfile, "\n");
7465   show_memory = FALSE;
7466 
7467   /* Show final ovector contents if requested. */
7468 
7469   if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7470     show_ovector(ovector, oveccount);
7471 
7472   return PR_OK;
7473   }   /* End of substitution handling */
7474 
7475 /* When a replacement string is not provided, run a loop for global matching
7476 with one of the basic matching functions. For altglobal (or first time round
7477 the loop), set an "unset" value for the previous match info. */
7478 
7479 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
7480 
7481 for (gmatched = 0;; gmatched++)
7482   {
7483   PCRE2_SIZE j;
7484   int capcount;
7485 
7486   /* Fill the ovector with junk to detect elements that do not get set
7487   when they should be. */
7488 
7489   for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7490 
7491   /* When matching is via pcre2_match(), we will detect the use of JIT via the
7492   stack callback function. */
7493 
7494   jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
7495 
7496   /* Do timing if required. */
7497 
7498   if (timeitm > 0)
7499     {
7500     int i;
7501     clock_t start_time, time_taken;
7502 
7503     if ((dat_datctl.control & CTL_DFA) != 0)
7504       {
7505       if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
7506         {
7507         fprintf(outfile, "Timing DFA restarts is not supported\n");
7508         return PR_OK;
7509         }
7510       if (dfa_workspace == NULL)
7511         dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7512       start_time = clock();
7513       for (i = 0; i < timeitm; i++)
7514         {
7515         PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7516           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7517           use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7518         }
7519       }
7520 
7521     else if ((pat_patctl.control & CTL_JITFAST) != 0)
7522       {
7523       start_time = clock();
7524       for (i = 0; i < timeitm; i++)
7525         {
7526         PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
7527           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7528           use_dat_context);
7529         }
7530       }
7531 
7532     else
7533       {
7534       start_time = clock();
7535       for (i = 0; i < timeitm; i++)
7536         {
7537         PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
7538           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7539           use_dat_context);
7540         }
7541       }
7542     total_match_time += (time_taken = clock() - start_time);
7543     fprintf(outfile, "Match time %.4f milliseconds\n",
7544       (((double)time_taken * 1000.0) / (double)timeitm) /
7545         (double)CLOCKS_PER_SEC);
7546     }
7547 
7548   /* Find the heap, match and depth limits if requested. The depth and heap
7549   limits are not relevant for JIT. The return from check_match_limit() is the
7550   return from the final call to pcre2_match() or pcre2_dfa_match(). */
7551 
7552   if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
7553     {
7554     capcount = 0;  /* This stops compiler warnings */
7555 
7556     if (FLD(compiled_code, executable_jit) == NULL ||
7557           (dat_datctl.options & PCRE2_NO_JIT) != 0)
7558       {
7559       (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
7560       }
7561 
7562     capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
7563       "match");
7564 
7565     if (FLD(compiled_code, executable_jit) == NULL ||
7566         (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
7567         (dat_datctl.control & CTL_DFA) != 0)
7568       {
7569       capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
7570         "depth");
7571       }
7572 
7573     if (capcount == 0)
7574       {
7575       fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7576       capcount = dat_datctl.oveccount;
7577       }
7578     }
7579 
7580   /* Otherwise just run a single match, setting up a callout if required (the
7581   default). There is a copy of the pattern in pbuffer8 for use by callouts. */
7582 
7583   else
7584     {
7585     if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
7586       {
7587       PCRE2_SET_CALLOUT(dat_context, callout_function,
7588         (void *)(&dat_datctl.callout_data));
7589       first_callout = TRUE;
7590       last_callout_mark = NULL;
7591       callout_count = 0;
7592       }
7593     else
7594       {
7595       PCRE2_SET_CALLOUT(dat_context, NULL, NULL);  /* No callout */
7596       }
7597 
7598     /* Run a single DFA or NFA match. */
7599 
7600     if ((dat_datctl.control & CTL_DFA) != 0)
7601       {
7602       if (dfa_workspace == NULL)
7603         dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7604       if (dfa_matched++ == 0)
7605         dfa_workspace[0] = -1;  /* To catch bad restart */
7606       PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7607         dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7608         use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7609       if (capcount == 0)
7610         {
7611         fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7612         capcount = dat_datctl.oveccount;
7613         }
7614       }
7615     else
7616       {
7617       if ((pat_patctl.control & CTL_JITFAST) != 0)
7618         PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7619           dat_datctl.options | g_notempty, match_data, use_dat_context);
7620       else
7621         PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7622           dat_datctl.options | g_notempty, match_data, use_dat_context);
7623       if (capcount == 0)
7624         {
7625         fprintf(outfile, "Matched, but too many substrings\n");
7626         capcount = dat_datctl.oveccount;
7627         }
7628       }
7629     }
7630 
7631   /* The result of the match is now in capcount. First handle a successful
7632   match. */
7633 
7634   if (capcount >= 0)
7635     {
7636     int i;
7637 
7638     if (capcount > (int)oveccount)   /* Check for lunatic return value */
7639       {
7640       fprintf(outfile,
7641         "** PCRE2 error: returned count %d is too big for ovector count %d\n",
7642         capcount, oveccount);
7643       capcount = oveccount;
7644       if ((dat_datctl.control & CTL_ANYGLOB) != 0)
7645         {
7646         fprintf(outfile, "** Global loop abandoned\n");
7647         dat_datctl.control &= ~CTL_ANYGLOB;        /* Break g/G loop */
7648         }
7649       }
7650 
7651     /* If PCRE2_COPY_MATCHED_SUBJECT was set, check that things are as they
7652     should be, but not for fast JIT, where it isn't supported. */
7653 
7654     if ((dat_datctl.options & PCRE2_COPY_MATCHED_SUBJECT) != 0 &&
7655         (pat_patctl.control & CTL_JITFAST) == 0)
7656       {
7657       if ((FLD(match_data, flags) & PCRE2_MD_COPIED_SUBJECT) == 0)
7658         fprintf(outfile,
7659           "** PCRE2 error: flag not set after copy_matched_subject\n");
7660 
7661       if (CASTFLD(void *, match_data, subject) == pp)
7662         fprintf(outfile,
7663           "** PCRE2 error: copy_matched_subject has not copied\n");
7664 
7665       if (memcmp(CASTFLD(void *, match_data, subject), pp, ulen) != 0)
7666         fprintf(outfile,
7667           "** PCRE2 error: copy_matched_subject mismatch\n");
7668       }
7669 
7670     /* If this is not the first time round a global loop, check that the
7671     returned string has changed. If it has not, check for an empty string match
7672     at different starting offset from the previous match. This is a failed test
7673     retry for null-matching patterns that don't match at their starting offset,
7674     for example /(?<=\G.)/. A repeated match at the same point is not such a
7675     pattern, and must be discarded, and we then proceed to seek a non-null
7676     match at the current point. For any other repeated match, there is a bug
7677     somewhere and we must break the loop because it will go on for ever. We
7678     know that there are always at least two elements in the ovector. */
7679 
7680     if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
7681       {
7682       if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
7683         {
7684         g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7685         ovecsave[2] = dat_datctl.offset;
7686         continue;    /* Back to the top of the loop */
7687         }
7688       fprintf(outfile,
7689         "** PCRE2 error: global repeat returned the same string as previous\n");
7690       fprintf(outfile, "** Global loop abandoned\n");
7691       dat_datctl.control &= ~CTL_ANYGLOB;        /* Break g/G loop */
7692       }
7693 
7694     /* "allcaptures" requests showing of all captures in the pattern, to check
7695     unset ones at the end. It may be set on the pattern or the data. Implement
7696     by setting capcount to the maximum. This is not relevant for DFA matching,
7697     so ignore it (warning given above). */
7698 
7699     if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES)
7700       {
7701       capcount = maxcapcount + 1;   /* Allow for full match */
7702       if (capcount > (int)oveccount) capcount = oveccount;
7703       }
7704 
7705     /* "allvector" request showing the entire ovector. */
7706 
7707     if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount;
7708 
7709     /* Output the captured substrings. Note that, for the matched string,
7710     the use of \K in an assertion can make the start later than the end. */
7711 
7712     for (i = 0; i < 2*capcount; i += 2)
7713       {
7714       PCRE2_SIZE lleft, lmiddle, lright;
7715       PCRE2_SIZE start = ovector[i];
7716       PCRE2_SIZE end = ovector[i+1];
7717 
7718       if (start > end)
7719         {
7720         start = ovector[i+1];
7721         end = ovector[i];
7722         fprintf(outfile, "Start of matched string is beyond its end - "
7723           "displaying from end to start.\n");
7724         }
7725 
7726       fprintf(outfile, "%2d: ", i/2);
7727 
7728       /* Check for an unset group */
7729 
7730       if (start == PCRE2_UNSET && end == PCRE2_UNSET)
7731         {
7732         fprintf(outfile, "<unset>\n");
7733         continue;
7734         }
7735 
7736       /* Check for silly offsets, in particular, values that have not been
7737       set when they should have been. However, if we are past the end of the
7738       captures for this pattern ("allvector" causes this), or if we are DFA
7739       matching, it isn't an error if the entry is unchanged. */
7740 
7741       if (start > ulen || end > ulen)
7742         {
7743         if (((dat_datctl.control & CTL_DFA) != 0 ||
7744               i >= (int)(2*maxcapcount + 2)) &&
7745             start == JUNK_OFFSET && end == JUNK_OFFSET)
7746           fprintf(outfile, "<unchanged>\n");
7747         else
7748           fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
7749             (unsigned long int)start, (unsigned long int)end);
7750         continue;
7751         }
7752 
7753       /* When JIT is not being used, ALLUSEDTEXT may be set. (It if is set with
7754       JIT, it is disabled above, with a comment.) When the match is done by the
7755       interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
7756       set, and if the leftmost consulted character is before the start of the
7757       match or the rightmost consulted character is past the end of the match,
7758       we want to show all consulted characters for the main matched string, and
7759       indicate which were lookarounds. */
7760 
7761       if (i == 0)
7762         {
7763         BOOL showallused;
7764         PCRE2_SIZE leftchar, rightchar;
7765 
7766         if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7767           {
7768           leftchar = FLD(match_data, leftchar);
7769           rightchar = FLD(match_data, rightchar);
7770           showallused = i == 0 && (leftchar < start || rightchar > end);
7771           }
7772         else showallused = FALSE;
7773 
7774         if (showallused)
7775           {
7776           PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
7777           PCHARS(lmiddle, pp, start, end - start, utf, outfile);
7778           PCHARS(lright, pp, end, rightchar - end, utf, outfile);
7779           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7780             fprintf(outfile, " (JIT)");
7781           fprintf(outfile, "\n    ");
7782           for (j = 0; j < lleft; j++) fprintf(outfile, "<");
7783           for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
7784           for (j = 0; j < lright; j++) fprintf(outfile, ">");
7785           }
7786 
7787         /* When a pattern contains \K, the start of match position may be
7788         different to the start of the matched string. When this is the case,
7789         show it when requested. */
7790 
7791         else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
7792           {
7793           PCRE2_SIZE startchar;
7794           PCRE2_GET_STARTCHAR(startchar, match_data);
7795           PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
7796           PCHARSV(pp, start, end - start, utf, outfile);
7797           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7798             fprintf(outfile, " (JIT)");
7799           if (startchar != start)
7800             {
7801             fprintf(outfile, "\n    ");
7802             for (j = 0; j < lleft; j++) fprintf(outfile, "^");
7803             }
7804           }
7805 
7806         /* Otherwise, just show the matched string. */
7807 
7808         else
7809           {
7810           PCHARSV(pp, start, end - start, utf, outfile);
7811           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7812             fprintf(outfile, " (JIT)");
7813           }
7814         }
7815 
7816       /* Not the main matched string. Just show it unadorned. */
7817 
7818       else
7819         {
7820         PCHARSV(pp, start, end - start, utf, outfile);
7821         }
7822 
7823       fprintf(outfile, "\n");
7824 
7825       /* Note: don't use the start/end variables here because we want to
7826       show the text from what is reported as the end. */
7827 
7828       if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
7829           (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
7830         {
7831         fprintf(outfile, "%2d+ ", i/2);
7832         PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
7833         fprintf(outfile, "\n");
7834         }
7835       }
7836 
7837     /* Output (*MARK) data if requested */
7838 
7839     if ((dat_datctl.control & CTL_MARK) != 0 &&
7840          TESTFLD(match_data, mark, !=, NULL))
7841       {
7842       fprintf(outfile, "MK: ");
7843       PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
7844       fprintf(outfile, "\n");
7845       }
7846 
7847     /* Process copy/get strings */
7848 
7849     if (!copy_and_get(utf, capcount)) return PR_ABEND;
7850 
7851     }    /* End of handling a successful match */
7852 
7853   /* There was a partial match. The value of ovector[0] is the bumpalong point,
7854   that is, startchar, not any \K point that might have been passed. When JIT is
7855   not in use, "allusedtext" may be set, in which case we indicate the leftmost
7856   consulted character. */
7857 
7858   else if (capcount == PCRE2_ERROR_PARTIAL)
7859     {
7860     PCRE2_SIZE leftchar;
7861     int backlength;
7862     int rubriclength = 0;
7863 
7864     if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7865       {
7866       leftchar = FLD(match_data, leftchar);
7867       }
7868     else leftchar = ovector[0];
7869 
7870     fprintf(outfile, "Partial match");
7871     if ((dat_datctl.control & CTL_MARK) != 0 &&
7872          TESTFLD(match_data, mark, !=, NULL))
7873       {
7874       fprintf(outfile, ", mark=");
7875       PCHARS(rubriclength, CASTFLD(void *, match_data, mark), -1, -1, utf,
7876         outfile);
7877       rubriclength += 7;
7878       }
7879     fprintf(outfile, ": ");
7880     rubriclength += 15;
7881 
7882     PCHARS(backlength, pp, leftchar, ovector[0] - leftchar, utf, outfile);
7883     PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
7884 
7885     if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7886       fprintf(outfile, " (JIT)");
7887     fprintf(outfile, "\n");
7888 
7889     if (backlength != 0)
7890       {
7891       int i;
7892       for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
7893       for (i = 0; i < backlength; i++) fprintf(outfile, "<");
7894       fprintf(outfile, "\n");
7895       }
7896 
7897     if (ulen != ovector[1])
7898       fprintf(outfile, "** ovector[1] is not equal to the subject length: "
7899         "%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen);
7900 
7901     /* Process copy/get strings */
7902 
7903     if (!copy_and_get(utf, 1)) return PR_ABEND;
7904 
7905     /* "allvector" outputs the entire vector */
7906 
7907     if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7908       show_ovector(ovector, oveccount);
7909 
7910     break;  /* Out of the /g loop */
7911     }       /* End of handling partial match */
7912 
7913   /* Failed to match. If this is a /g or /G loop, we might previously have
7914   set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
7915   If that is the case, this is not necessarily the end. We want to advance the
7916   start offset, and continue. We won't be at the end of the string - that was
7917   checked before setting g_notempty. We achieve the effect by pretending that a
7918   single character was matched.
7919 
7920   Complication arises in the case when the newline convention is "any", "crlf",
7921   or "anycrlf". If the previous match was at the end of a line terminated by
7922   CRLF, an advance of one character just passes the CR, whereas we should
7923   prefer the longer newline sequence, as does the code in pcre2_match().
7924 
7925   Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
7926   character, not one byte. */
7927 
7928   else if (g_notempty != 0)   /* There was a previous null match */
7929     {
7930     uint16_t nl = FLD(compiled_code, newline_convention);
7931     PCRE2_SIZE start_offset = dat_datctl.offset;    /* Where the match was */
7932     PCRE2_SIZE end_offset = start_offset + 1;
7933 
7934     if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
7935          nl == PCRE2_NEWLINE_ANYCRLF) &&
7936         start_offset < ulen - 1 &&
7937         CODE_UNIT(pp, start_offset) == '\r' &&
7938         CODE_UNIT(pp, end_offset) == '\n')
7939       end_offset++;
7940 
7941     else if (utf && test_mode != PCRE32_MODE)
7942       {
7943       if (test_mode == PCRE8_MODE)
7944         {
7945         for (; end_offset < ulen; end_offset++)
7946           if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
7947         }
7948       else  /* 16-bit mode */
7949         {
7950         for (; end_offset < ulen; end_offset++)
7951           if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
7952         }
7953       }
7954 
7955     SETFLDVEC(match_data, ovector, 0, start_offset);
7956     SETFLDVEC(match_data, ovector, 1, end_offset);
7957     }  /* End of handling null match in a global loop */
7958 
7959   /* A "normal" match failure. There will be a negative error number in
7960   capcount. */
7961 
7962   else
7963     {
7964     switch(capcount)
7965       {
7966       case PCRE2_ERROR_NOMATCH:
7967       if (gmatched == 0)
7968         {
7969         fprintf(outfile, "No match");
7970         if ((dat_datctl.control & CTL_MARK) != 0 &&
7971              TESTFLD(match_data, mark, !=, NULL))
7972           {
7973           fprintf(outfile, ", mark = ");
7974           PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
7975           }
7976         if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7977           fprintf(outfile, " (JIT)");
7978         fprintf(outfile, "\n");
7979 
7980         /* "allvector" outputs the entire vector */
7981 
7982         if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7983           show_ovector(ovector, oveccount);
7984         }
7985       break;
7986 
7987       case PCRE2_ERROR_BADUTFOFFSET:
7988       fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
7989       break;
7990 
7991       default:
7992       fprintf(outfile, "Failed: error %d: ", capcount);
7993       if (!print_error_message(capcount, "", "")) return PR_ABEND;
7994       if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
7995           capcount >= PCRE2_ERROR_UTF32_ERR2)
7996         {
7997         PCRE2_SIZE startchar;
7998         PCRE2_GET_STARTCHAR(startchar, match_data);
7999         fprintf(outfile, " at offset %" SIZ_FORM, SIZ_CAST startchar);
8000         }
8001       fprintf(outfile, "\n");
8002       break;
8003       }
8004 
8005     break;  /* Out of the /g loop */
8006     }       /* End of failed match handling */
8007 
8008   /* Control reaches here in two circumstances: (a) after a match, and (b)
8009   after a non-match that immediately followed a match on an empty string when
8010   doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
8011   PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
8012   of one character. So effectively we get here only after a match. If we
8013   are not doing a global search, we are done. */
8014 
8015   if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
8016     {
8017     PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
8018     PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
8019 
8020     /* We must now set up for the next iteration of a global search. If we have
8021     matched an empty string, first check to see if we are at the end of the
8022     subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
8023     does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
8024     at the same point. If this fails it will be picked up above, where a fake
8025     match is set up so that at this point we advance to the next character.
8026 
8027     However, in order to cope with patterns that never match at their starting
8028     offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
8029     than the starting offset. This means there will be a retry with the
8030     starting offset at the match offset. If this returns the same match again,
8031     it is picked up above and ignored, and the special action is then taken. */
8032 
8033     if (match_offset == end_offset)
8034       {
8035       if (end_offset == ulen) break;           /* End of subject */
8036       if (match_offset <= dat_datctl.offset)
8037         g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
8038       }
8039 
8040     /* However, even after matching a non-empty string, there is still one
8041     tricky case. If a pattern contains \K within a lookbehind assertion at the
8042     start, the end of the matched string can be at the offset where the match
8043     started. In the case of a normal /g iteration without special action, this
8044     leads to a loop that keeps on returning the same substring. The loop would
8045     be caught above, but we really want to move on to the next match. */
8046 
8047     else
8048       {
8049       g_notempty = 0;   /* Set for a "normal" repeat */
8050       if ((dat_datctl.control & CTL_GLOBAL) != 0)
8051         {
8052         PCRE2_SIZE startchar;
8053         PCRE2_GET_STARTCHAR(startchar, match_data);
8054         if (end_offset <= startchar)
8055           {
8056           if (startchar >= ulen) break;       /* End of subject */
8057           end_offset = startchar + 1;
8058           if (utf && test_mode != PCRE32_MODE)
8059             {
8060             if (test_mode == PCRE8_MODE)
8061               {
8062               for (; end_offset < ulen; end_offset++)
8063                 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
8064               }
8065             else  /* 16-bit mode */
8066               {
8067               for (; end_offset < ulen; end_offset++)
8068                 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
8069               }
8070             }
8071           }
8072         }
8073       }
8074 
8075     /* For a normal global (/g) iteration, save the current ovector[0,1] and
8076     the starting offset so that we can check that they do change each time.
8077     Otherwise a matching bug that returns the same string causes an infinite
8078     loop. It has happened! Then update the start offset, leaving other
8079     parameters alone. */
8080 
8081     if ((dat_datctl.control & CTL_GLOBAL) != 0)
8082       {
8083       ovecsave[0] = ovector[0];
8084       ovecsave[1] = ovector[1];
8085       ovecsave[2] = dat_datctl.offset;
8086       dat_datctl.offset = end_offset;
8087       }
8088 
8089     /* For altglobal, just update the pointer and length. */
8090 
8091     else
8092       {
8093       pp += end_offset * code_unit_size;
8094       len -= end_offset * code_unit_size;
8095       ulen -= end_offset;
8096       if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset;
8097       }
8098     }
8099   }  /* End of global loop */
8100 
8101 show_memory = FALSE;
8102 return PR_OK;
8103 }
8104 
8105 
8106 
8107 
8108 /*************************************************
8109 *               Print PCRE2 version              *
8110 *************************************************/
8111 
8112 static void
print_version(FILE * f)8113 print_version(FILE *f)
8114 {
8115 VERSION_TYPE *vp;
8116 fprintf(f, "PCRE2 version ");
8117 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
8118 fprintf(f, "\n");
8119 }
8120 
8121 
8122 
8123 /*************************************************
8124 *               Print Unicode version            *
8125 *************************************************/
8126 
8127 static void
print_unicode_version(FILE * f)8128 print_unicode_version(FILE *f)
8129 {
8130 VERSION_TYPE *vp;
8131 fprintf(f, "Unicode version ");
8132 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
8133 }
8134 
8135 
8136 
8137 /*************************************************
8138 *               Print JIT target                 *
8139 *************************************************/
8140 
8141 static void
print_jit_target(FILE * f)8142 print_jit_target(FILE *f)
8143 {
8144 VERSION_TYPE *vp;
8145 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
8146 }
8147 
8148 
8149 
8150 /*************************************************
8151 *       Print newline configuration              *
8152 *************************************************/
8153 
8154 /* Output is always to stdout.
8155 
8156 Arguments:
8157   rc         the return code from PCRE2_CONFIG_NEWLINE
8158   isc        TRUE if called from "-C newline"
8159 Returns:     nothing
8160 */
8161 
8162 static void
print_newline_config(uint32_t optval,BOOL isc)8163 print_newline_config(uint32_t optval, BOOL isc)
8164 {
8165 if (!isc) printf("  Default newline sequence is ");
8166 if (optval < sizeof(newlines)/sizeof(char *))
8167   printf("%s\n", newlines[optval]);
8168 else
8169   printf("a non-standard value: %d\n", optval);
8170 }
8171 
8172 
8173 
8174 /*************************************************
8175 *             Usage function                     *
8176 *************************************************/
8177 
/* Write the command-line help text to stdout. The text is held as a table of
lines that are emitted in order. The lines describing readline support and the
-8, -16 and -32 options depend on which SUPPORT_xxx macros are defined when
this file is compiled.

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
static const char *const help_text[] = {
  "Usage:     pcre2test [options] [<input file> [<output file>]]\n\n",
  "Input and output default to stdin and stdout.\n",
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  "If input is a terminal, readline() is used to read from it.\n",
#else
  "This version of pcre2test is not linked with readline().\n",
#endif
  "\nOptions:\n",
#ifdef SUPPORT_PCRE2_8
  "  -8            use the 8-bit library\n",
#endif
#ifdef SUPPORT_PCRE2_16
  "  -16           use the 16-bit library\n",
#endif
#ifdef SUPPORT_PCRE2_32
  "  -32           use the 32-bit library\n",
#endif
  "  -ac           set default pattern modifier PCRE2_AUTO_CALLOUT\n",
  "  -AC           as -ac, but also set subject 'callout_extra' modifier\n",
  "  -b            set default pattern modifier 'fullbincode'\n",
  "  -C            show PCRE2 compile-time options and exit\n",
  "  -C arg        show a specific compile-time option and exit with its\n",
  "                  value if numeric (else 0). The arg can be:\n",
  "     backslash-C    use of \\C is enabled [0, 1]\n",
  "     bsr            \\R type [ANYCRLF, ANY]\n",
  "     ebcdic         compiled for EBCDIC character code [0,1]\n",
  "     ebcdic-nl      NL code if compiled for EBCDIC\n",
  "     jit            just-in-time compiler supported [0, 1]\n",
  "     linksize       internal link size [2, 3, 4]\n",
  "     newline        newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n",
  "     pcre2-8        8 bit library support enabled [0, 1]\n",
  "     pcre2-16       16 bit library support enabled [0, 1]\n",
  "     pcre2-32       32 bit library support enabled [0, 1]\n",
  "     unicode        Unicode and UTF support enabled [0, 1]\n",
  "  -d            set default pattern modifier 'debug'\n",
  "  -dfa          set default subject modifier 'dfa'\n",
  "  -error <n,m,..>  show messages for error numbers, then exit\n",
  "  -help         show usage information\n",
  "  -i            set default pattern modifier 'info'\n",
  "  -jit          set default pattern modifier 'jit'\n",
  "  -jitfast      set default pattern modifier 'jitfast'\n",
  "  -jitverify    set default pattern modifier 'jitverify'\n",
  "  -LM           list pattern and subject modifiers, then exit\n",
  "  -q            quiet: do not output PCRE2 version number at start\n",
  "  -pattern <s>  set default pattern modifier fields\n",
  "  -subject <s>  set default subject modifier fields\n",
  "  -S <n>        set stack size to <n> mebibytes\n",
  "  -t [<n>]      time compilation and execution, repeating <n> times\n",
  "  -tm [<n>]     time execution (matching) only, repeating <n> times\n",
  "  -T            same as -t, but show total times at the end\n",
  "  -TM           same as -tm, but show total time at the end\n",
  "  -version      show PCRE2 version and exit\n"
  };
unsigned int k;

/* None of the lines contains a conversion specification, so writing each one
with fputs() produces exactly the same bytes as formatting it would. */

for (k = 0; k < sizeof(help_text)/sizeof(help_text[0]); k++)
  fputs(help_text[k], stdout);
}
8234 
8235 
8236 
8237 /*************************************************
8238 *             Handle -C option                   *
8239 *************************************************/
8240 
8241 /* This option outputs configuration options and sets an appropriate return
8242 code when asked for a single option. The code is abstracted into a separate
8243 function because of its size. Use whichever pcre2_config() function is
8244 available.
8245 
8246 Argument:   an option name or NULL
8247 Returns:    the return code
8248 */
8249 
8250 static int
c_option(const char * arg)8251 c_option(const char *arg)
8252 {
8253 uint32_t optval;
8254 unsigned int i = COPTLISTCOUNT;
8255 int yield = 0;
8256 
8257 if (arg != NULL && arg[0] != CHAR_MINUS)
8258   {
8259   for (i = 0; i < COPTLISTCOUNT; i++)
8260     if (strcmp(arg, coptlist[i].name) == 0) break;
8261 
8262   if (i >= COPTLISTCOUNT)
8263     {
8264     fprintf(stderr, "** Unknown -C option '%s'\n", arg);
8265     return 0;
8266     }
8267 
8268   switch (coptlist[i].type)
8269     {
8270     case CONF_BSR:
8271     (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8272     printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
8273     break;
8274 
8275     case CONF_FIX:
8276     yield = coptlist[i].value;
8277     printf("%d\n", yield);
8278     break;
8279 
8280     case CONF_FIZ:
8281     optval = coptlist[i].value;
8282     printf("%d\n", optval);
8283     break;
8284 
8285     case CONF_INT:
8286     (void)PCRE2_CONFIG(coptlist[i].value, &yield);
8287     printf("%d\n", yield);
8288     break;
8289 
8290     case CONF_NL:
8291     (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8292     print_newline_config(optval, TRUE);
8293     break;
8294     }
8295 
8296 /* For VMS, return the value by setting a symbol, for certain values only. This
8297 is contributed code which the PCRE2 developers have no means of testing. */
8298 
8299 #ifdef __VMS
8300 
8301 /* This is the original code provided by the first VMS contributor. */
8302 #ifdef NEVER
8303   if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8304     {
8305     char ucname[16];
8306     strcpy(ucname, coptlist[i].name);
8307     for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]];
8308     vms_setsymbol(ucname, 0, optval);
8309     }
8310 #endif
8311 
8312 /* This is the new code, provided by a second VMS contributor. */
8313 
8314   if (coptlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8315     {
8316     char nam_buf[22], val_buf[4];
8317     $DESCRIPTOR(nam, nam_buf);
8318     $DESCRIPTOR(val, val_buf);
8319 
8320     strcpy(nam_buf, coptlist[i].name);
8321     nam.dsc$w_length = strlen(nam_buf);
8322     sprintf(val_buf, "%d", yield);
8323     val.dsc$w_length = strlen(val_buf);
8324     lib$set_symbol(&nam, &val);
8325     }
8326 #endif  /* __VMS */
8327 
8328   return yield;
8329   }
8330 
8331 /* No argument for -C: output all configuration information. */
8332 
8333 print_version(stdout);
8334 printf("Compiled with\n");
8335 
8336 #ifdef EBCDIC
8337 printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
8338 #if defined NATIVE_ZOS
8339 printf("  EBCDIC code page %s or similar\n", pcrz_cpversion());
8340 #endif
8341 #endif
8342 
8343 (void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval);
8344 if (optval & 1) printf("  8-bit support\n");
8345 if (optval & 2) printf("  16-bit support\n");
8346 if (optval & 4) printf("  32-bit support\n");
8347 
8348 #ifdef SUPPORT_VALGRIND
8349 printf("  Valgrind support\n");
8350 #endif
8351 
8352 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
8353 if (optval != 0)
8354   {
8355   printf("  UTF and UCP support (");
8356   print_unicode_version(stdout);
8357   printf(")\n");
8358   }
8359 else printf("  No Unicode support\n");
8360 
8361 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
8362 if (optval != 0)
8363   {
8364   printf("  Just-in-time compiler support: ");
8365   print_jit_target(stdout);
8366   printf("\n");
8367   }
8368 else
8369   {
8370   printf("  No just-in-time compiler support\n");
8371   }
8372 
8373 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
8374 print_newline_config(optval, FALSE);
8375 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
8376 printf("  \\R matches %s\n",
8377   (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
8378                                  "all Unicode newlines");
8379 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
8380 printf("  \\C is %ssupported\n", optval? "not ":"");
8381 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
8382 printf("  Internal link size = %d\n", optval);
8383 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
8384 printf("  Parentheses nest limit = %d\n", optval);
8385 (void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
8386 printf("  Default heap limit = %d kibibytes\n", optval);
8387 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
8388 printf("  Default match limit = %d\n", optval);
8389 (void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
8390 printf("  Default depth limit = %d\n", optval);
8391 
8392 #if defined SUPPORT_LIBREADLINE
8393 printf("  pcre2test has libreadline support\n");
8394 #elif defined SUPPORT_LIBEDIT
8395 printf("  pcre2test has libedit support\n");
8396 #else
8397 printf("  pcre2test has neither libreadline nor libedit support\n");
8398 #endif
8399 
8400 return 0;
8401 }
8402 
8403 
8404 
8405 /*************************************************
8406 *              Display one modifier              *
8407 *************************************************/
8408 
8409 static void
display_one_modifier(modstruct * m,BOOL for_pattern)8410 display_one_modifier(modstruct *m, BOOL for_pattern)
8411 {
8412 uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))?
8413   '*' : ' ';
8414 printf("%c%s", c, m->name);
8415 }
8416 
8417 
8418 
8419 /*************************************************
8420 *       Display pattern or subject modifiers     *
8421 *************************************************/
8422 
8423 /* In order to print in two columns, first scan without printing to get a list
8424 of the modifiers that are required.
8425 
8426 Arguments:
8427   for_pattern   TRUE for pattern modifiers, FALSE for subject modifiers
8428   title         string to be used in title
8429 
8430 Returns:        nothing
8431 */
8432 
8433 static void
display_selected_modifiers(BOOL for_pattern,const char * title)8434 display_selected_modifiers(BOOL for_pattern, const char *title)
8435 {
8436 uint32_t i, j;
8437 uint32_t n = 0;
8438 uint32_t list[MODLISTCOUNT];
8439 
8440 for (i = 0; i < MODLISTCOUNT; i++)
8441   {
8442   BOOL is_pattern = TRUE;
8443   modstruct *m = modlist + i;
8444 
8445   switch (m->which)
8446     {
8447     case MOD_CTC:       /* Compile context */
8448     case MOD_PAT:       /* Pattern */
8449     case MOD_PATP:      /* Pattern, OK for Perl-compatible test */
8450     break;
8451 
8452     /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect
8453     subjects, but can be given with a pattern. We list them as subject
8454     modifiers, but marked with an asterisk.*/
8455 
8456     case MOD_CTM:       /* Match context */
8457     case MOD_DAT:       /* Subject line */
8458     case MOD_PND:       /* As PD, but not default pattern */
8459     case MOD_PNDP:      /* As PND, OK for Perl-compatible test */
8460     is_pattern = FALSE;
8461     break;
8462 
8463     default: printf("** Unknown type for modifier '%s'\n", m->name);
8464     /* Fall through */
8465     case MOD_PD:        /* Pattern or subject */
8466     case MOD_PDP:       /* As PD, OK for Perl-compatible test */
8467     is_pattern = for_pattern;
8468     break;
8469     }
8470 
8471   if (for_pattern == is_pattern) list[n++] = i;
8472   }
8473 
8474 /* Now print from the list in two columns. */
8475 
8476 printf("-------------- %s MODIFIERS --------------\n", title);
8477 
8478 for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++)
8479   {
8480   modstruct *m = modlist + list[i];
8481   display_one_modifier(m, for_pattern);
8482   if (j < n)
8483     {
8484     uint32_t k = 27 - strlen(m->name);
8485     while (k-- > 0) printf(" ");
8486     display_one_modifier(modlist + list[j], for_pattern);
8487     }
8488   printf("\n");
8489   }
8490 }
8491 
8492 
8493 
8494 /*************************************************
8495 *          Display the list of modifiers         *
8496 *************************************************/
8497 
8498 static void
display_modifiers(void)8499 display_modifiers(void)
8500 {
8501 printf(
8502   "An asterisk on a subject modifier means that it may be given on a pattern\n"
8503   "line, in order to apply to all subjects matched by that pattern. Modifiers\n"
8504   "that are listed for both patterns and subjects have different effects in\n"
8505   "each case.\n\n");
8506 display_selected_modifiers(TRUE, "PATTERN");
8507 printf("\n");
8508 display_selected_modifiers(FALSE, "SUBJECT");
8509 }
8510 
8511 
8512 
8513 /*************************************************
8514 *                Main Program                    *
8515 *************************************************/
8516 
8517 int
main(int argc,char ** argv)8518 main(int argc, char **argv)
8519 {
8520 uint32_t temp;
8521 uint32_t yield = 0;
8522 uint32_t op = 1;
8523 BOOL notdone = TRUE;
8524 BOOL quiet = FALSE;
8525 BOOL showtotaltimes = FALSE;
8526 BOOL skipping = FALSE;
8527 char *arg_subject = NULL;
8528 char *arg_pattern = NULL;
8529 char *arg_error = NULL;
8530 
8531 /* The offsets to the options and control bits fields of the pattern and data
8532 control blocks must be the same so that common options and controls such as
8533 "anchored" or "memory" can work for either of them from a single table entry.
8534 We cannot test this till runtime because "offsetof" does not work in the
8535 preprocessor. */
8536 
8537 if (PO(options) != DO(options) || PO(control) != DO(control) ||
8538     PO(control2) != DO(control2))
8539   {
8540   fprintf(stderr, "** Coding error: "
8541     "options and control offsets for pattern and data must be the same.\n");
8542   return 1;
8543   }
8544 
8545 /* Get the PCRE2 and Unicode version number and JIT target information, at the
8546 same time checking that a request for the length gives the same answer. Also
8547 check lengths for non-string items. */
8548 
8549 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
8550     PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
8551 
8552     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
8553     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
8554 
8555     PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
8556     PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
8557 
8558     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
8559     PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
8560   {
8561   fprintf(stderr, "** Error in pcre2_config(): bad length\n");
8562   return 1;
8563   }
8564 
8565 /* Check that bad options are diagnosed. */
8566 
8567 if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION ||
8568     PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION)
8569   {
8570   fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n");
8571   return 1;
8572   }
8573 
8574 /* This configuration option is now obsolete, but running a quick check ensures
8575 that its code is covered. */
8576 
8577 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp);
8578 
8579 /* Get buffers from malloc() so that valgrind will check their misuse when
8580 debugging. They grow automatically when very long lines are read. The 16-
8581 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
8582 
8583 buffer = (uint8_t *)malloc(pbuffer8_size);
8584 pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
8585 
8586 /* The following  _setmode() stuff is some Windows magic that tells its runtime
8587 library to translate CRLF into a single LF character. At least, that's what
8588 I've been told: never having used Windows I take this all on trust. Originally
8589 it set 0x8000, but then I was advised that _O_BINARY was better. */
8590 
8591 #if defined(_WIN32) || defined(WIN32)
8592 _setmode( _fileno( stdout ), _O_BINARY );
8593 #endif
8594 
8595 /* Initialization that does not depend on the running mode. */
8596 
8597 locale_name[0] = 0;
8598 
8599 memset(&def_patctl, 0, sizeof(patctl));
8600 def_patctl.convert_type = CONVERT_UNSET;
8601 
8602 memset(&def_datctl, 0, sizeof(datctl));
8603 def_datctl.oveccount = DEFAULT_OVECCOUNT;
8604 def_datctl.copy_numbers[0] = -1;
8605 def_datctl.get_numbers[0] = -1;
8606 def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
8607 def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
8608 def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
8609 
8610 /* Scan command line options. */
8611 
8612 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
8613   {
8614   char *endptr;
8615   char *arg = argv[op];
8616   unsigned long uli;
8617 
8618   /* List modifiers and exit. */
8619 
8620   if (strcmp(arg, "-LM") == 0)
8621     {
8622     display_modifiers();
8623     goto EXIT;
8624     }
8625 
8626   /* Display and/or set return code for configuration options. */
8627 
8628   if (strcmp(arg, "-C") == 0)
8629     {
8630     yield = c_option(argv[op + 1]);
8631     goto EXIT;
8632     }
8633 
8634   /* Select operating mode. Ensure that pcre2_config() is called in 16-bit
8635   and 32-bit modes because that won't happen naturally when 8-bit is also
8636   configured. Also call some other functions that are not otherwise used. This
8637   means that a coverage report won't claim there are uncalled functions. */
8638 
8639   if (strcmp(arg, "-8") == 0)
8640     {
8641 #ifdef SUPPORT_PCRE2_8
8642     test_mode = PCRE8_MODE;
8643     (void)pcre2_set_bsr_8(pat_context8, 999);
8644     (void)pcre2_set_newline_8(pat_context8, 999);
8645 #else
8646     fprintf(stderr,
8647       "** This version of PCRE2 was built without 8-bit support\n");
8648     exit(1);
8649 #endif
8650     }
8651 
8652   else if (strcmp(arg, "-16") == 0)
8653     {
8654 #ifdef SUPPORT_PCRE2_16
8655     test_mode = PCRE16_MODE;
8656     (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL);
8657     (void)pcre2_set_bsr_16(pat_context16, 999);
8658     (void)pcre2_set_newline_16(pat_context16, 999);
8659 #else
8660     fprintf(stderr,
8661       "** This version of PCRE2 was built without 16-bit support\n");
8662     exit(1);
8663 #endif
8664     }
8665 
8666   else if (strcmp(arg, "-32") == 0)
8667     {
8668 #ifdef SUPPORT_PCRE2_32
8669     test_mode = PCRE32_MODE;
8670     (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL);
8671     (void)pcre2_set_bsr_32(pat_context32, 999);
8672     (void)pcre2_set_newline_32(pat_context32, 999);
8673 #else
8674     fprintf(stderr,
8675       "** This version of PCRE2 was built without 32-bit support\n");
8676     exit(1);
8677 #endif
8678     }
8679 
8680   /* Set quiet (no version verification) */
8681 
8682   else if (strcmp(arg, "-q") == 0) quiet = TRUE;
8683 
8684   /* Set system stack size */
8685 
8686   else if (strcmp(arg, "-S") == 0 && argc > 2 &&
8687       ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
8688     {
8689 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
8690     fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
8691     exit(1);
8692 #else
8693     int rc;
8694     uint32_t stack_size;
8695     struct rlimit rlim;
8696     if (U32OVERFLOW(uli))
8697       {
8698       fprintf(stderr, "** Argument for -S is too big\n");
8699       exit(1);
8700       }
8701     stack_size = (uint32_t)uli;
8702     getrlimit(RLIMIT_STACK, &rlim);
8703     rlim.rlim_cur = stack_size * 1024 * 1024;
8704     if (rlim.rlim_cur > rlim.rlim_max)
8705       {
8706       fprintf(stderr,
8707         "pcre2test: requested stack size %luMiB is greater than hard limit "
8708           "%luMiB\n", (unsigned long int)stack_size,
8709           (unsigned long int)(rlim.rlim_max));
8710       exit(1);
8711       }
8712     rc = setrlimit(RLIMIT_STACK, &rlim);
8713     if (rc != 0)
8714       {
8715       fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n",
8716         (unsigned long int)stack_size, strerror(errno));
8717       exit(1);
8718       }
8719     op++;
8720     argc--;
8721 #endif
8722     }
8723 
8724   /* Set some common pattern and subject controls */
8725 
8726   else if (strcmp(arg, "-AC") == 0)
8727     {
8728     def_patctl.options |= PCRE2_AUTO_CALLOUT;
8729     def_datctl.control2 |= CTL2_CALLOUT_EXTRA;
8730     }
8731   else if (strcmp(arg, "-ac") == 0)  def_patctl.options |= PCRE2_AUTO_CALLOUT;
8732   else if (strcmp(arg, "-b") == 0)   def_patctl.control |= CTL_FULLBINCODE;
8733   else if (strcmp(arg, "-d") == 0)   def_patctl.control |= CTL_DEBUG;
8734   else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
8735   else if (strcmp(arg, "-i") == 0)   def_patctl.control |= CTL_INFO;
8736   else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0 ||
8737            strcmp(arg, "-jitfast") == 0)
8738     {
8739     if (arg[4] == 'v') def_patctl.control |= CTL_JITVERIFY;
8740       else if (arg[4] == 'f') def_patctl.control |= CTL_JITFAST;
8741     def_patctl.jit = JIT_DEFAULT;  /* full & partial */
8742 #ifndef SUPPORT_JIT
8743     fprintf(stderr, "** Warning: JIT support is not available: "
8744                     "-jit[fast|verify] calls functions that do nothing.\n");
8745 #endif
8746     }
8747 
8748   /* Set timing parameters */
8749 
8750   else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
8751            strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
8752     {
8753     int both = arg[2] == 0;
8754     showtotaltimes = arg[1] == 'T';
8755     if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
8756       {
8757       if (uli == 0)
8758         {
8759         fprintf(stderr, "** Argument for %s must not be zero\n", arg);
8760         exit(1);
8761         }
8762       if (U32OVERFLOW(uli))
8763         {
8764         fprintf(stderr, "** Argument for %s is too big\n", arg);
8765         exit(1);
8766         }
8767       timeitm = (int)uli;
8768       op++;
8769       argc--;
8770       }
8771     else timeitm = LOOPREPEAT;
8772     if (both) timeit = timeitm;
8773     }
8774 
8775   /* Give help */
8776 
8777   else if (strcmp(arg, "-help") == 0 ||
8778            strcmp(arg, "--help") == 0)
8779     {
8780     usage();
8781     goto EXIT;
8782     }
8783 
8784   /* Show version */
8785 
8786   else if (strcmp(arg, "-version") == 0 ||
8787            strcmp(arg, "--version") == 0)
8788     {
8789     print_version(stdout);
8790     goto EXIT;
8791     }
8792 
8793   /* The following options save their data for processing once we know what
8794   the running mode is. */
8795 
8796   else if (strcmp(arg, "-error") == 0)
8797     {
8798     arg_error = argv[op+1];
8799     goto CHECK_VALUE_EXISTS;
8800     }
8801 
8802   else if (strcmp(arg, "-subject") == 0)
8803     {
8804     arg_subject = argv[op+1];
8805     goto CHECK_VALUE_EXISTS;
8806     }
8807 
8808   else if (strcmp(arg, "-pattern") == 0)
8809     {
8810     arg_pattern = argv[op+1];
8811     CHECK_VALUE_EXISTS:
8812     if (argc <= 2)
8813       {
8814       fprintf(stderr, "** Missing value for %s\n", arg);
8815       yield = 1;
8816       goto EXIT;
8817       }
8818     op++;
8819     argc--;
8820     }
8821 
8822   /* Unrecognized option */
8823 
8824   else
8825     {
8826     fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
8827     usage();
8828     yield = 1;
8829     goto EXIT;
8830     }
8831   op++;
8832   argc--;
8833   }
8834 
8835 /* If -error was present, get the error numbers, show the messages, and exit.
8836 We wait to do this until we know which mode we are in. */
8837 
8838 if (arg_error != NULL)
8839   {
8840   int len;
8841   int errcode;
8842   char *endptr;
8843 
8844 /* Ensure the relevant non-8-bit buffer is available. Ensure that it is at
8845 least 128 code units, because it is used for retrieving error messages. */
8846 
8847 #ifdef SUPPORT_PCRE2_16
8848   if (test_mode == PCRE16_MODE)
8849     {
8850     pbuffer16_size = 256;
8851     pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
8852     if (pbuffer16 == NULL)
8853       {
8854       fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
8855         SIZ_CAST pbuffer16_size);
8856       yield = 1;
8857       goto EXIT;
8858       }
8859     }
8860 #endif
8861 
8862 #ifdef SUPPORT_PCRE2_32
8863   if (test_mode == PCRE32_MODE)
8864     {
8865     pbuffer32_size = 512;
8866     pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
8867     if (pbuffer32 == NULL)
8868       {
8869       fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
8870         SIZ_CAST pbuffer32_size);
8871       yield = 1;
8872       goto EXIT;
8873       }
8874     }
8875 #endif
8876 
8877   /* Loop along a list of error numbers. */
8878 
8879   for (;;)
8880     {
8881     errcode = strtol(arg_error, &endptr, 10);
8882     if (*endptr != 0 && *endptr != CHAR_COMMA)
8883       {
8884       fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
8885       yield = 1;
8886       goto EXIT;
8887       }
8888     printf("Error %d: ", errcode);
8889     PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
8890     if (len < 0)
8891       {
8892       switch (len)
8893         {
8894         case PCRE2_ERROR_BADDATA:
8895         printf("PCRE2_ERROR_BADDATA (unknown error number)");
8896         break;
8897 
8898         case PCRE2_ERROR_NOMEMORY:
8899         printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
8900         break;
8901 
8902         default:
8903         printf("Unexpected return (%d) from pcre2_get_error_message()", len);
8904         break;
8905         }
8906       }
8907     else
8908       {
8909       PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
8910       }
8911     printf("\n");
8912     if (*endptr == 0) goto EXIT;
8913     arg_error = endptr + 1;
8914     }
8915   /* Control never reaches here */
8916   }  /* End of -error handling */
8917 
8918 /* Initialize things that cannot be done until we know which test mode we are
8919 running in. Exercise the general context copying and match data size functions,
8920 which are not otherwise used. */
8921 
8922 code_unit_size = test_mode/8;
8923 max_oveccount = DEFAULT_OVECCOUNT;
8924 
8925 /* Use macros to save a lot of duplication. */
8926 
8927 #define CREATECONTEXTS \
8928   G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
8929   G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
8930   G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
8931   G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
8932   G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
8933   G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
8934   G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
8935   G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
8936   G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
8937 
8938 #define CONTEXTTESTS \
8939   (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
8940   (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
8941   (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
8942   (void)G(pcre2_set_recursion_memory_management_,BITS)(G(dat_context,BITS), my_malloc, my_free, NULL); \
8943   (void)G(pcre2_get_match_data_size_,BITS)(G(match_data,BITS))
8944 
8945 
8946 /* Call the appropriate functions for the current mode, and exercise some
8947 functions that are not otherwise called. */
8948 
8949 #ifdef SUPPORT_PCRE2_8
8950 #undef BITS
8951 #define BITS 8
8952 if (test_mode == PCRE8_MODE)
8953   {
8954   CREATECONTEXTS;
8955   CONTEXTTESTS;
8956   }
8957 #endif
8958 
8959 #ifdef SUPPORT_PCRE2_16
8960 #undef BITS
8961 #define BITS 16
8962 if (test_mode == PCRE16_MODE)
8963   {
8964   CREATECONTEXTS;
8965   CONTEXTTESTS;
8966   }
8967 #endif
8968 
8969 #ifdef SUPPORT_PCRE2_32
8970 #undef BITS
8971 #define BITS 32
8972 if (test_mode == PCRE32_MODE)
8973   {
8974   CREATECONTEXTS;
8975   CONTEXTTESTS;
8976   }
8977 #endif
8978 
8979 /* Set a default parentheses nest limit that is large enough to run the
8980 standard tests (this also exercises the function). */
8981 
8982 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
8983 
8984 /* Handle command line modifier settings, sending any error messages to
8985 stderr. We need to know the mode before modifying the context, and it is tidier
8986 to do them all in the same way. */
8987 
8988 outfile = stderr;
8989 if ((arg_pattern != NULL &&
8990     !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
8991     (arg_subject != NULL &&
8992     !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
8993   {
8994   yield = 1;
8995   goto EXIT;
8996   }
8997 
8998 /* Sort out the input and output files, defaulting to stdin/stdout. */
8999 
9000 infile = stdin;
9001 outfile = stdout;
9002 
9003 if (argc > 1 && strcmp(argv[op], "-") != 0)
9004   {
9005   infile = fopen(argv[op], INPUT_MODE);
9006   if (infile == NULL)
9007     {
9008     printf("** Failed to open '%s': %s\n", argv[op], strerror(errno));
9009     yield = 1;
9010     goto EXIT;
9011     }
9012   }
9013 
9014 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9015 if (INTERACTIVE(infile)) using_history();
9016 #endif
9017 
9018 if (argc > 2)
9019   {
9020   outfile = fopen(argv[op+1], OUTPUT_MODE);
9021   if (outfile == NULL)
9022     {
9023     printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno));
9024     yield = 1;
9025     goto EXIT;
9026     }
9027   }
9028 
9029 /* Output a heading line unless quiet, then process input lines. */
9030 
9031 if (!quiet) print_version(outfile);
9032 
9033 SET(compiled_code, NULL);
9034 
9035 #ifdef SUPPORT_PCRE2_8
9036 preg.re_pcre2_code = NULL;
9037 preg.re_match_data = NULL;
9038 #endif
9039 
9040 while (notdone)
9041   {
9042   uint8_t *p;
9043   int rc = PR_OK;
9044   BOOL expectdata = TEST(compiled_code, !=, NULL);
9045 #ifdef SUPPORT_PCRE2_8
9046   expectdata |= preg.re_pcre2_code != NULL;
9047 #endif
9048 
9049   if (extend_inputline(infile, buffer, expectdata? "data> " : "  re> ") == NULL)
9050     break;
9051   if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
9052   fflush(outfile);
9053   p = buffer;
9054 
9055   /* If we have a pattern set up for testing, or we are skipping after a
9056   compile failure, a blank line terminates this test. */
9057 
9058   if (expectdata || skipping)
9059     {
9060     while (isspace(*p)) p++;
9061     if (*p == 0)
9062       {
9063 #ifdef SUPPORT_PCRE2_8
9064       if (preg.re_pcre2_code != NULL)
9065         {
9066         regfree(&preg);
9067         preg.re_pcre2_code = NULL;
9068         preg.re_match_data = NULL;
9069         }
9070 #endif  /* SUPPORT_PCRE2_8 */
9071       if (TEST(compiled_code, !=, NULL))
9072         {
9073         SUB1(pcre2_code_free, compiled_code);
9074         SET(compiled_code, NULL);
9075         }
9076       skipping = FALSE;
9077       setlocale(LC_CTYPE, "C");
9078       }
9079 
9080     /* Otherwise, if we are not skipping, and the line is not a data comment
9081     line starting with "\=", process a data line. */
9082 
9083     else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
9084       {
9085       rc = process_data();
9086       }
9087     }
9088 
9089   /* We do not have a pattern set up for testing. Lines starting with # are
9090   either comments or special commands. Blank lines are ignored. Otherwise, the
9091   line must start with a valid delimiter. It is then processed as a pattern
9092   line. A copy of the pattern is left in pbuffer8 for use by callouts. Under
9093   valgrind, make the unused part of the buffer undefined, to catch overruns. */
9094 
9095   else if (*p == '#')
9096     {
9097     if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
9098     rc = process_command();
9099     }
9100 
9101   else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
9102     {
9103     rc = process_pattern();
9104     dfa_matched = 0;
9105     }
9106 
9107   else
9108     {
9109     while (isspace(*p)) p++;
9110     if (*p != 0)
9111       {
9112       fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
9113         *buffer);
9114       rc = PR_SKIP;
9115       }
9116     }
9117 
9118   if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
9119   else if (rc == PR_ABEND)
9120     {
9121     fprintf(outfile, "** pcre2test run abandoned\n");
9122     yield = 1;
9123     goto EXIT;
9124     }
9125   }
9126 
9127 /* Finish off a normal run. */
9128 
9129 if (INTERACTIVE(infile)) fprintf(outfile, "\n");
9130 
9131 if (showtotaltimes)
9132   {
9133   const char *pad = "";
9134   fprintf(outfile, "--------------------------------------\n");
9135   if (timeit > 0)
9136     {
9137     fprintf(outfile, "Total compile time %.4f milliseconds\n",
9138       (((double)total_compile_time * 1000.0) / (double)timeit) /
9139         (double)CLOCKS_PER_SEC);
9140     if (total_jit_compile_time > 0)
9141       fprintf(outfile, "Total JIT compile  %.4f milliseconds\n",
9142         (((double)total_jit_compile_time * 1000.0) / (double)timeit) /
9143           (double)CLOCKS_PER_SEC);
9144     pad = "  ";
9145     }
9146   fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad,
9147     (((double)total_match_time * 1000.0) / (double)timeitm) /
9148       (double)CLOCKS_PER_SEC);
9149   }
9150 
9151 
9152 EXIT:
9153 
9154 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9155 if (infile != NULL && INTERACTIVE(infile)) clear_history();
9156 #endif
9157 
9158 if (infile != NULL && infile != stdin) fclose(infile);
9159 if (outfile != NULL && outfile != stdout) fclose(outfile);
9160 
9161 free(buffer);
9162 free(dbuffer);
9163 free(pbuffer8);
9164 free(dfa_workspace);
9165 free((void *)locale_tables);
9166 free(tables3);
9167 PCRE2_MATCH_DATA_FREE(match_data);
9168 SUB1(pcre2_code_free, compiled_code);
9169 
9170 while(patstacknext-- > 0)
9171   {
9172   SET(compiled_code, patstack[patstacknext]);
9173   SUB1(pcre2_code_free, compiled_code);
9174   }
9175 
9176 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
9177 if (jit_stack != NULL)
9178   {
9179   PCRE2_JIT_STACK_FREE(jit_stack);
9180   }
9181 
9182 #define FREECONTEXTS \
9183   G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
9184   G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
9185   G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
9186   G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
9187   G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
9188   G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \
9189   G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \
9190   G(pcre2_convert_context_free_,BITS)(G(con_context,BITS));
9191 
9192 #ifdef SUPPORT_PCRE2_8
9193 #undef BITS
9194 #define BITS 8
9195 if (preg.re_pcre2_code != NULL) regfree(&preg);
9196 FREECONTEXTS;
9197 #endif
9198 
9199 #ifdef SUPPORT_PCRE2_16
9200 #undef BITS
9201 #define BITS 16
9202 free(pbuffer16);
9203 FREECONTEXTS;
9204 #endif
9205 
9206 #ifdef SUPPORT_PCRE2_32
9207 #undef BITS
9208 #define BITS 32
9209 free(pbuffer32);
9210 FREECONTEXTS;
9211 #endif
9212 
9213 #if defined(__VMS)
9214   yield = SS$_NORMAL;  /* Return values via DCL symbols */
9215 #endif
9216 
9217 return yield;
9218 }
9219 
9220 /* End of pcre2test.c */
9221