1 /*************************************************
2 *             PCRE2 testing program              *
3 *************************************************/
4 
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11 
12                        Written by Philip Hazel
13      Original code Copyright (c) 1997-2012 University of Cambridge
14     Rewritten code Copyright (c) 2016-2018 University of Cambridge
15 
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19 
20     * Redistributions of source code must retain the above copyright notice,
21       this list of conditions and the following disclaimer.
22 
23     * Redistributions in binary form must reproduce the above copyright
24       notice, this list of conditions and the following disclaimer in the
25       documentation and/or other materials provided with the distribution.
26 
27     * Neither the name of the University of Cambridge nor the names of its
28       contributors may be used to endorse or promote products derived from
29       this software without specific prior written permission.
30 
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44 
45 
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56 
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60 
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68 
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76 
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 
81 /* Debugging code enabler */
82 
83 /* #define DEBUG_SHOW_MALLOC_ADDRESSES */
84 
85 /* Both libreadline and libedit are optionally supported. The user-supplied
86 original patch uses readline/readline.h for libedit, but in at least one system
87 it is installed as editline/readline.h, so the configuration code now looks for
88 that first, falling back to readline/readline.h. */
89 
90 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
91 #if defined(SUPPORT_LIBREADLINE)
92 #include <readline/readline.h>
93 #include <readline/history.h>
94 #else
95 #if defined(HAVE_EDITLINE_READLINE_H)
96 #include <editline/readline.h>
97 #else
98 #include <readline/readline.h>
99 #endif
100 #endif
101 #endif
102 
103 /* Put the test for interactive input into a macro so that it can be changed if
104 required for different environments. */
105 
106 #define INTERACTIVE(f) isatty(fileno(f))
107 
108 
109 /* ---------------------- System-specific definitions ---------------------- */
110 
/* Stream open modes differ between environments. Originally, pcretest opened
its input and output without "b"; "b" was then added to both for release 5.0
because some environments were said to need it (it makes no difference on
Unix-like systems), and later it was removed again from the input mode under
Windows, where it is wrong. The modes are therefore abstracted into macros that
are set here, which makes them easy to adjust. The BINARY versions, which are
the same in every environment, are used when saving/restoring compiled
patterns. */

#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"

#if !defined(_WIN32) && !defined(WIN32)

/* Not Windows */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */

#ifdef NATIVE_ZOS              /* z/OS uses non-binary I/O */
#define INPUT_MODE          "r"
#define OUTPUT_MODE         "w"
#else
#define INPUT_MODE          "rb"
#define OUTPUT_MODE         "wb"
#endif

#else

/* Windows */

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */

#define INPUT_MODE          "r"
#define OUTPUT_MODE         "wb"

/* Windows spells these functions with a leading underscore, but in some
environments the plain names seem to be defined already, hence the guards. */

#if !defined(isatty)
#define isatty _isatty
#endif

#if !defined(fileno)
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#endif  /* Windows / not Windows */
159 
160 #ifdef __VMS
161 #include <ssdef.h>
162 void vms_setsymbol( char *, char *, int );
163 #endif
164 
/* The %td and %zu formats are used only when the compiler is not VC and claims
conformance to C99 or later, because VC and older compilers don't support
them. Otherwise %lu is used, and SIZ_CAST supplies a cast to unsigned long
int. */

#if !defined(_MSC_VER) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define PTR_FORM "td"
#define SIZ_FORM "zu"
#define SIZ_CAST
#else
#define PTR_FORM "lu"
#define SIZ_FORM "lu"
#define SIZ_CAST (unsigned long int)
#endif
176 
177 /* ------------------End of system-specific definitions -------------------- */
178 
179 /* Glueing macros that are used in several places below. */
180 
181 #define glue(a,b) a##b
182 #define G(a,b) glue(a,b)
183 
184 /* Miscellaneous parameters and manifests */
185 
/* Supply CLOCKS_PER_SEC when it is not already defined: use CLK_TCK if that
exists, and otherwise fall back to 100. */

#if !defined(CLOCKS_PER_SEC) && defined(CLK_TCK)
#define CLOCKS_PER_SEC CLK_TCK
#elif !defined(CLOCKS_PER_SEC)
#define CLOCKS_PER_SEC 100
#endif
193 
194 #define CFORE_UNSET UINT32_MAX    /* Unset value for startend/cfail/cerror fields */
195 #define CONVERT_UNSET UINT32_MAX  /* Unset value for convert_type field */
196 #define DFA_WS_DIMENSION 1000     /* Size of DFA workspace */
197 #define DEFAULT_OVECCOUNT 15      /* Default ovector count */
198 #define JUNK_OFFSET 0xdeadbeef    /* For initializing ovector */
199 #define LOCALESIZE 32             /* Size of locale name */
200 #define LOOPREPEAT 500000         /* Default loop count for timing */
201 #define MALLOCLISTSIZE 20         /* For remembering mallocs */
202 #define PARENS_NEST_DEFAULT 220   /* Default parentheses nest limit */
203 #define PATSTACKSIZE 20           /* Pattern stack for save/restore testing */
204 #define REPLACE_MODSIZE 100       /* Field for reading 8-bit replacement */
205 #define VERSION_SIZE 64           /* Size of buffer for the version strings */
206 
207 /* Make sure the buffer into which replacement strings are copied is big enough
208 to hold them as 32-bit code units. */
209 
210 #define REPLACE_BUFFSIZE 1024   /* This is a byte value */
211 
212 /* Execution modes */
213 
214 #define PCRE8_MODE   8
215 #define PCRE16_MODE 16
216 #define PCRE32_MODE 32
217 
/* Processing returns */

enum {
  PR_OK,
  PR_SKIP,
  PR_ABEND
};
221 
/* The macro PRINTABLE determines whether to print an output character as-is or
as a hex value when showing compiled patterns. We use it in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK uses isprint() when use_tables is not NULL and the value is less
than 256; otherwise it falls back to PRINTABLE. The argument is parenthesized
in the range test so that an operator expression such as (a | b) is compared as
a whole, and a value of 256 or more can never reach isprint(). The argument is
evaluated more than once, so it must not have side effects. */

#define PRINTOK(c) ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
234 
235 /* We have to include some of the library source files because we need
236 to use some of the macros, internal structure definitions, and other internal
237 values - pcre2test has "inside information" compared to an application program
238 that strictly follows the PCRE2 API.
239 
240 Before including pcre2_internal.h we define PRIV so that it does not get
241 defined therein. This ensures that PRIV names in the included files do not
242 clash with those in the libraries. Also, although pcre2_internal.h does itself
243 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
244 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
245 for building the library. */
246 
247 #define PRIV(name) name
248 #define PCRE2_CODE_UNIT_WIDTH 0
249 #include "pcre2.h"
250 #include "pcre2posix.h"
251 #include "pcre2_internal.h"
252 
/* We need access to some of the data tables that PCRE2 uses. Defining
PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
of PRIV avoids name clashes. */
256 
257 #define PCRE2_PCRE2TEST
258 #include "pcre2_tables.c"
259 #include "pcre2_ucd.c"
260 
/* 32-bit integer values in the input are read by strtoul() or strtol(). The
check needed for overflow depends on whether long ints are in fact longer than
ints. They are defined not to be shorter. When long is wider than 32 bits, the
value is range-checked; otherwise it is compared with the limit values. The
macro arguments are parenthesized so that any expression can be passed. */

#if ULONG_MAX > UINT32_MAX
#define U32OVERFLOW(x) ((x) > UINT32_MAX)
#else
#define U32OVERFLOW(x) ((x) == UINT32_MAX)
#endif

#if LONG_MAX > INT32_MAX
#define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
#else
#define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
#endif
276 
277 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
278 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
279 defined. We can now include it for each supported code unit width. Because
280 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
281 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
282 while including these files, and then restore it to a no-op. Because LINK_SIZE
283 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
284 these inclusions should not be changed. */
285 
286 #undef PCRE2_SUFFIX
287 #undef PCRE2_CODE_UNIT_WIDTH
288 
289 #ifdef   SUPPORT_PCRE2_8
290 #define  PCRE2_CODE_UNIT_WIDTH 8
291 #define  PCRE2_SUFFIX(a) G(a,8)
292 #include "pcre2_intmodedep.h"
293 #include "pcre2_printint.c"
294 #undef   PCRE2_CODE_UNIT_WIDTH
295 #undef   PCRE2_SUFFIX
296 #endif   /* SUPPORT_PCRE2_8 */
297 
298 #ifdef   SUPPORT_PCRE2_16
299 #define  PCRE2_CODE_UNIT_WIDTH 16
300 #define  PCRE2_SUFFIX(a) G(a,16)
301 #include "pcre2_intmodedep.h"
302 #include "pcre2_printint.c"
303 #undef   PCRE2_CODE_UNIT_WIDTH
304 #undef   PCRE2_SUFFIX
305 #endif   /* SUPPORT_PCRE2_16 */
306 
307 #ifdef   SUPPORT_PCRE2_32
308 #define  PCRE2_CODE_UNIT_WIDTH 32
309 #define  PCRE2_SUFFIX(a) G(a,32)
310 #include "pcre2_intmodedep.h"
311 #include "pcre2_printint.c"
312 #undef   PCRE2_CODE_UNIT_WIDTH
313 #undef   PCRE2_SUFFIX
314 #endif   /* SUPPORT_PCRE2_32 */
315 
316 #define PCRE2_SUFFIX(a) a
317 
318 /* We need to be able to check input text for UTF-8 validity, whatever code
319 widths are actually available, because the input to pcre2test is always in
320 8-bit code units. So we include the UTF validity checking function for 8-bit
321 code units. */
322 
323 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
324 
325 #define  PCRE2_CODE_UNIT_WIDTH 8
326 #undef   PCRE2_SPTR
327 #define  PCRE2_SPTR PCRE2_SPTR8
328 #include "pcre2_valid_utf.c"
329 #undef   PCRE2_CODE_UNIT_WIDTH
330 #undef   PCRE2_SPTR
331 
/* If we have 8-bit support, default to it; if there is also 16- or 32-bit
support, it can be selected by a command-line option. If there is no 8-bit
support, there must be 16-bit or 32-bit support, so default to one of them. The
config function, JIT stack, contexts, and version string are the same in all
modes, so use the form of the first that is available. */
337 
338 #if defined SUPPORT_PCRE2_8
339 #define DEFAULT_TEST_MODE PCRE8_MODE
340 #define VERSION_TYPE PCRE2_UCHAR8
341 #define PCRE2_CONFIG pcre2_config_8
342 #define PCRE2_JIT_STACK pcre2_jit_stack_8
343 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8
344 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8
345 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_8
346 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8
347 
348 #elif defined SUPPORT_PCRE2_16
349 #define DEFAULT_TEST_MODE PCRE16_MODE
350 #define VERSION_TYPE PCRE2_UCHAR16
351 #define PCRE2_CONFIG pcre2_config_16
352 #define PCRE2_JIT_STACK pcre2_jit_stack_16
353 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16
354 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16
355 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_16
356 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16
357 
358 #elif defined SUPPORT_PCRE2_32
359 #define DEFAULT_TEST_MODE PCRE32_MODE
360 #define VERSION_TYPE PCRE2_UCHAR32
361 #define PCRE2_CONFIG pcre2_config_32
362 #define PCRE2_JIT_STACK pcre2_jit_stack_32
363 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32
364 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32
365 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_32
366 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32
367 #endif
368 
/* ------------- Structure and table for handling #-commands ------------- */

/* Identifiers for the #-commands. Each value except the last is also the index
of the corresponding entry in cmdlist[] below; CMD_UNKNOWN has no entry. */

enum {
  CMD_FORBID_UTF,
  CMD_LOAD,
  CMD_NEWLINE_DEFAULT,
  CMD_PATTERN,
  CMD_PERLTEST,
  CMD_POP,
  CMD_POPCOPY,
  CMD_SAVE,
  CMD_SUBJECT,
  CMD_UNKNOWN
};

/* A table entry pairs a command name with its identifier. */

typedef struct cmdstruct {
  const char *name;
  int  value;
} cmdstruct;

static cmdstruct cmdlist[] = {
  { "forbid_utf",      CMD_FORBID_UTF },
  { "load",            CMD_LOAD },
  { "newline_default", CMD_NEWLINE_DEFAULT },
  { "pattern",         CMD_PATTERN },
  { "perltest",        CMD_PERLTEST },
  { "pop",             CMD_POP },
  { "popcopy",         CMD_POPCOPY },
  { "save",            CMD_SAVE },
  { "subject",         CMD_SUBJECT }
};

/* Number of entries in cmdlist[] */

#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdlist[0]))
391 
392 /* ------------- Structures and tables for handling modifiers -------------- */
393 
/* Names of the newline types, one per entry. This table must be kept in step
with the definitions of PCRE2_NEWLINE_xx in pcre2.h. */

static const char *newlines[] = {
  "DEFAULT",
  "CR",
  "LF",
  "CRLF",
  "ANY",
  "ANYCRLF",
  "NUL"
};
399 
400 /* Structure and table for handling pattern conversion types. */
401 
402 typedef struct convertstruct {
403   const char *name;
404   uint32_t option;
405 } convertstruct;
406 
407 static convertstruct convertlist[] = {
408   { "glob",                   PCRE2_CONVERT_GLOB },
409   { "glob_no_starstar",       PCRE2_CONVERT_GLOB_NO_STARSTAR },
410   { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
411   { "posix_basic",            PCRE2_CONVERT_POSIX_BASIC },
412   { "posix_extended",         PCRE2_CONVERT_POSIX_EXTENDED },
413   { "unset",                  CONVERT_UNSET }};
414 
415 #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
416 
/* Modifier types and applicability. The first group of values describes what a
modifier applies to; the second group describes the kind of value it takes. */

enum {
  /* Applicability */

  MOD_CTC,    /* A compile context */
  MOD_CTM,    /* A match context */
  MOD_PAT,    /* A pattern */
  MOD_PATP,   /* A pattern; also OK for Perl test */
  MOD_DAT,    /* A data line */
  MOD_PD,     /* A pattern or a data line */
  MOD_PDP,    /* As MOD_PD; also OK for Perl test */
  MOD_PND,    /* As MOD_PD, but not for a default pattern */
  MOD_PNDP,   /* As MOD_PND; also OK for Perl test */

  /* Value types */

  MOD_CHR,    /* A single character */
  MOD_CON,    /* A "convert" type/options list */
  MOD_CTL,    /* A control bit */
  MOD_BSR,    /* A BSR value */
  MOD_IN2,    /* One or two unsigned integers */
  MOD_INS,    /* A signed integer */
  MOD_INT,    /* An unsigned integer */
  MOD_IND,    /* An unsigned integer, but no value => default */
  MOD_NL,     /* A newline value */
  MOD_NN,     /* A number or a name; more than one may occur */
  MOD_OPT,    /* An option bit */
  MOD_SIZ,    /* A PCRE2_SIZE value */
  MOD_STR     /* A string */
};
441 
/* Control bits. Some apply to compiling, some to matching, but some can be set
either on a pattern or a data line, so they must all be distinct. There are now
so many of them that they are split into two fields. Each bit is written as a
shift so that its position within the word is explicit. */

/* First control word */

#define CTL_AFTERTEXT                    (1u <<  0)
#define CTL_ALLAFTERTEXT                 (1u <<  1)
#define CTL_ALLCAPTURES                  (1u <<  2)
#define CTL_ALLUSEDTEXT                  (1u <<  3)
#define CTL_ALTGLOBAL                    (1u <<  4)
#define CTL_BINCODE                      (1u <<  5)
#define CTL_CALLOUT_CAPTURE              (1u <<  6)
#define CTL_CALLOUT_INFO                 (1u <<  7)
#define CTL_CALLOUT_NONE                 (1u <<  8)
#define CTL_DFA                          (1u <<  9)
#define CTL_EXPAND                       (1u << 10)
#define CTL_FINDLIMITS                   (1u << 11)
#define CTL_FRAMESIZE                    (1u << 12)
#define CTL_FULLBINCODE                  (1u << 13)
#define CTL_GETALL                       (1u << 14)
#define CTL_GLOBAL                       (1u << 15)
#define CTL_HEXPAT                       (1u << 16)  /* Same word as USE_LENGTH */
#define CTL_INFO                         (1u << 17)
#define CTL_JITFAST                      (1u << 18)
#define CTL_JITVERIFY                    (1u << 19)
#define CTL_MARK                         (1u << 20)
#define CTL_MEMORY                       (1u << 21)
#define CTL_NULLCONTEXT                  (1u << 22)
#define CTL_POSIX                        (1u << 23)
#define CTL_POSIX_NOSUB                  (1u << 24)
#define CTL_PUSH                         (1u << 25)  /* These three must be */
#define CTL_PUSHCOPY                     (1u << 26)  /*   all in the same */
#define CTL_PUSHTABLESCOPY               (1u << 27)  /*     word. */
#define CTL_STARTCHAR                    (1u << 28)
#define CTL_USE_LENGTH                   (1u << 29)  /* Same word as HEXPAT */
#define CTL_UTF8_INPUT                   (1u << 30)
#define CTL_ZERO_TERMINATE               (1u << 31)

/* Combinations of first-word bits */

#define CTL_DEBUG    (CTL_FULLBINCODE | CTL_INFO)  /* For setting */
#define CTL_ANYINFO  (CTL_DEBUG | CTL_BINCODE | CTL_CALLOUT_INFO)
#define CTL_ANYGLOB  (CTL_ALTGLOBAL | CTL_GLOBAL)

/* Second control word */

#define CTL2_SUBSTITUTE_EXTENDED         (1u <<  0)
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH  (1u <<  1)
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET    (1u <<  2)
#define CTL2_SUBSTITUTE_UNSET_EMPTY      (1u <<  3)
#define CTL2_SUBJECT_LITERAL             (1u <<  4)
#define CTL2_CALLOUT_NO_WHERE            (1u <<  5)
#define CTL2_CALLOUT_EXTRA               (1u <<  6)

#define CTL2_NL_SET                      (1u << 30)  /* Informational */
#define CTL2_BSR_SET                     (1u << 31)  /* Informational */

/* The matching controls that may be set either on a pattern or on a data line
are collected below. They are copied from the pattern controls as initial
settings for data line controls. Note that CTL_MEMORY is not included here,
because it does different things in the two cases. */

#define CTL_ALLPD \
  (CTL_AFTERTEXT    | \
   CTL_ALLAFTERTEXT | \
   CTL_ALLCAPTURES  | \
   CTL_ALLUSEDTEXT  | \
   CTL_ALTGLOBAL    | \
   CTL_GLOBAL       | \
   CTL_MARK         | \
   CTL_STARTCHAR    | \
   CTL_UTF8_INPUT)

#define CTL2_ALLPD \
  (CTL2_SUBSTITUTE_EXTENDED        | \
   CTL2_SUBSTITUTE_OVERFLOW_LENGTH | \
   CTL2_SUBSTITUTE_UNKNOWN_UNSET   | \
   CTL2_SUBSTITUTE_UNSET_EMPTY)
517 
518 /* Structures for holding modifier information for patterns and subject strings
519 (data). Fields containing modifiers that can be set either for a pattern or a
520 subject must be at the start and in the same order in both cases so that the
521 same offset in the big table below works for both. */
522 
523 typedef struct patctl {    /* Structure for pattern modifiers. */
524   uint32_t  options;       /* Must be in same position as datctl */
525   uint32_t  control;       /* Must be in same position as datctl */
526   uint32_t  control2;      /* Must be in same position as datctl */
527   uint32_t  jitstack;      /* Must be in same position as datctl */
528    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
529   uint32_t  jit;
530   uint32_t  stackguard_test;
531   uint32_t  tables_id;
532   uint32_t  convert_type;
533   uint32_t  convert_length;
534   uint32_t  convert_glob_escape;
535   uint32_t  convert_glob_separator;
536   uint32_t  regerror_buffsize;
537    uint8_t  locale[LOCALESIZE];
538 } patctl;
539 
540 #define MAXCPYGET 10
541 #define LENCPYGET 64
542 
543 typedef struct datctl {    /* Structure for data line modifiers. */
544   uint32_t  options;       /* Must be in same position as patctl */
545   uint32_t  control;       /* Must be in same position as patctl */
546   uint32_t  control2;      /* Must be in same position as patctl */
547   uint32_t  jitstack;      /* Must be in same position as patctl */
548    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
549   uint32_t  startend[2];
550   uint32_t  cerror[2];
551   uint32_t  cfail[2];
552    int32_t  callout_data;
553    int32_t  copy_numbers[MAXCPYGET];
554    int32_t  get_numbers[MAXCPYGET];
555   uint32_t  oveccount;
556   uint32_t  offset;
557   uint8_t   copy_names[LENCPYGET];
558   uint8_t   get_names[LENCPYGET];
559 } datctl;
560 
/* Identifiers that select which context is to be modified. */

enum {
  CTX_PAT,      /* The active pattern context */
  CTX_POPPAT,   /* The same, for a popped pattern */
  CTX_DEFPAT,   /* The default pattern context */
  CTX_DAT,      /* The active data (match) context */
  CTX_DEFDAT    /* The default data (match) context */
};
568 
569 /* Macros to simplify the big table below. */
570 
571 #define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)
572 #define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)
573 #define PO(name) offsetof(patctl, name)
574 #define PD(name) PO(name)
575 #define DO(name) offsetof(datctl, name)
576 
577 /* Table of all long-form modifiers. Must be in collating sequence of modifier
578 name because it is searched by binary chop. */
579 
580 typedef struct modstruct {
581   const char   *name;
582   uint16_t      which;
583   uint16_t      type;
584   uint32_t      value;
585   PCRE2_SIZE    offset;
586 } modstruct;
587 
588 static modstruct modlist[] = {
589   { "aftertext",                  MOD_PNDP, MOD_CTL, CTL_AFTERTEXT,              PO(control) },
590   { "allaftertext",               MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT,           PO(control) },
591   { "allcaptures",                MOD_PND,  MOD_CTL, CTL_ALLCAPTURES,            PO(control) },
592   { "allow_empty_class",          MOD_PAT,  MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS,    PO(options) },
593   { "allow_surrogate_escapes",    MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
594   { "allusedtext",                MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT,            PO(control) },
595   { "alt_bsux",                   MOD_PAT,  MOD_OPT, PCRE2_ALT_BSUX,             PO(options) },
596   { "alt_circumflex",             MOD_PAT,  MOD_OPT, PCRE2_ALT_CIRCUMFLEX,       PO(options) },
597   { "alt_verbnames",              MOD_PAT,  MOD_OPT, PCRE2_ALT_VERBNAMES,        PO(options) },
598   { "altglobal",                  MOD_PND,  MOD_CTL, CTL_ALTGLOBAL,              PO(control) },
599   { "anchored",                   MOD_PD,   MOD_OPT, PCRE2_ANCHORED,             PD(options) },
600   { "auto_callout",               MOD_PAT,  MOD_OPT, PCRE2_AUTO_CALLOUT,         PO(options) },
601   { "bad_escape_is_literal",      MOD_CTC,  MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
602   { "bincode",                    MOD_PAT,  MOD_CTL, CTL_BINCODE,                PO(control) },
603   { "bsr",                        MOD_CTC,  MOD_BSR, 0,                          CO(bsr_convention) },
604   { "callout_capture",            MOD_DAT,  MOD_CTL, CTL_CALLOUT_CAPTURE,        DO(control) },
605   { "callout_data",               MOD_DAT,  MOD_INS, 0,                          DO(callout_data) },
606   { "callout_error",              MOD_DAT,  MOD_IN2, 0,                          DO(cerror) },
607   { "callout_extra",              MOD_DAT,  MOD_CTL, CTL2_CALLOUT_EXTRA,         DO(control2) },
608   { "callout_fail",               MOD_DAT,  MOD_IN2, 0,                          DO(cfail) },
609   { "callout_info",               MOD_PAT,  MOD_CTL, CTL_CALLOUT_INFO,           PO(control) },
610   { "callout_no_where",           MOD_DAT,  MOD_CTL, CTL2_CALLOUT_NO_WHERE,      DO(control2) },
611   { "callout_none",               MOD_DAT,  MOD_CTL, CTL_CALLOUT_NONE,           DO(control) },
612   { "caseless",                   MOD_PATP, MOD_OPT, PCRE2_CASELESS,             PO(options) },
613   { "convert",                    MOD_PAT,  MOD_CON, 0,                          PO(convert_type) },
614   { "convert_glob_escape",        MOD_PAT,  MOD_CHR, 0,                          PO(convert_glob_escape) },
615   { "convert_glob_separator",     MOD_PAT,  MOD_CHR, 0,                          PO(convert_glob_separator) },
616   { "convert_length",             MOD_PAT,  MOD_INT, 0,                          PO(convert_length) },
617   { "copy",                       MOD_DAT,  MOD_NN,  DO(copy_numbers),           DO(copy_names) },
618   { "debug",                      MOD_PAT,  MOD_CTL, CTL_DEBUG,                  PO(control) },
619   { "depth_limit",                MOD_CTM,  MOD_INT, 0,                          MO(depth_limit) },
620   { "dfa",                        MOD_DAT,  MOD_CTL, CTL_DFA,                    DO(control) },
621   { "dfa_restart",                MOD_DAT,  MOD_OPT, PCRE2_DFA_RESTART,          DO(options) },
622   { "dfa_shortest",               MOD_DAT,  MOD_OPT, PCRE2_DFA_SHORTEST,         DO(options) },
623   { "dollar_endonly",             MOD_PAT,  MOD_OPT, PCRE2_DOLLAR_ENDONLY,       PO(options) },
624   { "dotall",                     MOD_PATP, MOD_OPT, PCRE2_DOTALL,               PO(options) },
625   { "dupnames",                   MOD_PATP, MOD_OPT, PCRE2_DUPNAMES,             PO(options) },
626   { "endanchored",                MOD_PD,   MOD_OPT, PCRE2_ENDANCHORED,          PD(options) },
627   { "expand",                     MOD_PAT,  MOD_CTL, CTL_EXPAND,                 PO(control) },
628   { "extended",                   MOD_PATP, MOD_OPT, PCRE2_EXTENDED,             PO(options) },
629   { "extended_more",              MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE,        PO(options) },
630   { "find_limits",                MOD_DAT,  MOD_CTL, CTL_FINDLIMITS,             DO(control) },
631   { "firstline",                  MOD_PAT,  MOD_OPT, PCRE2_FIRSTLINE,            PO(options) },
632   { "framesize",                  MOD_PAT,  MOD_CTL, CTL_FRAMESIZE,              PO(control) },
633   { "fullbincode",                MOD_PAT,  MOD_CTL, CTL_FULLBINCODE,            PO(control) },
634   { "get",                        MOD_DAT,  MOD_NN,  DO(get_numbers),            DO(get_names) },
635   { "getall",                     MOD_DAT,  MOD_CTL, CTL_GETALL,                 DO(control) },
636   { "global",                     MOD_PNDP, MOD_CTL, CTL_GLOBAL,                 PO(control) },
637   { "heap_limit",                 MOD_CTM,  MOD_INT, 0,                          MO(heap_limit) },
638   { "hex",                        MOD_PAT,  MOD_CTL, CTL_HEXPAT,                 PO(control) },
639   { "info",                       MOD_PAT,  MOD_CTL, CTL_INFO,                   PO(control) },
640   { "jit",                        MOD_PAT,  MOD_IND, 7,                          PO(jit) },
641   { "jitfast",                    MOD_PAT,  MOD_CTL, CTL_JITFAST,                PO(control) },
642   { "jitstack",                   MOD_PNDP, MOD_INT, 0,                          PO(jitstack) },
643   { "jitverify",                  MOD_PAT,  MOD_CTL, CTL_JITVERIFY,              PO(control) },
644   { "literal",                    MOD_PAT,  MOD_OPT, PCRE2_LITERAL,              PO(options) },
645   { "locale",                     MOD_PAT,  MOD_STR, LOCALESIZE,                 PO(locale) },
646   { "mark",                       MOD_PNDP, MOD_CTL, CTL_MARK,                   PO(control) },
647   { "match_limit",                MOD_CTM,  MOD_INT, 0,                          MO(match_limit) },
648   { "match_line",                 MOD_CTC,  MOD_OPT, PCRE2_EXTRA_MATCH_LINE,     CO(extra_options) },
649   { "match_unset_backref",        MOD_PAT,  MOD_OPT, PCRE2_MATCH_UNSET_BACKREF,  PO(options) },
650   { "match_word",                 MOD_CTC,  MOD_OPT, PCRE2_EXTRA_MATCH_WORD,     CO(extra_options) },
651   { "max_pattern_length",         MOD_CTC,  MOD_SIZ, 0,                          CO(max_pattern_length) },
652   { "memory",                     MOD_PD,   MOD_CTL, CTL_MEMORY,                 PD(control) },
653   { "multiline",                  MOD_PATP, MOD_OPT, PCRE2_MULTILINE,            PO(options) },
654   { "never_backslash_c",          MOD_PAT,  MOD_OPT, PCRE2_NEVER_BACKSLASH_C,    PO(options) },
655   { "never_ucp",                  MOD_PAT,  MOD_OPT, PCRE2_NEVER_UCP,            PO(options) },
656   { "never_utf",                  MOD_PAT,  MOD_OPT, PCRE2_NEVER_UTF,            PO(options) },
657   { "newline",                    MOD_CTC,  MOD_NL,  0,                          CO(newline_convention) },
658   { "no_auto_capture",            MOD_PAT,  MOD_OPT, PCRE2_NO_AUTO_CAPTURE,      PO(options) },
659   { "no_auto_possess",            MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS,      PO(options) },
660   { "no_dotstar_anchor",          MOD_PAT,  MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR,    PO(options) },
661   { "no_jit",                     MOD_DAT,  MOD_OPT, PCRE2_NO_JIT,               DO(options) },
662   { "no_start_optimize",          MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE,    PO(options) },
663   { "no_utf_check",               MOD_PD,   MOD_OPT, PCRE2_NO_UTF_CHECK,         PD(options) },
664   { "notbol",                     MOD_DAT,  MOD_OPT, PCRE2_NOTBOL,               DO(options) },
665   { "notempty",                   MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY,             DO(options) },
666   { "notempty_atstart",           MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY_ATSTART,     DO(options) },
667   { "noteol",                     MOD_DAT,  MOD_OPT, PCRE2_NOTEOL,               DO(options) },
668   { "null_context",               MOD_PD,   MOD_CTL, CTL_NULLCONTEXT,            PO(control) },
669   { "offset",                     MOD_DAT,  MOD_INT, 0,                          DO(offset) },
670   { "offset_limit",               MOD_CTM,  MOD_SIZ, 0,                          MO(offset_limit)},
671   { "ovector",                    MOD_DAT,  MOD_INT, 0,                          DO(oveccount) },
672   { "parens_nest_limit",          MOD_CTC,  MOD_INT, 0,                          CO(parens_nest_limit) },
673   { "partial_hard",               MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
674   { "partial_soft",               MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
675   { "ph",                         MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
676   { "posix",                      MOD_PAT,  MOD_CTL, CTL_POSIX,                  PO(control) },
677   { "posix_nosub",                MOD_PAT,  MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB,  PO(control) },
678   { "posix_startend",             MOD_DAT,  MOD_IN2, 0,                          DO(startend) },
679   { "ps",                         MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
680   { "push",                       MOD_PAT,  MOD_CTL, CTL_PUSH,                   PO(control) },
681   { "pushcopy",                   MOD_PAT,  MOD_CTL, CTL_PUSHCOPY,               PO(control) },
682   { "pushtablescopy",             MOD_PAT,  MOD_CTL, CTL_PUSHTABLESCOPY,         PO(control) },
683   { "recursion_limit",            MOD_CTM,  MOD_INT, 0,                          MO(depth_limit) },  /* Obsolete synonym */
684   { "regerror_buffsize",          MOD_PAT,  MOD_INT, 0,                          PO(regerror_buffsize) },
685   { "replace",                    MOD_PND,  MOD_STR, REPLACE_MODSIZE,            PO(replacement) },
686   { "stackguard",                 MOD_PAT,  MOD_INT, 0,                          PO(stackguard_test) },
687   { "startchar",                  MOD_PND,  MOD_CTL, CTL_STARTCHAR,              PO(control) },
688   { "startoffset",                MOD_DAT,  MOD_INT, 0,                          DO(offset) },
689   { "subject_literal",            MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL,       PO(control2) },
690   { "substitute_extended",        MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_EXTENDED,   PO(control2) },
691   { "substitute_overflow_length", MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
692   { "substitute_unknown_unset",   MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
693   { "substitute_unset_empty",     MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
694   { "tables",                     MOD_PAT,  MOD_INT, 0,                          PO(tables_id) },
695   { "ucp",                        MOD_PATP, MOD_OPT, PCRE2_UCP,                  PO(options) },
696   { "ungreedy",                   MOD_PAT,  MOD_OPT, PCRE2_UNGREEDY,             PO(options) },
697   { "use_length",                 MOD_PAT,  MOD_CTL, CTL_USE_LENGTH,             PO(control) },
698   { "use_offset_limit",           MOD_PAT,  MOD_OPT, PCRE2_USE_OFFSET_LIMIT,     PO(options) },
699   { "utf",                        MOD_PATP, MOD_OPT, PCRE2_UTF,                  PO(options) },
700   { "utf8_input",                 MOD_PAT,  MOD_CTL, CTL_UTF8_INPUT,             PO(control) },
701   { "zero_terminate",             MOD_DAT,  MOD_CTL, CTL_ZERO_TERMINATE,         DO(control) }
702 };
703 
/* Number of entries in modlist. The replacement list is fully parenthesized so
that the macro acts as a single operand inside a larger expression (for example
x / MODLISTCOUNT or x % MODLISTCOUNT). */
#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
705 
706 /* Controls and options that are supported for use with the POSIX interface. */
/* Each macro is a parenthesized bit mask formed by OR-ing the listed flags.
The extra-options mask and both "CONTROLS2" masks are currently empty, i.e.
defined as (0). How the masks are applied is outside this chunk. */
707 
708 #define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
709   PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_LITERAL|PCRE2_MULTILINE|PCRE2_UCP| \
710   PCRE2_UTF|PCRE2_UNGREEDY)
711 
712 #define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)
713 
714 #define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
715   CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_EXPAND|CTL_HEXPAT|CTL_POSIX| \
716   CTL_POSIX_NOSUB|CTL_USE_LENGTH)
717 
718 #define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)
719 
720 #define POSIX_SUPPORTED_MATCH_OPTIONS ( \
721   PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL)
722 
723 #define POSIX_SUPPORTED_MATCH_CONTROLS  (CTL_AFTERTEXT|CTL_ALLAFTERTEXT)
724 #define POSIX_SUPPORTED_MATCH_CONTROLS2 (0)
725 
726 /* Control bits that are not ignored with 'push'. */
/* The first mask covers bits of the first control word; the "CONTROLS2" mask
lists CTL2_* bits (presumably for the second control word - confirm). */
727 
728 #define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
729   CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \
730   CTL_JITVERIFY|CTL_MEMORY|CTL_FRAMESIZE|CTL_PUSH|CTL_PUSHCOPY| \
731   CTL_PUSHTABLESCOPY|CTL_USE_LENGTH)
732 
733 #define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL2_BSR_SET|CTL2_NL_SET)
734 
735 /* Controls that apply only at compile time with 'push'. */
/* NOTE(review): PUSH_COMPILE_ONLY_CONTROLS expands to the bare CTL_JITVERIFY
token without its own parentheses; this is safe only as long as CTL_JITVERIFY
itself expands to a single operand - confirm at its definition. */
736 
737 #define PUSH_COMPILE_ONLY_CONTROLS   CTL_JITVERIFY
738 #define PUSH_COMPILE_ONLY_CONTROLS2  (0)
739 
740 /* Controls that are forbidden with #pop or #popcopy. */
/* A single parenthesized mask of first-control-word bits. */
741 
742 #define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \
743   CTL_PUSHCOPY|CTL_PUSHTABLESCOPY|CTL_USE_LENGTH)
744 
745 /* Pattern controls that are mutually exclusive. At present these are all in
746 the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
747 CTL_POSIX, so it doesn't need its own entries. */
/* Each element is the OR of one pair of control bits. The code that scans this
table is outside this chunk, so keep the existing entry order when editing. */
748 
749 static uint32_t exclusive_pat_controls[] = {
750   CTL_POSIX    | CTL_PUSH,
751   CTL_POSIX    | CTL_PUSHCOPY,
752   CTL_POSIX    | CTL_PUSHTABLESCOPY,
753   CTL_PUSH     | CTL_PUSHCOPY,
754   CTL_PUSH     | CTL_PUSHTABLESCOPY,
755   CTL_PUSHCOPY | CTL_PUSHTABLESCOPY,
756   CTL_EXPAND   | CTL_HEXPAT };
757 
758 /* Data controls that are mutually exclusive. At present these are all in the
759 first control word. */
/* Each element is the OR of one pair of control bits; there are two pairs. */
760 
761 static uint32_t exclusive_dat_controls[] = {
762   CTL_ALLUSEDTEXT | CTL_STARTCHAR,
763   CTL_FINDLIMITS  | CTL_NULLCONTEXT };
764 
/* Table of single-character abbreviated modifiers. The index field is
initialized to -1, but the first time the modifier is encountered, it is filled
in with the index of the full entry in modlist, to save repeated searching when
processing multiple test items. This short list is searched serially, so its
order does not matter. */

typedef struct c1modstruct {
  const char *fullname;   /* Name of the full modifier */
  uint32_t    onechar;    /* The single-character abbreviation */
  int         index;      /* Cached modlist index; -1 until first looked up */
} c1modstruct;

static c1modstruct c1modlist[] = {
  { "bincode",         'B',           -1 },
  { "info",            'I',           -1 },
  { "global",          'g',           -1 },
  { "caseless",        'i',           -1 },
  { "multiline",       'm',           -1 },
  { "no_auto_capture", 'n',           -1 },
  { "dotall",          's',           -1 },
  { "extended",        'x',           -1 }
};

/* Number of entries in c1modlist. The replacement list is fully parenthesized
so that the macro acts as a single operand inside a larger expression. */

#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modlist[0]))
789 
790 /* Table of arguments for the -C command line option. Use macros to make the
791 table itself easier to read. */
792 
/* SUPPORT_8/16/32 are set to 1 when the matching SUPPORT_PCRE2_n macro is
defined; the #ifndef blocks that follow give each one the value 0 otherwise. */
793 #if defined SUPPORT_PCRE2_8
794 #define SUPPORT_8 1
795 #endif
796 #if defined SUPPORT_PCRE2_16
797 #define SUPPORT_16 1
798 #endif
799 #if defined SUPPORT_PCRE2_32
800 #define SUPPORT_32 1
801 #endif
802 
803 #ifndef SUPPORT_8
804 #define SUPPORT_8 0
805 #endif
806 #ifndef SUPPORT_16
807 #define SUPPORT_16 0
808 #endif
809 #ifndef SUPPORT_32
810 #define SUPPORT_32 0
811 #endif
812 
/* With EBCDIC defined, SUPPORT_EBCDIC is 1 and EBCDIC_NL is CHAR_LF; otherwise
both are 0. */
813 #ifdef EBCDIC
814 #define SUPPORT_EBCDIC 1
815 #define EBCDIC_NL CHAR_LF
816 #else
817 #define SUPPORT_EBCDIC 0
818 #define EBCDIC_NL 0
819 #endif
820 
/* BACKSLASH_C is 0 when NEVER_BACKSLASH_C is defined and 1 otherwise. */
821 #ifdef NEVER_BACKSLASH_C
822 #define BACKSLASH_C 0
823 #else
824 #define BACKSLASH_C 1
825 #endif
826 
/* One entry of the -C argument table: the argument name, a CONF_* type code
from the enum below, and a value. In the table, CONF_FIX and CONF_FIZ entries
carry one of the compile-time macros defined above, while CONF_BSR, CONF_INT
and CONF_NL entries carry a PCRE2_CONFIG_* code. How each type is reported is
decided by code outside this chunk. */
827 typedef struct coptstruct {
828   const char *name;
829   uint32_t    type;
830   uint32_t    value;
831 } coptstruct;
832 
833 enum { CONF_BSR,
834        CONF_FIX,
835        CONF_FIZ,
836        CONF_INT,
837        CONF_NL
838 };
839 
/* NOTE(review): the names are in ascending character order ("pcre2-16",
"pcre2-32", "pcre2-8"); presumably the lookup relies on that - confirm before
reordering or inserting entries. */
840 static coptstruct coptlist[] = {
841   { "backslash-C", CONF_FIX, BACKSLASH_C },
842   { "bsr",         CONF_BSR, PCRE2_CONFIG_BSR },
843   { "ebcdic",      CONF_FIX, SUPPORT_EBCDIC },
844   { "ebcdic-nl",   CONF_FIZ, EBCDIC_NL },
845   { "jit",         CONF_INT, PCRE2_CONFIG_JIT },
846   { "linksize",    CONF_INT, PCRE2_CONFIG_LINKSIZE },
847   { "newline",     CONF_NL,  PCRE2_CONFIG_NEWLINE },
848   { "pcre2-16",    CONF_FIX, SUPPORT_16 },
849   { "pcre2-32",    CONF_FIX, SUPPORT_32 },
850   { "pcre2-8",     CONF_FIX, SUPPORT_8 },
851   { "unicode",     CONF_INT, PCRE2_CONFIG_UNICODE }
852 };
853 
/* Number of entries in coptlist. The replacement list is fully parenthesized
so that the macro acts as a single operand inside a larger expression. */
#define COPTLISTCOUNT (sizeof(coptlist)/sizeof(coptstruct))
855 
/* Remove the table-building helper macros now that coptlist has been
initialized. NOTE(review): EBCDIC_NL and BACKSLASH_C are not undefined here -
confirm whether later code still uses them. */
856 #undef SUPPORT_8
857 #undef SUPPORT_16
858 #undef SUPPORT_32
859 #undef SUPPORT_EBCDIC
860 
861 
862 /* ----------------------- Static variables ------------------------ */
863 
/* File handles. outfile is the stream that PCRE2_PRINTINT hands to
pcre2_printint_N(). */
864 static FILE *infile;
865 static FILE *outfile;
866 
/* JIT stack pointer and, presumably, the size it was last created with
(confirm against the code that uses PCRE2_JIT_STACK_CREATE). */
867 static const void *last_callout_mark;
868 static PCRE2_JIT_STACK *jit_stack = NULL;
869 static size_t jit_stack_size = 0;
870 
871 static BOOL first_callout;
872 static BOOL jit_was_used;
873 static BOOL restrict_for_perl_test = FALSE;
874 static BOOL show_memory = FALSE;
875 
/* test_mode is the selector tested by every mode-dispatch macro below; it
starts as DEFAULT_TEST_MODE. */
876 static int code_unit_size;                    /* Bytes */
877 static int jitrc;                             /* Return from JIT compile */
878 static int test_mode = DEFAULT_TEST_MODE;
879 static int timeit = 0;
880 static int timeitm = 0;
881 
/* NOTE(review): unlike every other variable in this section, the three timing
totals below are not declared static, so they have external linkage. Confirm
whether that is intentional. */
882 clock_t total_compile_time = 0;
883 clock_t total_jit_compile_time = 0;
884 clock_t total_match_time = 0;
885 
886 static uint32_t dfa_matched;
887 static uint32_t forbid_utf = 0;
888 static uint32_t maxlookbehind;
889 static uint32_t max_oveccount;
890 static uint32_t callout_count;
891 
892 static uint16_t local_newline_default = 0;
893 
/* Fixed-size buffers of VERSION_SIZE elements of VERSION_TYPE (both defined
elsewhere). */
894 static VERSION_TYPE jittarget[VERSION_SIZE];
895 static VERSION_TYPE version[VERSION_SIZE];
896 static VERSION_TYPE uversion[VERSION_SIZE];
897 
/* Default and current control blocks for patterns (patctl) and data lines
(datctl); the types are declared elsewhere. */
898 static patctl def_patctl;
899 static patctl pat_patctl;
900 static datctl def_datctl;
901 static datctl dat_datctl;
902 
/* Fixed-capacity arrays sized by PATSTACKSIZE and MALLOCLISTSIZE (defined
elsewhere); patstacknext and malloclistptr both start at 0. */
903 static void *patstack[PATSTACKSIZE];
904 static int patstacknext = 0;
905 
906 static void *malloclist[MALLOCLISTSIZE];
907 static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE];
908 static uint32_t malloclistptr = 0;
909 
/* The regex_t object exists only when 8-bit support is compiled in. */
910 #ifdef SUPPORT_PCRE2_8
911 static regex_t preg = { NULL, NULL, 0, 0, 0, 0 };
912 #endif
913 
914 static int *dfa_workspace = NULL;
915 static const uint8_t *locale_tables = NULL;
916 static const uint8_t *use_tables = NULL;
917 static uint8_t locale_name[32];
918 
919 /* We need buffers for building 16/32-bit strings; 8-bit strings don't need
920 rebuilding, but set up the same naming scheme for use in macros. The "buffer"
921 buffer is where all input lines are read. Its size is the same as pbuffer8.
922 Pattern lines are always copied to pbuffer8 for use in callouts, even if they
923 are actually compiled from pbuffer16 or pbuffer32. */
/* Both pointers start as NULL; the allocation is done outside this chunk. */
924 
925 static size_t    pbuffer8_size  = 50000;        /* Initial size, bytes */
926 static uint8_t  *pbuffer8 = NULL;
927 static uint8_t  *buffer = NULL;
928 
929 /* The dbuffer is where all processed data lines are put. In non-8-bit modes it
930 is cast as needed. For long data lines it grows as necessary. */
/* The initial size 1u << 14 is 16384 bytes. */
931 
932 static size_t dbuffer_size = 1u << 14;    /* Initial size, bytes */
933 static uint8_t *dbuffer = NULL;
934 
935 
936 /* ---------------- Mode-dependent variables -------------------*/
937 
/* One set of library objects per code-unit width, each compiled in only when
the matching SUPPORT_PCRE2_n macro is defined. The names differ only in their
8/16/32 suffix, which is what the mode-dispatch macros rely on. The 16- and
32-bit sets additionally own a pbufferN pointer and its size, which start as
NULL and 0. */
938 #ifdef SUPPORT_PCRE2_8
939 static pcre2_code_8             *compiled_code8;
940 static pcre2_general_context_8  *general_context8, *general_context_copy8;
941 static pcre2_compile_context_8  *pat_context8, *default_pat_context8;
942 static pcre2_convert_context_8  *con_context8, *default_con_context8;
943 static pcre2_match_context_8    *dat_context8, *default_dat_context8;
944 static pcre2_match_data_8       *match_data8;
945 #endif
946 
947 #ifdef SUPPORT_PCRE2_16
948 static pcre2_code_16            *compiled_code16;
949 static pcre2_general_context_16 *general_context16, *general_context_copy16;
950 static pcre2_compile_context_16 *pat_context16, *default_pat_context16;
951 static pcre2_convert_context_16 *con_context16, *default_con_context16;
952 static pcre2_match_context_16   *dat_context16, *default_dat_context16;
953 static pcre2_match_data_16      *match_data16;
954 static PCRE2_SIZE pbuffer16_size = 0;   /* Set only when needed */
955 static uint16_t *pbuffer16 = NULL;
956 #endif
957 
958 #ifdef SUPPORT_PCRE2_32
959 static pcre2_code_32            *compiled_code32;
960 static pcre2_general_context_32 *general_context32, *general_context_copy32;
961 static pcre2_compile_context_32 *pat_context32, *default_pat_context32;
962 static pcre2_convert_context_32 *con_context32, *default_con_context32;
963 static pcre2_match_context_32   *dat_context32, *default_dat_context32;
964 static pcre2_match_data_32      *match_data32;
965 static PCRE2_SIZE pbuffer32_size = 0;   /* Set only when needed */
966 static uint32_t *pbuffer32 = NULL;
967 #endif
968 
969 
970 /* ---------------- Macros that work in all modes ----------------- */
971 
/* Thin wrappers over macros defined elsewhere: CAST8VAR is CASTVAR with the
type fixed to uint8_t *, and SET / SETPLUS are SETOP with the operator fixed to
= and += respectively. */
972 #define CAST8VAR(x) CASTVAR(uint8_t *, x)
973 #define SET(x,y) SETOP(x,y,=)
974 #define SETPLUS(x,y) SETOP(x,y,+=)
/* strlen() for a zero-terminated byte string held through a non-char pointer.
The argument is parenthesized so that the cast applies to the whole argument
expression, not just to its first operand. */
#define strlen8(x) strlen((char *)(x))
976 
977 
978 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
979 
980 /* Define macros for variables and functions that must be selected dynamically
981 depending on the mode setting (8, 16, 32). These are dependent on which modes
982 are supported. */
983 
984 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
985      defined (SUPPORT_PCRE2_32)) >= 2
986 
987 /* ----- All three modes supported ----- */
988 
989 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
990 
/* Expression macro: yields field b of the width-specific object for a, cast to
type t. The 8-bit object is used when test_mode is PCRE8_MODE, the 16-bit one
when it is PCRE16_MODE, and the 32-bit one otherwise. G() is defined outside
this chunk; presumably it appends the width suffix to the name (confirm). */
991 #define CASTFLD(t,a,b) ((test_mode == PCRE8_MODE)? (t)(G(a,8)->b) : \
992   (test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : (t)(G(a,32)->b))
993 
/* Expression macro: yields the width-specific variable for x, cast to type t.
Selection by test_mode is 8-bit, then 16-bit, otherwise 32-bit. G() is defined
outside this chunk; presumably it appends the width suffix (confirm). */
994 #define CASTVAR(t,x) ( \
995   (test_mode == PCRE8_MODE)? (t)G(x,8) : \
996   (test_mode == PCRE16_MODE)? (t)G(x,16) : (t)G(x,32))
997 
/* Expression macro: reads element b of a, treating a as a PCRE2_SPTR8,
PCRE2_SPTR16 or PCRE2_SPTR32 according to test_mode (32-bit is the fallback),
and converts the value to uint32_t. */
998 #define CODE_UNIT(a,b) ( \
999   (test_mode == PCRE8_MODE)? (uint32_t)(((PCRE2_SPTR8)(a))[b]) : \
1000   (test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \
1001   (uint32_t)(((PCRE2_SPTR32)(a))[b]))
1002 
/* Statement macro: memcpy()s one convert-context structure of the current
width from b to a (8-bit, then 16-bit, otherwise 32-bit). The byte count is
the sizeof of the matching pcre2_convert_context_N type. Expands to an
unbraced if/else chain with no trailing semicolon. G() is defined outside this
chunk; presumably it appends the width suffix (confirm). */
1003 #define CONCTXCPY(a,b) \
1004   if (test_mode == PCRE8_MODE) \
1005     memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8)); \
1006   else if (test_mode == PCRE16_MODE) \
1007     memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16)); \
1008   else memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
1009 
/* Statement macro: copies c code units from b into the width-specific buffer
for a. The byte count passed to memcpy() is c in 8-bit mode, c*2 in 16-bit mode
and c*4 otherwise. Arguments b and c are parenthesized so that expression
arguments (for example ptr + 1) are cast and scaled as a whole. The chain ends
in a plain else, like the other mode-dispatch macros, so that an else written
after a use of this macro cannot attach to an inner if. Expands to an unbraced
if/else chain with no trailing semicolon. */

#define CONVERT_COPY(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),(char *)(b),(c)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),(char *)(b),(c)*2); \
  else \
    memcpy(G(a,32),(char *)(b),(c)*4)
1017 
/* Statement macro: memcpy()s one match-context structure of the current width
from b to a (8-bit, then 16-bit, otherwise 32-bit), using the sizeof of the
matching pcre2_match_context_N type. Unbraced if/else chain, no trailing
semicolon. G() is defined outside this chunk; presumably it appends the width
suffix (confirm). */
1018 #define DATCTXCPY(a,b) \
1019   if (test_mode == PCRE8_MODE) \
1020     memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \
1021   else if (test_mode == PCRE16_MODE) \
1022     memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \
1023   else memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
1024 
/* Expression macro: like CASTFLD but without a cast - yields field b of the
width-specific object for a, chosen by test_mode (32-bit is the fallback). */
1025 #define FLD(a,b) ((test_mode == PCRE8_MODE)? G(a,8)->b : \
1026   (test_mode == PCRE16_MODE)? G(a,16)->b : G(a,32)->b)
1027 
/* Statement macro: memcpy()s one compile-context structure of the current
width from b to a (8-bit, then 16-bit, otherwise 32-bit), using the sizeof of
the matching pcre2_compile_context_N type. Unbraced if/else chain, no trailing
semicolon. G() is defined outside this chunk; presumably it appends the width
suffix (confirm). */
1028 #define PATCTXCPY(a,b) \
1029   if (test_mode == PCRE8_MODE) \
1030     memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \
1031   else if (test_mode == PCRE16_MODE) \
1032     memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \
1033   else memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
1034 
/* Statement macro: calls pchars32(), pchars16() or pchars8() on p advanced by
offset code units of the matching width, and stores the return value in lv.
Note the test order: 32-bit first, then 16-bit, with 8-bit as the fallback -
the reverse of most macros in this section. offset is not parenthesized in the
expansion. Unbraced if/else chain, no trailing semicolon. */
1035 #define PCHARS(lv, p, offset, len, utf, f) \
1036   if (test_mode == PCRE32_MODE) \
1037     lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
1038   else if (test_mode == PCRE16_MODE) \
1039     lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
1040   else \
1041     lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1042 
/* Statement macro: same dispatch as PCHARS (32-bit, then 16-bit, 8-bit as the
fallback) but the return value of pcharsN() is explicitly discarded. Unbraced
if/else chain, no trailing semicolon. */
1043 #define PCHARSV(p, offset, len, utf, f) \
1044   if (test_mode == PCRE32_MODE) \
1045     (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
1046   else if (test_mode == PCRE16_MODE) \
1047     (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
1048   else \
1049     (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1050 
/* Statement macro: calls pcre2_callout_enumerate_N() on compiled_codeN (named
directly, not through G()), passing b cast to the width-specific callback type
and c as the final argument; the result is stored in a. Selection is 8-bit,
then 16-bit, otherwise 32-bit. Unbraced if/else chain, no trailing semicolon. */
1051 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
1052   if (test_mode == PCRE8_MODE) \
1053      a = pcre2_callout_enumerate_8(compiled_code8, \
1054        (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \
1055   else if (test_mode == PCRE16_MODE) \
1056      a = pcre2_callout_enumerate_16(compiled_code16, \
1057        (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \
1058   else \
1059      a = pcre2_callout_enumerate_32(compiled_code32, \
1060        (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
1061 
/* Statement macro: assigns the result of pcre2_code_copy_N(b) to the
width-specific variable for a (8-bit, then 16-bit, otherwise 32-bit). b is
passed through unchanged. Unbraced if/else chain, no trailing semicolon. G()
is defined outside this chunk; presumably it appends the width suffix
(confirm). */
1062 #define PCRE2_CODE_COPY_FROM_VOID(a,b) \
1063   if (test_mode == PCRE8_MODE) \
1064     G(a,8) = pcre2_code_copy_8(b); \
1065   else if (test_mode == PCRE16_MODE) \
1066     G(a,16) = pcre2_code_copy_16(b); \
1067   else \
1068     G(a,32) = pcre2_code_copy_32(b)
1069 
/* Statement macro: calls pcre2_code_copy_N() on the width-specific variable
for b and stores the result in a as a void pointer (8-bit, then 16-bit,
otherwise 32-bit). Unbraced if/else chain, no trailing semicolon. G() is
defined outside this chunk; presumably it appends the width suffix (confirm). */
1070 #define PCRE2_CODE_COPY_TO_VOID(a,b) \
1071   if (test_mode == PCRE8_MODE) \
1072     a = (void *)pcre2_code_copy_8(G(b,8)); \
1073   else if (test_mode == PCRE16_MODE) \
1074     a = (void *)pcre2_code_copy_16(G(b,16)); \
1075   else \
1076     a = (void *)pcre2_code_copy_32(G(b,32))
1077 
/* Statement macro: as PCRE2_CODE_COPY_TO_VOID, but calls
pcre2_code_copy_with_tables_N() instead. The result is stored in a as a void
pointer. Unbraced if/else chain, no trailing semicolon. */
1078 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
1079   if (test_mode == PCRE8_MODE) \
1080     a = (void *)pcre2_code_copy_with_tables_8(G(b,8)); \
1081   else if (test_mode == PCRE16_MODE) \
1082     a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \
1083   else \
1084     a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
1085 
/* Statement macro: calls pcre2_compile_N() with the width-specific variable
for b as the first argument and c..g passed through unchanged, storing the
result in the width-specific variable for a (8-bit, then 16-bit, otherwise
32-bit). Unbraced if/else chain, no trailing semicolon. G() is defined outside
this chunk; presumably it appends the width suffix (confirm). */
1086 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
1087   if (test_mode == PCRE8_MODE) \
1088     G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \
1089   else if (test_mode == PCRE16_MODE) \
1090     G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g); \
1091   else \
1092     G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
1093 
/* Statement macro: calls pcre2_converted_pattern_free_N() on a, cast to the
PCRE2_UCHARn pointer type of the current width (8-bit, then 16-bit, otherwise
32-bit). Unbraced if/else chain, no trailing semicolon. */
1094 #define PCRE2_CONVERTED_PATTERN_FREE(a) \
1095   if (test_mode == PCRE8_MODE) pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a); \
1096   else if (test_mode == PCRE16_MODE) pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a); \
1097   else pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
1098 
/* Statement macro: calls pcre2_dfa_match_N() and stores the result in a. b and
g are resolved to their width-specific variables through G(); c is cast to the
matching PCRE2_SPTRn type; d, e, f, h, i and j are passed through unchanged.
Selection is 8-bit, then 16-bit, otherwise 32-bit. Unbraced if/else chain, no
trailing semicolon. G() is defined outside this chunk (presumably suffix
pasting - confirm). */
1099 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
1100   if (test_mode == PCRE8_MODE) \
1101     a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \
1102   else if (test_mode == PCRE16_MODE) \
1103     a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \
1104   else \
1105     a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
1106 
/* Statement macro: calls pcre2_get_error_message_N(a, buffer, length) and
stores the result in r. The buffer is the width-specific variable for b. The
length argument is built with a nested G(): presumably G(G(b,16),_size/2)
pastes to "<b>16_size" followed by "/2", i.e. the buffer's size variable
divided by the code-unit width in bytes - confirm against G's definition.
Selection is 8-bit, then 16-bit, otherwise 32-bit. Unbraced if/else chain, no
trailing semicolon. */
1107 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
1108   if (test_mode == PCRE8_MODE) \
1109     r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \
1110   else if (test_mode == PCRE16_MODE) \
1111     r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2)); \
1112   else \
1113     r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))
1114 
/* Statement macro: stores in a the result of pcre2_get_ovector_count_N() for
the width-specific variable for b (8-bit, then 16-bit, otherwise 32-bit).
Unbraced if/else chain, no trailing semicolon. G() is defined outside this
chunk (presumably suffix pasting - confirm). */
1115 #define PCRE2_GET_OVECTOR_COUNT(a,b) \
1116   if (test_mode == PCRE8_MODE) \
1117     a = pcre2_get_ovector_count_8(G(b,8)); \
1118   else if (test_mode == PCRE16_MODE) \
1119     a = pcre2_get_ovector_count_16(G(b,16)); \
1120   else \
1121     a = pcre2_get_ovector_count_32(G(b,32))
1122 
/* Statement macro: stores in a the result of pcre2_get_startchar_N() for the
width-specific variable for b (8-bit, then 16-bit, otherwise 32-bit). Unbraced
if/else chain, no trailing semicolon. G() is defined outside this chunk
(presumably suffix pasting - confirm). */
1123 #define PCRE2_GET_STARTCHAR(a,b) \
1124   if (test_mode == PCRE8_MODE) \
1125     a = pcre2_get_startchar_8(G(b,8)); \
1126   else if (test_mode == PCRE16_MODE) \
1127     a = pcre2_get_startchar_16(G(b,16)); \
1128   else \
1129     a = pcre2_get_startchar_32(G(b,32))
1130 
/* Statement macro: calls pcre2_jit_compile_N() on the width-specific variable
for a with b passed through, storing the result in r (8-bit, then 16-bit,
otherwise 32-bit). Unbraced if/else chain, no trailing semicolon. G() is
defined outside this chunk (presumably suffix pasting - confirm). */
1131 #define PCRE2_JIT_COMPILE(r,a,b) \
1132   if (test_mode == PCRE8_MODE) r = pcre2_jit_compile_8(G(a,8),b); \
1133   else if (test_mode == PCRE16_MODE) r = pcre2_jit_compile_16(G(a,16),b); \
1134   else r = pcre2_jit_compile_32(G(a,32),b)
1135 
/* Statement macro: calls pcre2_jit_free_unused_memory_N() on the
width-specific variable for a (8-bit, then 16-bit, otherwise 32-bit). Unbraced
if/else chain, no trailing semicolon. G() is defined outside this chunk
(presumably suffix pasting - confirm). */
1136 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
1137   if (test_mode == PCRE8_MODE) pcre2_jit_free_unused_memory_8(G(a,8)); \
1138   else if (test_mode == PCRE16_MODE) pcre2_jit_free_unused_memory_16(G(a,16)); \
1139   else pcre2_jit_free_unused_memory_32(G(a,32))
1140 
/* Statement macro: calls pcre2_jit_match_N() and stores the result in a. b and
g are resolved to their width-specific variables through G(); c is cast to the
matching PCRE2_SPTRn type; d, e, f and h are passed through unchanged.
Selection is 8-bit, then 16-bit, otherwise 32-bit. Unbraced if/else chain, no
trailing semicolon. G() is defined outside this chunk (presumably suffix
pasting - confirm). */
1141 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
1142   if (test_mode == PCRE8_MODE) \
1143     a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
1144   else if (test_mode == PCRE16_MODE) \
1145     a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
1146   else \
1147     a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1148 
/* Statement macro: calls pcre2_jit_stack_create_N(b,c,d) for the current width
(8-bit, then 16-bit, otherwise 32-bit) and stores the result in a, cast to
PCRE2_JIT_STACK *. Like the other mode-dispatch macros it expands to an
unbraced if/else chain with no trailing semicolon; the caller supplies the
semicolon, which keeps "if (...) PCRE2_JIT_STACK_CREATE(...); else ..." valid. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)
1156 
/* Statement macro: calls pcre2_jit_stack_assign_N() on the width-specific
variable for a, with b cast to the matching pcre2_jit_callback_N type and c
passed through (8-bit, then 16-bit, otherwise 32-bit). Like the other
mode-dispatch macros it expands to an unbraced if/else chain with no trailing
semicolon; the caller supplies the semicolon, which keeps
"if (...) PCRE2_JIT_STACK_ASSIGN(...); else ..." valid. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
  else \
    pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)
1164 
/* Statement macro: calls pcre2_jit_stack_free_N() on a, cast to the
pcre2_jit_stack_N pointer type of the current width (8-bit, then 16-bit,
otherwise 32-bit). Like the other mode-dispatch macros it expands to an
unbraced if/else chain with no trailing semicolon; the caller supplies the
semicolon, which keeps "if (...) PCRE2_JIT_STACK_FREE(...); else ..." valid. */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
1172 
/* Statement macro: stores in a the result of pcre2_maketables_N(NULL) for the
current width (8-bit, then 16-bit, otherwise 32-bit). Unbraced if/else chain,
no trailing semicolon. */
1173 #define PCRE2_MAKETABLES(a) \
1174   if (test_mode == PCRE8_MODE) a = pcre2_maketables_8(NULL); \
1175   else if (test_mode == PCRE16_MODE) a = pcre2_maketables_16(NULL); \
1176   else a = pcre2_maketables_32(NULL)
1177 
/* Statement macro: calls pcre2_match_N() and stores the result in a. b and g
are resolved to their width-specific variables through G(); c is cast to the
matching PCRE2_SPTRn type; d, e, f and h are passed through unchanged.
Selection is 8-bit, then 16-bit, otherwise 32-bit. Unbraced if/else chain, no
trailing semicolon. G() is defined outside this chunk (presumably suffix
pasting - confirm). */
1178 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
1179   if (test_mode == PCRE8_MODE) \
1180     a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
1181   else if (test_mode == PCRE16_MODE) \
1182     a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
1183   else \
1184     a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1185 
/* Statement macro: stores the result of pcre2_match_data_create_N(b,c) in the
width-specific variable for a (8-bit, then 16-bit, otherwise 32-bit). Unbraced
if/else chain, no trailing semicolon. G() is defined outside this chunk
(presumably suffix pasting - confirm). */
1186 #define PCRE2_MATCH_DATA_CREATE(a,b,c) \
1187   if (test_mode == PCRE8_MODE) \
1188     G(a,8) = pcre2_match_data_create_8(b,c); \
1189   else if (test_mode == PCRE16_MODE) \
1190     G(a,16) = pcre2_match_data_create_16(b,c); \
1191   else \
1192     G(a,32) = pcre2_match_data_create_32(b,c)
1193 
/* Statement macro: calls pcre2_match_data_create_from_pattern_N() on the
width-specific variable for b with c passed through, storing the result in the
width-specific variable for a (8-bit, then 16-bit, otherwise 32-bit). Unbraced
if/else chain, no trailing semicolon. G() is defined outside this chunk
(presumably suffix pasting - confirm). */
1194 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
1195   if (test_mode == PCRE8_MODE) \
1196     G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c); \
1197   else if (test_mode == PCRE16_MODE) \
1198     G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c); \
1199   else \
1200     G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
1201 
/* Statement macro: calls pcre2_match_data_free_N() on the width-specific
variable for a (8-bit, then 16-bit, otherwise 32-bit). Unbraced if/else chain,
no trailing semicolon. G() is defined outside this chunk (presumably suffix
pasting - confirm). */
1202 #define PCRE2_MATCH_DATA_FREE(a) \
1203   if (test_mode == PCRE8_MODE) \
1204     pcre2_match_data_free_8(G(a,8)); \
1205   else if (test_mode == PCRE16_MODE) \
1206     pcre2_match_data_free_16(G(a,16)); \
1207   else \
1208     pcre2_match_data_free_32(G(a,32))
1209 
/* Statement macro: calls pcre2_pattern_convert_N() and stores the result in a.
b and g are resolved to their width-specific variables through G(); e is cast
to a pointer-to-pointer of the matching PCRE2_UCHARn type; c, d and f are
passed through unchanged. Selection is 8-bit, then 16-bit, otherwise 32-bit.
Unbraced if/else chain, no trailing semicolon. G() is defined outside this
chunk (presumably suffix pasting - confirm). */
1210 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
1211   if (test_mode == PCRE8_MODE) \
1212     a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)); \
1213   else if (test_mode == PCRE16_MODE) \
1214     a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)); \
1215   else \
1216     a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
1217 
/* Statement macro: calls pcre2_pattern_info_N() on the width-specific variable
for b with c and d passed through, storing the result in a (8-bit, then 16-bit,
otherwise 32-bit). Unbraced if/else chain, no trailing semicolon. G() is
defined outside this chunk (presumably suffix pasting - confirm). */
1218 #define PCRE2_PATTERN_INFO(a,b,c,d) \
1219   if (test_mode == PCRE8_MODE) \
1220     a = pcre2_pattern_info_8(G(b,8),c,d); \
1221   else if (test_mode == PCRE16_MODE) \
1222     a = pcre2_pattern_info_16(G(b,16),c,d); \
1223   else \
1224     a = pcre2_pattern_info_32(G(b,32),c,d)
1225 
/* Statement macro: calls pcre2_printint_N(compiled_codeN, outfile, a) for the
current width (8-bit, then 16-bit, otherwise 32-bit). The compiled code and
the output stream are the file-scope variables, named directly. Unbraced
if/else chain, no trailing semicolon. */
1226 #define PCRE2_PRINTINT(a) \
1227   if (test_mode == PCRE8_MODE) \
1228     pcre2_printint_8(compiled_code8,outfile,a); \
1229   else if (test_mode == PCRE16_MODE) \
1230     pcre2_printint_16(compiled_code16,outfile,a); \
1231   else \
1232     pcre2_printint_32(compiled_code32,outfile,a)
1233 
/* Statement macro: calls pcre2_serialize_decode_N() and stores the result in
r. a is cast to a pointer-to-pointer of the matching pcre2_code_N type, b and c
are passed through, and d is resolved to its width-specific variable through
G(). Selection is 8-bit, then 16-bit, otherwise 32-bit. Unbraced if/else
chain, no trailing semicolon. G() is defined outside this chunk (presumably
suffix pasting - confirm). */
1234 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
1235   if (test_mode == PCRE8_MODE) \
1236     r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \
1237   else if (test_mode == PCRE16_MODE) \
1238     r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \
1239   else \
1240     r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
1241 
/* Statement macro: calls pcre2_serialize_encode_N() and stores the result in
r. a is cast to a pointer to const pcre2_code_N pointers, b, c and d are passed
through, and e is resolved to its width-specific variable through G().
Selection is 8-bit, then 16-bit, otherwise 32-bit. Unbraced if/else chain, no
trailing semicolon. G() is defined outside this chunk (presumably suffix
pasting - confirm). */
1242 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
1243   if (test_mode == PCRE8_MODE) \
1244     r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \
1245   else if (test_mode == PCRE16_MODE) \
1246     r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \
1247   else \
1248     r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
1249 
/* Statement macro: calls pcre2_serialize_free_N(a) for the current width
(8-bit, then 16-bit, otherwise 32-bit); a is passed through unchanged.
Unbraced if/else chain, no trailing semicolon. */
1250 #define PCRE2_SERIALIZE_FREE(a) \
1251   if (test_mode == PCRE8_MODE) \
1252     pcre2_serialize_free_8(a); \
1253   else if (test_mode == PCRE16_MODE) \
1254     pcre2_serialize_free_16(a); \
1255   else \
1256     pcre2_serialize_free_32(a)
1257 
/* Statement macro: stores in r the result of
pcre2_serialize_get_number_of_codes_N(a) for the current width (8-bit, then
16-bit, otherwise 32-bit). Like the other mode-dispatch macros it expands to
an unbraced if/else chain with no trailing semicolon and no line continuation
after the last line; the caller supplies the semicolon. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_get_number_of_codes_8(a); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_get_number_of_codes_16(a); \
  else \
    r = pcre2_serialize_get_number_of_codes_32(a)
1265 
/* Statement macro: calls pcre2_set_callout_N() on the width-specific variable
for a, with b cast to the callout function-pointer type of the current width
and c passed through (8-bit, then 16-bit, otherwise 32-bit). Like the other
mode-dispatch macros it expands to an unbraced if/else chain with no trailing
semicolon; the caller supplies the semicolon, which keeps
"if (...) PCRE2_SET_CALLOUT(...); else ..." valid. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
1273 
/* Statement macro: calls pcre2_set_character_tables_N() on the width-specific
variable for a with b passed through (8-bit, then 16-bit, otherwise 32-bit).
Any return value is ignored. Unbraced if/else chain, no trailing semicolon.
G() is defined outside this chunk (presumably suffix pasting - confirm). */
1274 #define PCRE2_SET_CHARACTER_TABLES(a,b) \
1275   if (test_mode == PCRE8_MODE) \
1276     pcre2_set_character_tables_8(G(a,8),b); \
1277   else if (test_mode == PCRE16_MODE) \
1278     pcre2_set_character_tables_16(G(a,16),b); \
1279   else \
1280     pcre2_set_character_tables_32(G(a,32),b)
1281 
/* Statement macro: calls pcre2_set_compile_recursion_guard_N() on the
width-specific variable for a with b and c passed through (8-bit, then 16-bit,
otherwise 32-bit). Any return value is ignored. Unbraced if/else chain, no
trailing semicolon. G() is defined outside this chunk (presumably suffix
pasting - confirm). */
1282 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
1283   if (test_mode == PCRE8_MODE) \
1284     pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \
1285   else if (test_mode == PCRE16_MODE) \
1286     pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \
1287   else \
1288     pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
1289 
/* Statement macro: calls pcre2_set_depth_limit_N() on the width-specific
variable for a with b passed through (8-bit, then 16-bit, otherwise 32-bit).
Any return value is ignored. Unbraced if/else chain, no trailing semicolon.
G() is defined outside this chunk (presumably suffix pasting - confirm). */
1290 #define PCRE2_SET_DEPTH_LIMIT(a,b) \
1291   if (test_mode == PCRE8_MODE) \
1292     pcre2_set_depth_limit_8(G(a,8),b); \
1293   else if (test_mode == PCRE16_MODE) \
1294     pcre2_set_depth_limit_16(G(a,16),b); \
1295   else \
1296     pcre2_set_depth_limit_32(G(a,32),b)
1297 
/* Statement macro: calls pcre2_set_glob_separator_N() on the width-specific
variable for a with b passed through, storing the result in r (8-bit, then
16-bit, otherwise 32-bit). Unbraced if/else chain, no trailing semicolon. G()
is defined outside this chunk (presumably suffix pasting - confirm). */
1298 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
1299   if (test_mode == PCRE8_MODE) \
1300     r = pcre2_set_glob_separator_8(G(a,8),b); \
1301   else if (test_mode == PCRE16_MODE) \
1302     r = pcre2_set_glob_separator_16(G(a,16),b); \
1303   else \
1304     r = pcre2_set_glob_separator_32(G(a,32),b)
1305 
/* Statement macro: calls pcre2_set_glob_escape_N() on the width-specific
variable for a with b passed through, storing the result in r (8-bit, then
16-bit, otherwise 32-bit). Unbraced if/else chain, no trailing semicolon. G()
is defined outside this chunk (presumably suffix pasting - confirm). */
1306 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
1307   if (test_mode == PCRE8_MODE) \
1308     r = pcre2_set_glob_escape_8(G(a,8),b); \
1309   else if (test_mode == PCRE16_MODE) \
1310     r = pcre2_set_glob_escape_16(G(a,16),b); \
1311   else \
1312     r = pcre2_set_glob_escape_32(G(a,32),b)
1313 
/* Statement macro: calls pcre2_set_heap_limit_N() on the width-specific
variable for a with b passed through (8-bit, then 16-bit, otherwise 32-bit).
Any return value is ignored. Unbraced if/else chain, no trailing semicolon.
G() is defined outside this chunk (presumably suffix pasting - confirm). */
1314 #define PCRE2_SET_HEAP_LIMIT(a,b) \
1315   if (test_mode == PCRE8_MODE) \
1316     pcre2_set_heap_limit_8(G(a,8),b); \
1317   else if (test_mode == PCRE16_MODE) \
1318     pcre2_set_heap_limit_16(G(a,16),b); \
1319   else \
1320     pcre2_set_heap_limit_32(G(a,32),b)
1321 
/* Statement macro: calls pcre2_set_match_limit_N() on the width-specific
variable for a with b passed through (8-bit, then 16-bit, otherwise 32-bit).
Any return value is ignored. Unbraced if/else chain, no trailing semicolon.
G() is defined outside this chunk (presumably suffix pasting - confirm). */
1322 #define PCRE2_SET_MATCH_LIMIT(a,b) \
1323   if (test_mode == PCRE8_MODE) \
1324     pcre2_set_match_limit_8(G(a,8),b); \
1325   else if (test_mode == PCRE16_MODE) \
1326     pcre2_set_match_limit_16(G(a,16),b); \
1327   else \
1328     pcre2_set_match_limit_32(G(a,32),b)
1329 
/* Statement macro: calls pcre2_set_max_pattern_length_N() on the
width-specific variable for a with b passed through (8-bit, then 16-bit,
otherwise 32-bit). Any return value is ignored. Unbraced if/else chain, no
trailing semicolon. G() is defined outside this chunk (presumably suffix
pasting - confirm). */
1330 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
1331   if (test_mode == PCRE8_MODE) \
1332     pcre2_set_max_pattern_length_8(G(a,8),b); \
1333   else if (test_mode == PCRE16_MODE) \
1334     pcre2_set_max_pattern_length_16(G(a,16),b); \
1335   else \
1336     pcre2_set_max_pattern_length_32(G(a,32),b)
1337 
/* Statement macro: calls pcre2_set_offset_limit_N() on the width-specific
variable for a with b passed through (8-bit, then 16-bit, otherwise 32-bit).
Any return value is ignored. Unbraced if/else chain, no trailing semicolon.
G() is defined outside this chunk (presumably suffix pasting - confirm). */
1338 #define PCRE2_SET_OFFSET_LIMIT(a,b) \
1339   if (test_mode == PCRE8_MODE) \
1340     pcre2_set_offset_limit_8(G(a,8),b); \
1341   else if (test_mode == PCRE16_MODE) \
1342     pcre2_set_offset_limit_16(G(a,16),b); \
1343   else \
1344     pcre2_set_offset_limit_32(G(a,32),b)
1345 
/* Statement macro: calls pcre2_set_parens_nest_limit_N() on the width-specific
variable for a with b passed through (8-bit, then 16-bit, otherwise 32-bit).
Any return value is ignored. Unbraced if/else chain, no trailing semicolon.
G() is defined outside this chunk (presumably suffix pasting - confirm). */
1346 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
1347   if (test_mode == PCRE8_MODE) \
1348     pcre2_set_parens_nest_limit_8(G(a,8),b); \
1349   else if (test_mode == PCRE16_MODE) \
1350     pcre2_set_parens_nest_limit_16(G(a,16),b); \
1351   else \
1352     pcre2_set_parens_nest_limit_32(G(a,32),b)
1353 
/* Statement macro: calls pcre2_substitute_N() and stores the result in a. b, g
and h are resolved to their width-specific variables through G(); c and i are
cast to the matching PCRE2_SPTRn type and k to a PCRE2_UCHARn pointer; d, e, f,
j and l are passed through unchanged. Selection is 8-bit, then 16-bit,
otherwise 32-bit. Unbraced if/else chain, no trailing semicolon. G() is
defined outside this chunk (presumably suffix pasting - confirm). */
1354 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
1355   if (test_mode == PCRE8_MODE) \
1356     a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
1357       (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
1358   else if (test_mode == PCRE16_MODE) \
1359     a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
1360       (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
1361   else \
1362     a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
1363       (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
1364 
/* Statement macro: calls pcre2_substring_copy_byname_N() and stores the result
in a. b and c are resolved to their width-specific variables through G(); d is
cast to a PCRE2_UCHARn pointer; e is passed through. Selection is 8-bit, then
16-bit, otherwise 32-bit. Unbraced if/else chain, no trailing semicolon. G()
is defined outside this chunk (presumably suffix pasting - confirm). */
1365 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
1366   if (test_mode == PCRE8_MODE) \
1367     a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
1368   else if (test_mode == PCRE16_MODE) \
1369     a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \
1370   else \
1371     a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
1372 
/* Statement macro: calls pcre2_substring_copy_bynumber_N() and stores the
result in a. b is resolved to its width-specific variable through G(); c and e
are passed through; d is cast to a PCRE2_UCHARn pointer. Selection is 8-bit,
then 16-bit, otherwise 32-bit. Unbraced if/else chain, no trailing semicolon.
G() is defined outside this chunk (presumably suffix pasting - confirm). */
1373 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
1374   if (test_mode == PCRE8_MODE) \
1375     a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e); \
1376   else if (test_mode == PCRE16_MODE) \
1377     a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); \
1378   else \
1379     a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
1380 
/* Statement macro: calls pcre2_substring_free_N() on a, cast to the
PCRE2_UCHARn pointer type of the current width (8-bit, then 16-bit, otherwise
32-bit). Unbraced if/else chain, no trailing semicolon. */
1381 #define PCRE2_SUBSTRING_FREE(a) \
1382   if (test_mode == PCRE8_MODE) pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \
1383   else if (test_mode == PCRE16_MODE) \
1384     pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \
1385   else pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
1386 
/* Statement macro: calls pcre2_substring_get_byname_N() and stores the result
in a. b and c are resolved to their width-specific variables through G(); d is
cast to a pointer-to-pointer of the matching PCRE2_UCHARn type; e is passed
through. Selection is 8-bit, then 16-bit, otherwise 32-bit. Unbraced if/else
chain, no trailing semicolon. G() is defined outside this chunk (presumably
suffix pasting - confirm). */
1387 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
1388   if (test_mode == PCRE8_MODE) \
1389     a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \
1390   else if (test_mode == PCRE16_MODE) \
1391     a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \
1392   else \
1393     a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
1394 
1395 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
1396   if (test_mode == PCRE8_MODE) \
1397     a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e); \
1398   else if (test_mode == PCRE16_MODE) \
1399     a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e); \
1400   else \
1401     a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
1402 
/* Call pcre2_substring_length_byname_NN() for the code unit width selected by
test_mode and store its return value in a. G() appends the width to b and c;
d is passed through unchanged. */
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a, b, c, d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_length_byname_8(G(b, 8), G(c, 8), d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_length_byname_16(G(b, 16), G(c, 16), d); \
  else \
    a = pcre2_substring_length_byname_32(G(b, 32), G(c, 32), d)

/* The same dispatch for pcre2_substring_length_bynumber_NN(). Only b receives
a width suffix here; c and d are handed over exactly as given. */
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a, b, c, d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_length_bynumber_8(G(b, 8), c, d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_length_bynumber_16(G(b, 16), c, d); \
  else \
    a = pcre2_substring_length_bynumber_32(G(b, 32), c, d)
1418 
/* Call pcre2_substring_list_get_NN() for the code unit width selected by
test_mode and store its return value in a. G() appends the width to b; c is
cast to a triple pointer of the matching PCRE2_UCHARnn type; d is passed
through unchanged. */
#define PCRE2_SUBSTRING_LIST_GET(a, b, c, d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_list_get_8(G(b, 8), \
      (PCRE2_UCHAR8 ***)c, d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_list_get_16(G(b, 16), \
      (PCRE2_UCHAR16 ***)c, d); \
  else \
    a = pcre2_substring_list_get_32(G(b, 32), \
      (PCRE2_UCHAR32 ***)c, d)

/* Hand a to pcre2_substring_list_free_NN() for the current test_mode, cast to
a pointer to the PCRE2_SPTRnn type of that width. */
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_list_free_8( \
      (PCRE2_SPTR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_list_free_16( \
      (PCRE2_SPTR16 *)a); \
  else \
    pcre2_substring_list_free_32( \
      (PCRE2_SPTR32 *)a)
1434 
/* Call pcre2_substring_number_from_name_NN() for the code unit width selected
by test_mode and store its return value in a. G() appends the width to both b
and c. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a, b, c) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_number_from_name_8( \
      G(b, 8), G(c, 8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_number_from_name_16( \
      G(b, 16), G(c, 16)); \
  else \
    a = pcre2_substring_number_from_name_32( \
      G(b, 32), G(c, 32))
1442 
/* Yield, as a void pointer, whichever of x8, x16 or x32 corresponds to the
current test_mode. Anything other than the 8-bit or 16-bit mode selects the
32-bit variable. */
#define PTR(x) \
  ((test_mode == PCRE8_MODE) ? (void *)G(x, 8) : \
   ((test_mode == PCRE16_MODE) ? (void *)G(x, 16) : (void *)G(x, 32)))
1447 
/* Store z in field y of the structure pointed to by x8, x16 or x32, whichever
matches the current test_mode. */
#define SETFLD(x, y, z) \
  if (test_mode == PCRE8_MODE) \
    G(x, 8)->y = z; \
  else if (test_mode == PCRE16_MODE) \
    G(x, 16)->y = z; \
  else \
    G(x, 32)->y = z

/* As SETFLD, but the field is a vector and element v of it is assigned. */
#define SETFLDVEC(x, y, v, z) \
  if (test_mode == PCRE8_MODE) \
    G(x, 8)->y[v] = z; \
  else if (test_mode == PCRE16_MODE) \
    G(x, 16)->y[v] = z; \
  else \
    G(x, 32)->y[v] = z

/* Apply the assignment-style operator z, with right-hand operand y, to the
variable x8, x16 or x32 that matches the current test_mode. Note the argument
order: the operator comes last. */
#define SETOP(x, y, z) \
  if (test_mode == PCRE8_MODE) \
    G(x, 8) z y; \
  else if (test_mode == PCRE16_MODE) \
    G(x, 16) z y; \
  else \
    G(x, 32) z y
1462 
/* Assign y to the pointer variable x8, x16 or x32 that matches the current
test_mode, casting y to uint8_t *, uint16_t * or uint32_t * respectively. */
#define SETCASTPTR(x, y) \
  if (test_mode == PCRE8_MODE) G(x, 8) = (uint8_t *)(y); \
  else if (test_mode == PCRE16_MODE) G(x, 16) = (uint16_t *)(y); \
  else G(x, 32) = (uint32_t *)(y)
1470 
/* Length of the zero-terminated string p, measured by strlen(), strlen16() or
strlen32() according to test_mode. In every mode the result is converted to
int. */
#define STRLEN(p) \
  ((test_mode == PCRE8_MODE) ? (int)strlen((char *)p) : \
   (test_mode == PCRE16_MODE) ? (int)strlen16((PCRE2_SPTR16)p) : \
   (int)strlen32((PCRE2_SPTR32)p))
1474 
/* Call the one-argument function a8, a16 or a32 with the matching variable
b8, b16 or b32, chosen by test_mode. Any return value is discarded. */
#define SUB1(a, b) \
  if (test_mode == PCRE8_MODE) \
    G(a, 8)(G(b, 8)); \
  else if (test_mode == PCRE16_MODE) \
    G(a, 16)(G(b, 16)); \
  else \
    G(a, 32)(G(b, 32))

/* The two-argument counterpart of SUB1: both b and c get the width suffix. */
#define SUB2(a, b, c) \
  if (test_mode == PCRE8_MODE) \
    G(a, 8)(G(b, 8), G(c, 8)); \
  else if (test_mode == PCRE16_MODE) \
    G(a, 16)(G(b, 16), G(c, 16)); \
  else \
    G(a, 32)(G(b, 32), G(c, 32))
1484 
/* True when the relation r holds between the variable x8, x16 or x32 that
belongs to the current test_mode and the value y. If test_mode is none of the
three known modes the result is false. */
#define TEST(x, r, y) \
  ((test_mode == PCRE8_MODE && G(x, 8) r (y)) || \
   (test_mode == PCRE16_MODE && G(x, 16) r (y)) || \
   (test_mode == PCRE32_MODE && G(x, 32) r (y)))

/* As TEST, but the left-hand operand is field f of the structure that x8, x16
or x32 points to. */
#define TESTFLD(x, f, r, y) \
  ((test_mode == PCRE8_MODE && G(x, 8)->f r (y)) || \
   (test_mode == PCRE16_MODE && G(x, 16)->f r (y)) || \
   (test_mode == PCRE32_MODE && G(x, 32)->f r (y)))
1494 
1495 
1496 /* ----- Two out of three modes are supported ----- */
1497 
1498 #else
1499 
1500 /* We can use some macro trickery to make a single set of definitions work in
1501 the three different cases. */
1502 
/* ----- 32-bit and 16-bit but not 8-bit supported ----- */

/* BITONE and BITTWO name the two code unit widths that are available in this
build, the wider one first. The two-mode macros that follow hand them to G()
to build the width-specific identifiers. */

#if defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_16)
#define BITONE 32
#define BITTWO 16

/* ----- 32-bit and 8-bit but not 16-bit supported ----- */

#elif defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_8)
#define BITONE 32
#define BITTWO 8

/* ----- 16-bit and 8-bit but not 32-bit supported ----- */

/* This pairing is chosen by elimination: the branch itself does not test the
16-bit and 8-bit SUPPORT_ macros. */

#else
#define BITONE 16
#define BITTWO 8
#endif
1521 
1522 
1523 /* ----- Common macros for two-mode cases ----- */
1524 
/* The two supported code unit widths expressed in bytes rather than bits. */
#define BYTEONE  (BITONE / 8)
#define BYTETWO  (BITTWO / 8)
1527 
/* Read field b through the BITONE or BITTWO variant of structure pointer a,
whichever matches test_mode, and cast the value to type t. */
#define CASTFLD(t, a, b) ( \
  (test_mode == G(G(PCRE, BITONE), _MODE)) ? \
    (t)(G(a, BITONE)->b) : \
    (t)(G(a, BITTWO)->b))

/* Cast the BITONE or BITTWO variant of variable x to type t. */
#define CASTVAR(t, x) ( \
  (test_mode == G(G(PCRE, BITONE), _MODE)) ? \
    (t)G(x, BITONE) : \
    (t)G(x, BITTWO))

/* Fetch code unit number b from string a, viewing a as a string of BITONE-bit
or BITTWO-bit code units according to test_mode. The result is widened to
uint32_t in both cases. */
#define CODE_UNIT(a, b) ( \
  (test_mode == G(G(PCRE, BITONE), _MODE)) ? \
    (uint32_t)(((G(PCRE2_SPTR, BITONE))(a))[b]) : \
    (uint32_t)(((G(PCRE2_SPTR, BITTWO))(a))[b]))
1540 
/* Copy one convert context over another with memcpy(): the BITONE or BITTWO
variants of a (destination) and b (source) are used, and the size copied is
that of the matching pcre2_convert_context_NN type. */
#define CONCTXCPY(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    memcpy(G(a, BITONE), G(b, BITONE), \
      sizeof(G(pcre2_convert_context_, BITONE))); \
  else \
    memcpy(G(a, BITTWO), G(b, BITTWO), \
      sizeof(G(pcre2_convert_context_, BITTWO)))
1546 
/* Copy c code units from b into the BITONE or BITTWO variant of buffer a,
according to test_mode. memcpy() wants a byte count, so c is multiplied by the
size in bytes of one code unit of the selected width.

The expansion is a conditional expression, so it is wrapped in parentheses as
a whole. Without them an operator or cast written next to the macro call, for
example (void)CONVERT_COPY(...), would attach to the test_mode comparison
instead of to the result. */
#define CONVERT_COPY(a,b,c) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
  memcpy(G(a,BITONE),(char *)b,(c)*BYTEONE) : \
  memcpy(G(a,BITTWO),(char *)b,(c)*BYTETWO))
1551 
/* Copy one match context over another with memcpy(): the BITONE or BITTWO
variants of a (destination) and b (source) are used, and the size copied is
that of the matching pcre2_match_context_NN type. */
#define DATCTXCPY(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    memcpy(G(a, BITONE), G(b, BITONE), \
      sizeof(G(pcre2_match_context_, BITONE))); \
  else \
    memcpy(G(a, BITTWO), G(b, BITTWO), \
      sizeof(G(pcre2_match_context_, BITTWO)))

/* Field b of the structure that the BITONE or BITTWO variant of pointer a
refers to, chosen by test_mode. No cast is applied. */
#define FLD(a, b) ( \
  (test_mode == G(G(PCRE, BITONE), _MODE)) ? \
    G(a, BITONE)->b : \
    G(a, BITTWO)->b)

/* Copy one compile context over another with memcpy(), in the same way as
DATCTXCPY does for match contexts; the size is that of the matching
pcre2_compile_context_NN type. */
#define PATCTXCPY(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    memcpy(G(a, BITONE), G(b, BITONE), \
      sizeof(G(pcre2_compile_context_, BITONE))); \
  else \
    memcpy(G(a, BITTWO), G(b, BITTWO), \
      sizeof(G(pcre2_compile_context_, BITTWO)))
1566 
/* Call the pcharsNN() function for the current test_mode and store its return
value in lv. The cast to PCRE2_SPTRnn is applied to p before offset is added,
so offset counts code units of the selected width. len, utf and f are passed
through unchanged. */
#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    lv = G(pchars, BITONE)( \
      (G(PCRE2_SPTR, BITONE))(p)+offset, len, utf, f); \
  else \
    lv = G(pchars, BITTWO)( \
      (G(PCRE2_SPTR, BITTWO))(p)+offset, len, utf, f)

/* As PCHARS, but the value returned by pcharsNN() is explicitly discarded. */
#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    (void)G(pchars, BITONE)( \
      (G(PCRE2_SPTR, BITONE))(p)+offset, len, utf, f); \
  else \
    (void)G(pchars, BITTWO)( \
      (G(PCRE2_SPTR, BITTWO))(p)+offset, len, utf, f)
1578 
/* Call pcre2_callout_enumerate_NN() on the BITONE or BITTWO variant of
compiled_code, according to test_mode, and store its return value in a. b is
cast to the callback type that takes a pointer to the matching struct
pcre2_callout_enumerate_block_NN; c is passed through unchanged.

The function name stem is written with its trailing underscore, like every
other stem handed to G() in this table, so that the pasted result is
pcre2_callout_enumerate_NN - the name the single-mode definitions call -
without depending on any other macro rewriting the stem first. */
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
     a = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
       (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
  else \
     a = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
       (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
1586 
/* Call pcre2_code_copy_NN() on b, which is passed as given, and assign the
result to the BITONE or BITTWO variant of a. */
#define PCRE2_CODE_COPY_FROM_VOID(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(a, BITONE) = G(pcre2_code_copy_, BITONE)(b); \
  else \
    G(a, BITTWO) = G(pcre2_code_copy_, BITTWO)(b)

/* The opposite direction: call pcre2_code_copy_NN() on the BITONE or BITTWO
variant of b and store the result, cast to void *, in a. */
#define PCRE2_CODE_COPY_TO_VOID(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = (void *)G(pcre2_code_copy_, BITONE)(G(b, BITONE)); \
  else \
    a = (void *)G(pcre2_code_copy_, BITTWO)(G(b, BITTWO))

/* As PCRE2_CODE_COPY_TO_VOID, but calling pcre2_code_copy_with_tables_NN(). */
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = (void *)G(pcre2_code_copy_with_tables_, BITONE)( \
      G(b, BITONE)); \
  else \
    a = (void *)G(pcre2_code_copy_with_tables_, BITTWO)( \
      G(b, BITTWO))
1604 
/* Call pcre2_compile_NN() for the current test_mode and assign the result to
the BITONE or BITTWO variant of a. G() also selects the variant of b; the
remaining arguments c to g are passed through unchanged. */
#define PCRE2_COMPILE(a, b, c, d, e, f, g) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(a, BITONE) = \
      G(pcre2_compile_, BITONE)(G(b, BITONE), c, d, e, f, g); \
  else \
    G(a, BITTWO) = \
      G(pcre2_compile_, BITTWO)(G(b, BITTWO), c, d, e, f, g)
1610 
/* Hand a to pcre2_converted_pattern_free_NN() for the current test_mode, cast
to a pointer to the PCRE2_UCHARnn type of that width. */
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_converted_pattern_free_, BITONE)( \
      (G(PCRE2_UCHAR, BITONE) *)a); \
  else \
    G(pcre2_converted_pattern_free_, BITTWO)( \
      (G(PCRE2_UCHAR, BITTWO) *)a)
1616 
/* Call pcre2_dfa_match_NN() for the current test_mode and store its return
value in a. G() selects the variants of b and g; c is cast to the matching
PCRE2_SPTRnn type; d, e, f, h, i and j are passed through unchanged. */
#define PCRE2_DFA_MATCH(a, b, c, d, e, f, g, h, i, j) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_dfa_match_, BITONE)(G(b, BITONE), \
      (G(PCRE2_SPTR, BITONE))c, d, e, f, G(g, BITONE), h, i, j); \
  else \
    a = G(pcre2_dfa_match_, BITTWO)(G(b, BITTWO), \
      (G(PCRE2_SPTR, BITTWO))c, d, e, f, G(g, BITTWO), h, i, j)
1624 
/* Call pcre2_get_error_message_NN() for the current test_mode and store its
return value in r. a is passed as given. The buffer is the BITONE or BITTWO
variant of b, and the length argument is the companion variable whose name is
that variant followed by _size, divided by the number of bytes in one code
unit (the _size variable is presumably a byte count; it is defined outside
this table). */
#define PCRE2_GET_ERROR_MESSAGE(r, a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    r = G(pcre2_get_error_message_, BITONE)(a, G(b, BITONE), \
      G(G(b, BITONE), _size)/BYTEONE); \
  else \
    r = G(pcre2_get_error_message_, BITTWO)(a, G(b, BITTWO), \
      G(G(b, BITTWO), _size)/BYTETWO)
1630 
/* Store in a the value returned by pcre2_get_ovector_count_NN() for the
BITONE or BITTWO variant of b. */
#define PCRE2_GET_OVECTOR_COUNT(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_get_ovector_count_, BITONE)( \
      G(b, BITONE)); \
  else \
    a = G(pcre2_get_ovector_count_, BITTWO)( \
      G(b, BITTWO))

/* Store in a the value returned by pcre2_get_startchar_NN() for the BITONE or
BITTWO variant of b. */
#define PCRE2_GET_STARTCHAR(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_get_startchar_, BITONE)( \
      G(b, BITONE)); \
  else \
    a = G(pcre2_get_startchar_, BITTWO)( \
      G(b, BITTWO))
1642 
/* Call pcre2_jit_compile_NN() on the BITONE or BITTWO variant of a, passing b
as given, and store the return value in r. */
#define PCRE2_JIT_COMPILE(r, a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    r = G(pcre2_jit_compile_, BITONE)( \
      G(a, BITONE), b); \
  else \
    r = G(pcre2_jit_compile_, BITTWO)( \
      G(a, BITTWO), b)

/* Call pcre2_jit_free_unused_memory_NN() with the BITONE or BITTWO variant of
a. */
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_jit_free_unused_memory_, BITONE)( \
      G(a, BITONE)); \
  else \
    G(pcre2_jit_free_unused_memory_, BITTWO)( \
      G(a, BITTWO))

/* Call pcre2_jit_match_NN() for the current test_mode and store its return
value in a. G() selects the variants of b and g; c is cast to the matching
PCRE2_SPTRnn type; d, e, f and h are passed through unchanged. */
#define PCRE2_JIT_MATCH(a, b, c, d, e, f, g, h) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_jit_match_, BITONE)(G(b, BITONE), \
      (G(PCRE2_SPTR, BITONE))c, d, e, f, G(g, BITONE), h); \
  else \
    a = G(pcre2_jit_match_, BITTWO)(G(b, BITTWO), \
      (G(PCRE2_SPTR, BITTWO))c, d, e, f, G(g, BITTWO), h)
1662 
/* Call pcre2_jit_stack_create_NN() for the current test_mode with b, c and d
passed as given, and store the result in a after casting it to
PCRE2_JIT_STACK *.

The expansion ends with its own semicolon, which is kept so that existing uses
are unaffected. The definition must not end in a line continuation, though:
a backslash after the last statement would make the macro absorb whatever
line follows it in the file. */
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d);
1668 
/* Call pcre2_jit_stack_assign_NN() with the BITONE or BITTWO variant of a, b
cast to the matching pcre2_jit_callback_NN type, and c as given. Note that the
expansion already ends with a semicolon. */
#define PCRE2_JIT_STACK_ASSIGN(a, b, c) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_jit_stack_assign_, BITONE)(G(a, BITONE), \
      (G(pcre2_jit_callback_, BITONE))b, c); \
  else \
    G(pcre2_jit_stack_assign_, BITTWO)(G(a, BITTWO), \
      (G(pcre2_jit_callback_, BITTWO))b, c);

/* Call pcre2_jit_stack_free_NN() with a cast to a pointer to the matching
pcre2_jit_stack_NN type. Note that the expansion already ends with a
semicolon. */
#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_jit_stack_free_, BITONE)( \
      (G(pcre2_jit_stack_, BITONE) *)a); \
  else \
    G(pcre2_jit_stack_free_, BITTWO)( \
      (G(pcre2_jit_stack_, BITTWO) *)a);
1680 
/* Store in a the result of calling pcre2_maketables_NN() with a NULL
argument, using the function for the current test_mode. */
#define PCRE2_MAKETABLES(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_maketables_, BITONE)(NULL); \
  else \
    a = G(pcre2_maketables_, BITTWO)(NULL)

/* Call pcre2_match_NN() for the current test_mode and store its return value
in a. G() selects the variants of b and g; c is cast to the matching
PCRE2_SPTRnn type; d, e, f and h are passed through unchanged. */
#define PCRE2_MATCH(a, b, c, d, e, f, g, h) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_match_, BITONE)(G(b, BITONE), \
      (G(PCRE2_SPTR, BITONE))c, d, e, f, G(g, BITONE), h); \
  else \
    a = G(pcre2_match_, BITTWO)(G(b, BITTWO), \
      (G(PCRE2_SPTR, BITTWO))c, d, e, f, G(g, BITTWO), h)
1694 
/* Assign to the BITONE or BITTWO variant of a the result of calling
pcre2_match_data_create_NN() with b and c passed as given. */
#define PCRE2_MATCH_DATA_CREATE(a, b, c) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(a, BITONE) = \
      G(pcre2_match_data_create_, BITONE)(b, c); \
  else \
    G(a, BITTWO) = \
      G(pcre2_match_data_create_, BITTWO)(b, c)

/* Assign to the BITONE or BITTWO variant of a the result of calling
pcre2_match_data_create_from_pattern_NN() with the same variant of b, and c
passed as given. */
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a, b, c) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(a, BITONE) = \
      G(pcre2_match_data_create_from_pattern_, BITONE)(G(b, BITONE), c); \
  else \
    G(a, BITTWO) = \
      G(pcre2_match_data_create_from_pattern_, BITTWO)(G(b, BITTWO), c)

/* Call pcre2_match_data_free_NN() with the BITONE or BITTWO variant of a. */
#define PCRE2_MATCH_DATA_FREE(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_match_data_free_, BITONE)( \
      G(a, BITONE)); \
  else \
    G(pcre2_match_data_free_, BITTWO)( \
      G(a, BITTWO))
1712 
/* Call pcre2_pattern_convert_NN() for the current test_mode and store its
return value in a. G() selects the variants of b and g; e is cast to a
pointer-to-pointer of the matching PCRE2_UCHARnn type; c, d and f are passed
through unchanged. */
#define PCRE2_PATTERN_CONVERT(a, b, c, d, e, f, g) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_pattern_convert_, BITONE)(G(b, BITONE), c, d, \
      (G(PCRE2_UCHAR, BITONE) **)e, f, G(g, BITONE)); \
  else \
    a = G(pcre2_pattern_convert_, BITTWO)(G(b, BITTWO), c, d, \
      (G(PCRE2_UCHAR, BITTWO) **)e, f, G(g, BITTWO))

/* Call pcre2_pattern_info_NN() on the BITONE or BITTWO variant of b, with c
and d passed as given, and store the return value in a. */
#define PCRE2_PATTERN_INFO(a, b, c, d) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_pattern_info_, BITONE)( \
      G(b, BITONE), c, d); \
  else \
    a = G(pcre2_pattern_info_, BITTWO)( \
      G(b, BITTWO), c, d)
1724 
/* Call pcre2_printint_NN() for the current test_mode. The macro takes only a;
the compiled pattern (the BITONE or BITTWO variant of compiled_code) and the
output stream outfile are picked up from the surrounding scope. */
#define PCRE2_PRINTINT(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_printint_, BITONE)( \
      G(compiled_code, BITONE), outfile, a); \
  else \
    G(pcre2_printint_, BITTWO)( \
      G(compiled_code, BITTWO), outfile, a)
1730 
/* Call pcre2_serialize_decode_NN() for the current test_mode and store its
return value in r. a is cast to a pointer-to-pointer of the matching
pcre2_code_NN type, b and c are passed as given, and G() selects the variant
of d. */
#define PCRE2_SERIALIZE_DECODE(r, a, b, c, d) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    r = G(pcre2_serialize_decode_, BITONE)( \
      (G(pcre2_code_, BITONE) **)a, b, c, G(d, BITONE)); \
  else \
    r = G(pcre2_serialize_decode_, BITTWO)( \
      (G(pcre2_code_, BITTWO) **)a, b, c, G(d, BITTWO))

/* Call pcre2_serialize_encode_NN() for the current test_mode and store its
return value in r. a is cast to a pointer-to-pointer of the const-qualified
pcre2_code_NN type, b, c and d are passed as given, and G() selects the
variant of e. */
#define PCRE2_SERIALIZE_ENCODE(r, a, b, c, d, e) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    r = G(pcre2_serialize_encode_, BITONE)( \
      (G(const pcre2_code_, BITONE) **)a, b, c, d, G(e, BITONE)); \
  else \
    r = G(pcre2_serialize_encode_, BITTWO)( \
      (G(const pcre2_code_, BITTWO) **)a, b, c, d, G(e, BITTWO))

/* Call pcre2_serialize_free_NN() for the current test_mode with a passed as
given. */
#define PCRE2_SERIALIZE_FREE(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_serialize_free_, BITONE)(a); \
  else \
    G(pcre2_serialize_free_, BITTWO)(a)

/* Store in r the value returned by pcre2_serialize_get_number_of_codes_NN()
for a, using the function for the current test_mode. */
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r, a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    r = G(pcre2_serialize_get_number_of_codes_, BITONE)(a); \
  else \
    r = G(pcre2_serialize_get_number_of_codes_, BITTWO)(a)
1754 
/* Call pcre2_set_callout_NN() with the BITONE or BITTWO variant of a, b cast
to the callback type that takes a pointer to the matching
pcre2_callout_block_NN, and c as given.

The expansion deliberately does not end in a semicolon; the caller supplies
it, exactly as for the 8-bit-only definition of this macro. A semicolon inside
the macro would turn "if (x) PCRE2_SET_CALLOUT(...); else ..." into a syntax
error in this build configuration only. */
#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c)
1762 
/* Call pcre2_set_character_tables_NN() with the BITONE or BITTWO variant of a
and b passed as given. */
#define PCRE2_SET_CHARACTER_TABLES(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_character_tables_, BITONE)( \
      G(a, BITONE), b); \
  else \
    G(pcre2_set_character_tables_, BITTWO)( \
      G(a, BITTWO), b)

/* Call pcre2_set_compile_recursion_guard_NN() with the BITONE or BITTWO
variant of a; b and c are passed as given. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a, b, c) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_compile_recursion_guard_, BITONE)( \
      G(a, BITONE), b, c); \
  else \
    G(pcre2_set_compile_recursion_guard_, BITTWO)( \
      G(a, BITTWO), b, c)

/* Call pcre2_set_depth_limit_NN() with the BITONE or BITTWO variant of a and
b passed as given. */
#define PCRE2_SET_DEPTH_LIMIT(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_depth_limit_, BITONE)( \
      G(a, BITONE), b); \
  else \
    G(pcre2_set_depth_limit_, BITTWO)( \
      G(a, BITTWO), b)
1780 
/* Call pcre2_set_glob_escape_NN() with the BITONE or BITTWO variant of a and
b passed as given; the return value is stored in r. */
#define PCRE2_SET_GLOB_ESCAPE(r, a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    r = G(pcre2_set_glob_escape_, BITONE)( \
      G(a, BITONE), b); \
  else \
    r = G(pcre2_set_glob_escape_, BITTWO)( \
      G(a, BITTWO), b)

/* Call pcre2_set_glob_separator_NN() with the BITONE or BITTWO variant of a
and b passed as given; the return value is stored in r. */
#define PCRE2_SET_GLOB_SEPARATOR(r, a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    r = G(pcre2_set_glob_separator_, BITONE)( \
      G(a, BITONE), b); \
  else \
    r = G(pcre2_set_glob_separator_, BITTWO)( \
      G(a, BITTWO), b)
1792 
/* The five macros below share one shape: each calls the pcre2_set_..._NN()
function named in its body with the BITONE or BITTWO variant of a, chosen by
test_mode, and passes b as given. Any return value is ignored. */

#define PCRE2_SET_HEAP_LIMIT(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_heap_limit_, BITONE)( \
      G(a, BITONE), b); \
  else \
    G(pcre2_set_heap_limit_, BITTWO)( \
      G(a, BITTWO), b)

#define PCRE2_SET_MATCH_LIMIT(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_match_limit_, BITONE)( \
      G(a, BITONE), b); \
  else \
    G(pcre2_set_match_limit_, BITTWO)( \
      G(a, BITTWO), b)

#define PCRE2_SET_MAX_PATTERN_LENGTH(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_max_pattern_length_, BITONE)( \
      G(a, BITONE), b); \
  else \
    G(pcre2_set_max_pattern_length_, BITTWO)( \
      G(a, BITTWO), b)

#define PCRE2_SET_OFFSET_LIMIT(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_offset_limit_, BITONE)( \
      G(a, BITONE), b); \
  else \
    G(pcre2_set_offset_limit_, BITTWO)( \
      G(a, BITTWO), b)

#define PCRE2_SET_PARENS_NEST_LIMIT(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_parens_nest_limit_, BITONE)( \
      G(a, BITONE), b); \
  else \
    G(pcre2_set_parens_nest_limit_, BITTWO)( \
      G(a, BITTWO), b)
1822 
/* Call pcre2_substitute_NN() for the current test_mode and store its return
value in a. G() selects the variants of b, g and h; c and i are cast to the
matching PCRE2_SPTRnn type and k to a pointer to the matching PCRE2_UCHARnn
type; d, e, f, j and l are passed through unchanged. */
#define PCRE2_SUBSTITUTE(a, b, c, d, e, f, g, h, i, j, k, l) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substitute_, BITONE)(G(b, BITONE), \
      (G(PCRE2_SPTR, BITONE))c, d, e, f, \
      G(g, BITONE), G(h, BITONE), \
      (G(PCRE2_SPTR, BITONE))i, j, \
      (G(PCRE2_UCHAR, BITONE) *)k, l); \
  else \
    a = G(pcre2_substitute_, BITTWO)(G(b, BITTWO), \
      (G(PCRE2_SPTR, BITTWO))c, d, e, f, \
      G(g, BITTWO), G(h, BITTWO), \
      (G(PCRE2_SPTR, BITTWO))i, j, \
      (G(PCRE2_UCHAR, BITTWO) *)k, l)
1832 
/* Call pcre2_substring_copy_byname_NN() for the current test_mode and store
its return value in a. G() selects the variants of b and c; d is cast to a
pointer to the matching PCRE2_UCHARnn type; e is passed through unchanged. */
#define PCRE2_SUBSTRING_COPY_BYNAME(a, b, c, d, e) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_copy_byname_, BITONE)( \
      G(b, BITONE), G(c, BITONE), (G(PCRE2_UCHAR, BITONE) *)d, e); \
  else \
    a = G(pcre2_substring_copy_byname_, BITTWO)( \
      G(b, BITTWO), G(c, BITTWO), (G(PCRE2_UCHAR, BITTWO) *)d, e)

/* The same dispatch for pcre2_substring_copy_bynumber_NN(). Only b receives a
width suffix here; c is handed over exactly as given. */
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a, b, c, d, e) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_copy_bynumber_, BITONE)( \
      G(b, BITONE), c, (G(PCRE2_UCHAR, BITONE) *)d, e); \
  else \
    a = G(pcre2_substring_copy_bynumber_, BITTWO)( \
      G(b, BITTWO), c, (G(PCRE2_UCHAR, BITTWO) *)d, e)
1848 
/* Hand a to pcre2_substring_free_NN() for the current test_mode, cast to a
pointer to the PCRE2_UCHARnn type of that width. */
#define PCRE2_SUBSTRING_FREE(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_substring_free_, BITONE)( \
      (G(PCRE2_UCHAR, BITONE) *)a); \
  else \
    G(pcre2_substring_free_, BITTWO)( \
      (G(PCRE2_UCHAR, BITTWO) *)a)
1853 
/* Call pcre2_substring_get_byname_NN() for the current test_mode and store
its return value in a. G() selects the variants of b and c; d is cast to a
pointer-to-pointer of the matching PCRE2_UCHARnn type; e is passed through
unchanged. */
#define PCRE2_SUBSTRING_GET_BYNAME(a, b, c, d, e) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_get_byname_, BITONE)( \
      G(b, BITONE), G(c, BITONE), (G(PCRE2_UCHAR, BITONE) **)d, e); \
  else \
    a = G(pcre2_substring_get_byname_, BITTWO)( \
      G(b, BITTWO), G(c, BITTWO), (G(PCRE2_UCHAR, BITTWO) **)d, e)

/* The same dispatch for pcre2_substring_get_bynumber_NN(). Only b receives a
width suffix here; c is handed over exactly as given. */
#define PCRE2_SUBSTRING_GET_BYNUMBER(a, b, c, d, e) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_get_bynumber_, BITONE)( \
      G(b, BITONE), c, (G(PCRE2_UCHAR, BITONE) **)d, e); \
  else \
    a = G(pcre2_substring_get_bynumber_, BITTWO)( \
      G(b, BITTWO), c, (G(PCRE2_UCHAR, BITTWO) **)d, e)
1869 
/* Call pcre2_substring_length_byname_NN() for the current test_mode and store
its return value in a. G() selects the variants of b and c; d is passed
through unchanged. */
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a, b, c, d) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_length_byname_, BITONE)( \
      G(b, BITONE), G(c, BITONE), d); \
  else \
    a = G(pcre2_substring_length_byname_, BITTWO)( \
      G(b, BITTWO), G(c, BITTWO), d)

/* The same dispatch for pcre2_substring_length_bynumber_NN(). Only b receives
a width suffix here; c and d are handed over exactly as given. */
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a, b, c, d) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_length_bynumber_, BITONE)( \
      G(b, BITONE), c, d); \
  else \
    a = G(pcre2_substring_length_bynumber_, BITTWO)( \
      G(b, BITTWO), c, d)
1881 
/* Call pcre2_substring_list_get_NN() for the current test_mode and store its
return value in a. G() selects the variant of b; c is cast to a triple pointer
of the matching PCRE2_UCHARnn type; d is passed through unchanged. */
#define PCRE2_SUBSTRING_LIST_GET(a, b, c, d) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_list_get_, BITONE)( \
      G(b, BITONE), (G(PCRE2_UCHAR, BITONE) ***)c, d); \
  else \
    a = G(pcre2_substring_list_get_, BITTWO)( \
      G(b, BITTWO), (G(PCRE2_UCHAR, BITTWO) ***)c, d)

/* Hand a to pcre2_substring_list_free_NN() for the current test_mode, cast to
a pointer to the PCRE2_SPTRnn type of that width. */
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_substring_list_free_, BITONE)( \
      (G(PCRE2_SPTR, BITONE) *)a); \
  else \
    G(pcre2_substring_list_free_, BITTWO)( \
      (G(PCRE2_SPTR, BITTWO) *)a)
1895 
/* Call pcre2_substring_number_from_name_NN() for the current test_mode with
the BITONE or BITTWO variants of b and c, and store its return value in a. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a, b, c) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_number_from_name_, BITONE)( \
      G(b, BITONE), G(c, BITONE)); \
  else \
    a = G(pcre2_substring_number_from_name_, BITTWO)( \
      G(b, BITTWO), G(c, BITTWO))
1901 
/* Yield, as a void pointer, the BITONE or BITTWO variant of x according to
test_mode. */
#define PTR(x) \
  ((test_mode == G(G(PCRE, BITONE), _MODE)) ? \
    (void *)G(x, BITONE) : (void *)G(x, BITTWO))
1905 
/* Store z in field y of the structure that the BITONE or BITTWO variant of x
points to, according to test_mode. */
#define SETFLD(x, y, z) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(x, BITONE)->y = z; \
  else \
    G(x, BITTWO)->y = z

/* As SETFLD, but the field is a vector and element v of it is assigned. */
#define SETFLDVEC(x, y, v, z) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(x, BITONE)->y[v] = z; \
  else \
    G(x, BITTWO)->y[v] = z

/* Apply the assignment-style operator z, with right-hand operand y, to the
BITONE or BITTWO variant of x. Note the argument order: the operator comes
last. */
#define SETOP(x, y, z) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(x, BITONE) z y; \
  else \
    G(x, BITTWO) z y
1917 
/* Assign y to the BITONE or BITTWO variant of pointer x, casting y to a
pointer to the unsigned integer type of that width (uintNN_t). */
#define SETCASTPTR(x, y) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(x, BITONE) = \
      (G(G(uint, BITONE), _t) *)(y); \
  else \
    G(x, BITTWO) = \
      (G(G(uint, BITTWO), _t) *)(y)
1923 
/* Length of the zero-terminated string p, obtained from the strlenNN function
for the current test_mode after casting p to the matching PCRE2_SPTRnn type.
No cast is applied to the result here, so its type is whatever those functions
return. */
#define STRLEN(p) ( \
  (test_mode == G(G(PCRE, BITONE), _MODE)) ? \
    G(strlen, BITONE)((G(PCRE2_SPTR, BITONE))p) : \
    G(strlen, BITTWO)((G(PCRE2_SPTR, BITTWO))p))
1927 
/* Call the BITONE or BITTWO variant of the one-argument function a with the
same variant of b. Any return value is discarded. */
#define SUB1(a, b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) G(a, BITONE)(G(b, BITONE)); \
  else G(a, BITTWO)(G(b, BITTWO))
1933 
/* Call the BITONE or BITTWO variant of the two-argument function a with the
same variants of b and c. Any return value is discarded.

The function name is produced by a plain G(a,WIDTH), immediately followed by
the argument list. An extra closing parenthesis after G(a,WIDTH) would leave
the expansion unbalanced and make every use of the macro a syntax error. */
#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
1939 
/* True when the relation r holds between the BITONE or BITTWO variant of x,
whichever belongs to the current test_mode, and the value y. If test_mode
matches neither width the result is false. */
#define TEST(x, r, y) \
  ((test_mode == G(G(PCRE, BITONE), _MODE) && G(x, BITONE) r (y)) || \
   (test_mode == G(G(PCRE, BITTWO), _MODE) && G(x, BITTWO) r (y)))

/* As TEST, but the left-hand operand is field f of the structure that the
selected variant of x points to. */
#define TESTFLD(x, f, r, y) \
  ((test_mode == G(G(PCRE, BITONE), _MODE) && G(x, BITONE)->f r (y)) || \
   (test_mode == G(G(PCRE, BITTWO), _MODE) && G(x, BITTWO)->f r (y)))
1947 
1948 
1949 #endif  /* Two out of three modes */
1950 
1951 /* ----- End of cases where more than one mode is supported ----- */
1952 
1953 
1954 /* ----- Only 8-bit mode is supported ----- */
1955 
1956 #elif defined SUPPORT_PCRE2_8
/* With a single code unit width there is nothing to dispatch on: each macro
in this table simply applies the suffix 8. The first group covers field and
variable access, code unit fetching and the context/buffer copies. */
#define CASTFLD(t, a, b) \
  (t)(G(a, 8)->b)
#define CASTVAR(t, x) \
  (t)G(x, 8)
#define CODE_UNIT(a, b) \
  (uint32_t)(((PCRE2_SPTR8)(a))[b])
#define CONCTXCPY(a, b) \
  memcpy(G(a, 8), G(b, 8), sizeof(pcre2_convert_context_8))
/* One code unit is one byte, so the count c is used as the byte count. */
#define CONVERT_COPY(a, b, c) \
  memcpy(G(a, 8), (char *)b, c)
#define DATCTXCPY(a, b) \
  memcpy(G(a, 8), G(b, 8), sizeof(pcre2_match_context_8))
#define FLD(a, b) \
  G(a, 8)->b
#define PATCTXCPY(a, b) \
  memcpy(G(a, 8), G(b, 8), sizeof(pcre2_compile_context_8))
/* Call pchars8() and keep (PCHARS) or discard (PCHARSV) its return value. The
cast to PCRE2_SPTR8 is applied to p before offset is added. */
#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars8( \
    (PCRE2_SPTR8)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars8( \
    (PCRE2_SPTR8)(p)+offset, len, utf, f)
/* 8-bit-only wrappers for callout enumeration, code copying, compilation,
converted-pattern release, DFA matching and a few simple queries. Each one
calls the _8 library function directly; names that carry a width suffix are
built with G(name,8). */
#define PCRE2_CALLOUT_ENUMERATE(a, b, c) \
  a = pcre2_callout_enumerate_8(compiled_code8, \
    (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b, c)
#define PCRE2_CODE_COPY_FROM_VOID(a, b) \
  G(a, 8) = pcre2_code_copy_8(b)
#define PCRE2_CODE_COPY_TO_VOID(a, b) \
  a = (void *)pcre2_code_copy_8(G(b, 8))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a, b) \
  a = (void *)pcre2_code_copy_with_tables_8(G(b, 8))
#define PCRE2_COMPILE(a, b, c, d, e, f, g) \
  G(a, 8) = pcre2_compile_8(G(b, 8), c, d, e, f, g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a)
#define PCRE2_DFA_MATCH(a, b, c, d, e, f, g, h, i, j) \
  a = pcre2_dfa_match_8(G(b, 8), (PCRE2_SPTR8)c, d, e, f, \
    G(g, 8), h, i, j)
/* The length passed is the variable named b8_size, used as it stands. */
#define PCRE2_GET_ERROR_MESSAGE(r, a, b) \
  r = pcre2_get_error_message_8(a, G(b, 8), G(G(b, 8), _size))
#define PCRE2_GET_OVECTOR_COUNT(a, b) \
  a = pcre2_get_ovector_count_8(G(b, 8))
#define PCRE2_GET_STARTCHAR(a, b) \
  a = pcre2_get_startchar_8(G(b, 8))
/* 8-bit-only wrappers for the JIT functions. Note that the three
PCRE2_JIT_STACK_ macros expand to a complete statement that already ends with
a semicolon, whereas the others leave the semicolon to the caller. */
#define PCRE2_JIT_COMPILE(r, a, b) \
  r = pcre2_jit_compile_8(G(a, 8), b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  pcre2_jit_free_unused_memory_8(G(a, 8))
#define PCRE2_JIT_MATCH(a, b, c, d, e, f, g, h) \
  a = pcre2_jit_match_8(G(b, 8), (PCRE2_SPTR8)c, d, e, f, \
    G(g, 8), h)
#define PCRE2_JIT_STACK_CREATE(a, b, c, d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b, c, d);
#define PCRE2_JIT_STACK_ASSIGN(a, b, c) \
  pcre2_jit_stack_assign_8(G(a, 8), (pcre2_jit_callback_8)b, c);
#define PCRE2_JIT_STACK_FREE(a) \
  pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a);
/* 8-bit-only wrappers for table building, matching, match data handling,
pattern conversion and information, and internal-form printing.
PCRE2_PRINTINT picks up compiled_code8 and outfile from the surrounding
scope. */
#define PCRE2_MAKETABLES(a) \
  a = pcre2_maketables_8(NULL)
#define PCRE2_MATCH(a, b, c, d, e, f, g, h) \
  a = pcre2_match_8(G(b, 8), (PCRE2_SPTR8)c, d, e, f, \
    G(g, 8), h)
#define PCRE2_MATCH_DATA_CREATE(a, b, c) \
  G(a, 8) = pcre2_match_data_create_8(b, c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a, b, c) \
  G(a, 8) = pcre2_match_data_create_from_pattern_8(G(b, 8), c)
#define PCRE2_MATCH_DATA_FREE(a) \
  pcre2_match_data_free_8(G(a, 8))
#define PCRE2_PATTERN_CONVERT(a, b, c, d, e, f, g) \
  a = pcre2_pattern_convert_8(G(b, 8), c, d, \
    (PCRE2_UCHAR8 **)e, f, G(g, 8))
#define PCRE2_PATTERN_INFO(a, b, c, d) \
  a = pcre2_pattern_info_8(G(b, 8), c, d)
#define PCRE2_PRINTINT(a) \
  pcre2_printint_8(compiled_code8, outfile, a)
/* 8-bit-only wrappers for the serialization functions. The code list argument
a is cast to pcre2_code_8 ** (const-qualified for encoding); the final
argument of decode and encode gets the suffix 8 via G(). */
#define PCRE2_SERIALIZE_DECODE(r, a, b, c, d) \
  r = pcre2_serialize_decode_8( \
    (pcre2_code_8 **)a, b, c, G(d, 8))
#define PCRE2_SERIALIZE_ENCODE(r, a, b, c, d, e) \
  r = pcre2_serialize_encode_8( \
    (const pcre2_code_8 **)a, b, c, d, G(e, 8))
#define PCRE2_SERIALIZE_FREE(a) \
  pcre2_serialize_free_8(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r, a) \
  r = pcre2_serialize_get_number_of_codes_8(a)
/* 8-bit-only wrappers for the context-setting functions. In each case a names
the context variable (suffix 8 is added by G()) and the remaining arguments
are passed as given, except that PCRE2_SET_CALLOUT casts b to the 8-bit
callout function type. Only the two glob setters store a return value. */
#define PCRE2_SET_CALLOUT(a, b, c) \
  pcre2_set_callout_8(G(a, 8), \
    (int (*)(pcre2_callout_block_8 *, void *))b, c)
#define PCRE2_SET_CHARACTER_TABLES(a, b) \
  pcre2_set_character_tables_8(G(a, 8), b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a, b, c) \
  pcre2_set_compile_recursion_guard_8(G(a, 8), b, c)
#define PCRE2_SET_DEPTH_LIMIT(a, b) \
  pcre2_set_depth_limit_8(G(a, 8), b)
#define PCRE2_SET_GLOB_ESCAPE(r, a, b) \
  r = pcre2_set_glob_escape_8(G(a, 8), b)
#define PCRE2_SET_GLOB_SEPARATOR(r, a, b) \
  r = pcre2_set_glob_separator_8(G(a, 8), b)
#define PCRE2_SET_HEAP_LIMIT(a, b) \
  pcre2_set_heap_limit_8(G(a, 8), b)
#define PCRE2_SET_MATCH_LIMIT(a, b) \
  pcre2_set_match_limit_8(G(a, 8), b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a, b) \
  pcre2_set_max_pattern_length_8(G(a, 8), b)
#define PCRE2_SET_OFFSET_LIMIT(a, b) \
  pcre2_set_offset_limit_8(G(a, 8), b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a, b) \
  pcre2_set_parens_nest_limit_8(G(a, 8), b)
/* 8-bit-only wrappers for substitution and substring extraction. The first
argument receives the return value, except for the two FREE macros, which
return nothing. String and buffer arguments are cast to the 8-bit PCRE2_SPTR8
or PCRE2_UCHAR8 pointer types as each function requires. */
#define PCRE2_SUBSTITUTE(a, b, c, d, e, f, g, h, i, j, k, l) \
  a = pcre2_substitute_8(G(b, 8), (PCRE2_SPTR8)c, d, e, f, \
    G(g, 8), G(h, 8), (PCRE2_SPTR8)i, j, (PCRE2_UCHAR8 *)k, l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a, b, c, d, e) \
  a = pcre2_substring_copy_byname_8(G(b, 8), G(c, 8), \
    (PCRE2_UCHAR8 *)d, e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a, b, c, d, e) \
  a = pcre2_substring_copy_bynumber_8(G(b, 8), c, \
    (PCRE2_UCHAR8 *)d, e)
#define PCRE2_SUBSTRING_FREE(a) \
  pcre2_substring_free_8((PCRE2_UCHAR8 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a, b, c, d, e) \
  a = pcre2_substring_get_byname_8(G(b, 8), G(c, 8), \
    (PCRE2_UCHAR8 **)d, e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a, b, c, d, e) \
  a = pcre2_substring_get_bynumber_8(G(b, 8), c, \
    (PCRE2_UCHAR8 **)d, e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a, b, c, d) \
  a = pcre2_substring_length_byname_8(G(b, 8), G(c, 8), d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a, b, c, d) \
  a = pcre2_substring_length_bynumber_8(G(b, 8), c, d)
#define PCRE2_SUBSTRING_LIST_GET(a, b, c, d) \
  a = pcre2_substring_list_get_8(G(b, 8), \
    (PCRE2_UCHAR8 ***)c, d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_8( \
    (PCRE2_SPTR8 *)a)
/* Store in a the value returned by pcre2_substring_number_from_name_8() for
the 8-bit variants of b and c.

The expansion does not end in a semicolon; the caller supplies it, as it must
for the multi-mode definitions of this macro. A semicolon here would make
"if (x) PCRE2_SUBSTRING_NUMBER_FROM_NAME(...); else ..." a syntax error in
the 8-bit-only build alone. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_8(G(b,8),G(c,8))
/* 8-bit-only forms of the generic helpers: pointer access, field and variable
assignment, string length, indirect function calls and comparisons. Each acts
on the name formed by adding the suffix 8. */
#define PTR(x) \
  (void *)G(x, 8)
#define SETFLD(x, y, z) \
  G(x, 8)->y = z
#define SETFLDVEC(x, y, v, z) \
  G(x, 8)->y[v] = z
/* The operator is the last argument: SETOP(x, y, op) gives "x8 op y". */
#define SETOP(x, y, z) \
  G(x, 8) z y
#define SETCASTPTR(x, y) \
  G(x, 8) = (uint8_t *)(y)
#define STRLEN(p) \
  (int)strlen((char *)p)
#define SUB1(a, b) \
  G(a, 8)(G(b, 8))
#define SUB2(a, b, c) \
  G(a, 8)(G(b, 8), G(c, 8))
#define TEST(x, r, y) \
  (G(x, 8) r (y))
#define TESTFLD(x, f, r, y) \
  (G(x, 8)->f r (y))
2056 
2057 
2058 /* ----- Only 16-bit mode is supported ----- */
2059 
2060 #elif defined SUPPORT_PCRE2_16
/* With a single code unit width there is nothing to dispatch on: each macro
in this table simply applies the suffix 16. The first group covers field and
variable access, code unit fetching and the context/buffer copies. */
#define CASTFLD(t, a, b) \
  (t)(G(a, 16)->b)
#define CASTVAR(t, x) \
  (t)G(x, 16)
#define CODE_UNIT(a, b) \
  (uint32_t)(((PCRE2_SPTR16)(a))[b])
#define CONCTXCPY(a, b) \
  memcpy(G(a, 16), G(b, 16), sizeof(pcre2_convert_context_16))
/* memcpy() wants bytes: c code units of two bytes each. */
#define CONVERT_COPY(a, b, c) \
  memcpy(G(a, 16), (char *)b, (c)*2)
#define DATCTXCPY(a, b) \
  memcpy(G(a, 16), G(b, 16), sizeof(pcre2_match_context_16))
#define FLD(a, b) \
  G(a, 16)->b
#define PATCTXCPY(a, b) \
  memcpy(G(a, 16), G(b, 16), sizeof(pcre2_compile_context_16))
/* Call pchars16() and keep (PCHARS) or discard (PCHARSV) its return value.
The cast to PCRE2_SPTR16 is applied to p before offset is added, so offset
counts 16-bit code units. */
#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars16( \
    (PCRE2_SPTR16)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars16( \
    (PCRE2_SPTR16)(p)+offset, len, utf, f)
/* 16-bit-only wrappers for callout enumeration, code copying, compilation,
converted-pattern release, DFA matching and a few simple queries. Each one
calls the _16 library function directly; names that carry a width suffix are
built with G(name,16). */
#define PCRE2_CALLOUT_ENUMERATE(a, b, c) \
  a = pcre2_callout_enumerate_16(compiled_code16, \
    (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b, c)
#define PCRE2_CODE_COPY_FROM_VOID(a, b) \
  G(a, 16) = pcre2_code_copy_16(b)
#define PCRE2_CODE_COPY_TO_VOID(a, b) \
  a = (void *)pcre2_code_copy_16(G(b, 16))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a, b) \
  a = (void *)pcre2_code_copy_with_tables_16(G(b, 16))
#define PCRE2_COMPILE(a, b, c, d, e, f, g) \
  G(a, 16) = pcre2_compile_16(G(b, 16), c, d, e, f, g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a)
#define PCRE2_DFA_MATCH(a, b, c, d, e, f, g, h, i, j) \
  a = pcre2_dfa_match_16(G(b, 16), (PCRE2_SPTR16)c, d, e, f, \
    G(g, 16), h, i, j)
/* The length passed is the variable named b16_size divided by two (the _size
variable is presumably a byte count; it is defined outside this table). */
#define PCRE2_GET_ERROR_MESSAGE(r, a, b) \
  r = pcre2_get_error_message_16(a, G(b, 16), G(G(b, 16), _size)/2)
#define PCRE2_GET_OVECTOR_COUNT(a, b) \
  a = pcre2_get_ovector_count_16(G(b, 16))
#define PCRE2_GET_STARTCHAR(a, b) \
  a = pcre2_get_startchar_16(G(b, 16))
/* 16-bit-only wrappers for the JIT functions. Note that the three
PCRE2_JIT_STACK_ macros expand to a complete statement that already ends with
a semicolon, whereas the others leave the semicolon to the caller. */
#define PCRE2_JIT_COMPILE(r, a, b) \
  r = pcre2_jit_compile_16(G(a, 16), b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  pcre2_jit_free_unused_memory_16(G(a, 16))
#define PCRE2_JIT_MATCH(a, b, c, d, e, f, g, h) \
  a = pcre2_jit_match_16(G(b, 16), (PCRE2_SPTR16)c, d, e, f, \
    G(g, 16), h)
#define PCRE2_JIT_STACK_CREATE(a, b, c, d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b, c, d);
#define PCRE2_JIT_STACK_ASSIGN(a, b, c) \
  pcre2_jit_stack_assign_16(G(a, 16), (pcre2_jit_callback_16)b, c);
#define PCRE2_JIT_STACK_FREE(a) \
  pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a);
/* 16-bit-only wrappers for table building, matching, match data handling,
pattern conversion and information, and internal-form printing.
PCRE2_PRINTINT picks up compiled_code16 and outfile from the surrounding
scope. */
#define PCRE2_MAKETABLES(a) \
  a = pcre2_maketables_16(NULL)
#define PCRE2_MATCH(a, b, c, d, e, f, g, h) \
  a = pcre2_match_16(G(b, 16), (PCRE2_SPTR16)c, d, e, f, \
    G(g, 16), h)
#define PCRE2_MATCH_DATA_CREATE(a, b, c) \
  G(a, 16) = pcre2_match_data_create_16(b, c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a, b, c) \
  G(a, 16) = pcre2_match_data_create_from_pattern_16(G(b, 16), c)
#define PCRE2_MATCH_DATA_FREE(a) \
  pcre2_match_data_free_16(G(a, 16))
#define PCRE2_PATTERN_CONVERT(a, b, c, d, e, f, g) \
  a = pcre2_pattern_convert_16(G(b, 16), c, d, \
    (PCRE2_UCHAR16 **)e, f, G(g, 16))
#define PCRE2_PATTERN_INFO(a, b, c, d) \
  a = pcre2_pattern_info_16(G(b, 16), c, d)
#define PCRE2_PRINTINT(a) \
  pcre2_printint_16(compiled_code16, outfile, a)
2108 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
2109   r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16))
2110 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
2111   r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16))
2112 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a)
2113 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
2114   r = pcre2_serialize_get_number_of_codes_16(a)
2115 #define PCRE2_SET_CALLOUT(a,b,c) \
2116   pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c);
2117 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
2118 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
2119   pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
2120 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
2121 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b)
2122 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b)
2123 #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
2124 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
2125 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
2126 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
2127 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
2128 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
2129   a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
2130     (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
2131 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
2132   a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
2133 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
2134   a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
2135 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
2136 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2137   a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e)
2138 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2139   a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e)
2140 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2141     a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d)
2142 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2143     a = pcre2_substring_length_bynumber_16(G(b,16),c,d)
2144 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2145   a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d)
2146 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2147   pcre2_substring_list_free_16((PCRE2_SPTR16 *)a)
2148 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
2149   a = pcre2_substring_number_from_name_16(G(b,16),G(c,16));
2150 #define PTR(x) (void *)G(x,16)
2151 #define SETFLD(x,y,z) G(x,16)->y = z
2152 #define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z
2153 #define SETOP(x,y,z) G(x,16) z y
2154 #define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y)
2155 #define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p)
2156 #define SUB1(a,b) G(a,16)(G(b,16))
2157 #define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
2158 #define TEST(x,r,y) (G(x,16) r (y))
2159 #define TESTFLD(x,f,r,y) (G(x,16)->f r (y))
2160 
2161 
2162 /* ----- Only 32-bit mode is supported ----- */
2163 
2164 #elif defined SUPPORT_PCRE2_32
/* Generic helpers for the 32-bit variants of variables: casts, field access,
and context copying. Arguments wrapped in G(x,32) are names that get a width
suffix applied; G() is defined outside this section (presumably by token
pasting - confirm against its definition). CONVERT_COPY multiplies its count by
4, so c is presumably a number of 32-bit code units being turned into bytes. */

#define CASTFLD(t,a,b) (t)(G(a,32)->b)
#define CASTVAR(t,x) (t)G(x,32)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
#define CONCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
#define CONVERT_COPY(a,b,c) memcpy(G(a,32),(char *)b, (c)*4)
#define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
#define FLD(a,b) G(a,32)->b
#define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))

/* Character-string printing: PCHARS stores the value returned by pchars32() in
lv, PCHARSV discards it. */

#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)

/* From here on, each generic macro routes a test-program operation to the
function of the same name with a _32 suffix. Where a macro has a result
argument (a or r), the expansion is an assignment to it. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
   a = pcre2_callout_enumerate_32(compiled_code32, \
     (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)

/* The length passed here is the suffixed buffer's _size value divided by 4;
presumably that value is a byte count and the function wants 32-bit code units
(confirm against the buffer declarations). */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)

/* The JIT stack is handled through the width-neutral PCRE2_JIT_STACK pointer
type, so these macros cast to and from the 32-bit stack type. Note that the
three JIT stack macros expand with a trailing semicolon of their own. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);
#define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_32(a)

/* PCRE2_SET_CALLOUT also expands with a trailing semicolon of its own. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c);
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
    (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
/* Assign to a the result of pcre2_substring_copy_bynumber_32() for substring
number c of the 32-bit match data named by b, with d cast to the 32-bit output
buffer type and e passed through unchanged. The expansion has no trailing
semicolon of its own: the caller supplies it, exactly as for the 16-bit form of
this macro, so the macro can be used as the body of an unbraced if/else. */

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
/* Remaining substring wrappers for the 32-bit library. Pointer arguments are
cast to the 32-bit code unit types expected by the _32 functions. Note that
PCRE2_SUBSTRING_NUMBER_FROM_NAME expands with a trailing semicolon of its
own. */

#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
    a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
    a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_32(G(b,32),G(c,32));

/* Generic helpers for reading, assigning, calling and testing the 32-bit
variants of variables and their fields. In SETOP the third argument is the
operator and the second is the right-hand operand. STRLEN narrows the result of
strlen32() to int. */

#define PTR(x) (void *)G(x,32)
#define SETFLD(x,y,z) G(x,32)->y = z
#define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
#define SETOP(x,y,z) G(x,32) z y
#define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
#define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
#define SUB1(a,b) G(a,32)(G(b,32))
#define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
#define TEST(x,r,y) (G(x,32) r (y))
#define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2264 
2265 #endif
2266 
2267 /* ----- End of mode-specific function call macros ----- */
2268 
2269 
2270 
2271 
2272 /*************************************************
2273 *         Alternate character tables             *
2274 *************************************************/
2275 
2276 /* By default, the "tables" pointer in the compile context when calling
2277 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2278 library. However, the tables modifier can be used to select alternate sets of
2279 tables, for different kinds of testing. Note that the locale modifier also
2280 adjusts the tables. */
2281 
2282 /* This is the set of tables distributed as default with PCRE2. It recognizes
2283 only ASCII characters. */
2284 
static const uint8_t tables1[] = {

/* Layout: four tables back to back - 256 bytes of lower casing, 256 bytes of
case flipping, ten 32-byte class bit maps (320 bytes), and 256 bytes of
character type flags - 1088 bytes in all. */

/* This table is a lower casing table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */

/* space: characters 9-13 and 32 */

  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* xdigit: 0-9, A-F, a-f */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* digit: 0-9 */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* upper: A-Z */

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* lower: a-z */

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* word: 0-9, A-Z, underscore, a-z */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* graph: characters 33-126 */

  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* print: characters 32-126 */

  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* punct: characters 33-47, 58-64, 91-96 and 123-126 */

  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* cntrl: characters 0-31 and 127 */

  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2453 
2454 /* This is a set of tables that came originally from a Windows user. It seems
2455 to be at least an approximation of ISO 8859. In particular, there are
2456 characters greater than 128 that are marked as spaces, letters, etc. */
2457 
static const uint8_t tables2[] = {

/* Lower casing table, 256 bytes. Besides A-Z, the characters 192-222 (except
215) map to the value 32 higher. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table, 256 bytes. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bit maps, 320 bytes: ten maps of 32 bytes (four rows each).
The size and the ASCII portions match the maps in tables1, so the order is
presumably the same - space, xdigit, digit, upper, lower, word, graph, print,
punct, cntrl - but here some bits are also set for characters above 127. */

0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Character type flags, 256 bytes, one per character. The values for 0-127
use the same bit assignments as the final table in tables1 (1 = white space,
2 = letter, 4 = decimal digit, 8 = hexadecimal digit, 16 = alphanumeric or '_',
128 = metacharacter or binary zero); many characters above 127 also carry
flags here. */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
2596 
2597 
2598 
2599 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
2600 /*************************************************
2601 *    Emulated memmove() for systems without it   *
2602 *************************************************/
2603 
/* Stand-in for memmove(): move n bytes from s to d and hand back d. bcopy() is
used when it is available. Otherwise the bytes are moved one at a time, since
some non-Unix environments provide neither memmove() nor bcopy(); the copy
direction is chosen from the relative position of the two pointers so that
overlapping regions are handled. */

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;

if (to > from)
  {
  /* Destination lies above the source: work from the last byte down so that
  every source byte is read before it can be overwritten. */
  size_t left = n;
  while (left > 0)
    {
    left--;
    to[left] = from[left];
    }
  }
else
  {
  /* Destination is at or below the source: a plain ascending copy is safe. */
  size_t k;
  for (k = 0; k < n; k++) to[k] = from[k];
  }

return d;
#endif   /* not HAVE_BCOPY */
}
2632 #undef memmove
2633 #define memmove(d,s,n) emulated_memmove(d,s,n)
2634 #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
2635 
2636 
2637 
2638 #ifndef HAVE_STRERROR
2639 /*************************************************
2640 *     Provide strerror() for non-ANSI libraries  *
2641 *************************************************/
2642 
/* Replacement strerror() for old libraries (SunOS4 is one example) that did
not supply it. Such systems may no longer exist, but the fallback is cheap: it
looks the message up in the traditional sys_errlist[] vector, and yields a
fixed text for any number outside the range 0 to sys_nerr-1. */

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
2656 #endif /* HAVE_STRERROR */
2657 
2658 
2659 
2660 /*************************************************
2661 *            Local memory functions              *
2662 *************************************************/
2663 
2664 /* Alternative memory functions, to test functionality. */
2665 
/* Allocation wrapper around malloc(). When show_memory is set, each call is
reported on outfile, and a successful block is noted in malloclist[] (with its
size in malloclistlength[]) while there is room, so that my_free() can report
the size later.

Arguments:
  size        number of bytes wanted
  data        not used

Returns:      whatever malloc() returned (NULL on failure)
*/

static void *my_malloc(PCRE2_SIZE size, void *data)
{
void *block = malloc(size);
(void)data;

if (!show_memory) return block;

if (block == NULL)
  {
  fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", SIZ_CAST size);
  return block;
  }

fprintf(outfile, "malloc  %5" SIZ_FORM, SIZ_CAST size);
#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
fprintf(outfile, " %p", block);   /* Not portable */
#endif

if (malloclistptr >= MALLOCLISTSIZE)
  fprintf(outfile, " (not remembered)");
else
  {
  malloclist[malloclistptr] = block;
  malloclistlength[malloclistptr] = size;
  malloclistptr++;
  }

fprintf(outfile, "\n");
return block;
}
2694 
my_free(void * block,void * data)2695 static void my_free(void *block, void *data)
2696 {
2697 (void)data;
2698 if (show_memory)
2699   {
2700   uint32_t i, j;
2701   BOOL found = FALSE;
2702 
2703   fprintf(outfile, "free");
2704   for (i = 0; i < malloclistptr; i++)
2705     {
2706     if (block == malloclist[i])
2707       {
2708       fprintf(outfile, "    %5" SIZ_FORM, SIZ_CAST malloclistlength[i]);
2709       malloclistptr--;
2710       for (j = i; j < malloclistptr; j++)
2711         {
2712         malloclist[j] = malloclist[j+1];
2713         malloclistlength[j] = malloclistlength[j+1];
2714         }
2715       found = TRUE;
2716       break;
2717       }
2718     }
2719   if (!found) fprintf(outfile, " unremembered block");
2720 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2721   fprintf(outfile, " %p", block);  /* Not portable */
2722 #endif
2723   fprintf(outfile, "\n");
2724   }
2725 free(block);
2726 }
2727 
2728 
2729 
2730 /*************************************************
2731 *       Callback function for stack guard        *
2732 *************************************************/
2733 
2734 /* This is set up to be called from pcre2_compile() when the stackguard=n
2735 modifier sets a value greater than zero. The test we do is whether the
2736 parenthesis nesting depth is greater than the value set by the modifier.
2737 
2738 Argument:  the current parenthesis nesting depth
2739 Returns:   non-zero to kill the compilation
2740 */
2741 
2742 static int
stack_guard(uint32_t depth,void * user_data)2743 stack_guard(uint32_t depth, void *user_data)
2744 {
2745 (void)user_data;
2746 return depth > pat_patctl.stackguard_test;
2747 }
2748 
2749 
2750 /*************************************************
2751 *         JIT memory callback                    *
2752 *************************************************/
2753 
2754 static PCRE2_JIT_STACK*
jit_callback(void * arg)2755 jit_callback(void *arg)
2756 {
2757 jit_was_used = TRUE;
2758 return (PCRE2_JIT_STACK *)arg;
2759 }
2760 
2761 
2762 /*************************************************
2763 *      Convert UTF-8 character to code point     *
2764 *************************************************/
2765 
2766 /* This function reads one or more bytes that represent a UTF-8 character,
2767 and returns the codepoint of that character. Note that the function supports
2768 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2769 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2770 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2771 checking, and also for generating 32-bit non-UTF data values above the UTF
2772 limit.
2773 
2774 Argument:
2775   utf8bytes   a pointer to the byte vector
2776   vptr        a pointer to an int to receive the value
2777 
2778 Returns:      >  0 => the number of bytes consumed
2779               -6 to 0 => malformed UTF-8 character at offset = (-return)
2780 */
2781 
2782 static int
utf82ord(PCRE2_SPTR8 utf8bytes,uint32_t * vptr)2783 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2784 {
2785 uint32_t c = *utf8bytes++;
2786 uint32_t d = c;
2787 int i, j, s;
2788 
2789 for (i = -1; i < 6; i++)               /* i is number of additional bytes */
2790   {
2791   if ((d & 0x80) == 0) break;
2792   d <<= 1;
2793   }
2794 
2795 if (i == -1) { *vptr = c; return 1; }  /* ascii character */
2796 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
2797 
2798 /* i now has a value in the range 1-5 */
2799 
2800 s = 6*i;
2801 d = (c & utf8_table3[i]) << s;
2802 
2803 for (j = 0; j < i; j++)
2804   {
2805   c = *utf8bytes++;
2806   if ((c & 0xc0) != 0x80) return -(j+1);
2807   s -= 6;
2808   d |= (c & 0x3f) << s;
2809   }
2810 
2811 /* Check that encoding was the correct unique one */
2812 
2813 for (j = 0; j < utf8_table1_size; j++)
2814   if (d <= (uint32_t)utf8_table1[j]) break;
2815 if (j != i) return -(i+1);
2816 
2817 /* Valid value */
2818 
2819 *vptr = d;
2820 return i+1;
2821 }
2822 
2823 
2824 
2825 /*************************************************
2826 *             Print one character                *
2827 *************************************************/
2828 
2829 /* Print a single character either literally, or as a hex escape, and count how
2830 many printed characters are used.
2831 
2832 Arguments:
2833   c            the character
2834   utf          TRUE in UTF mode
2835   f            the FILE to print to, or NULL just to count characters
2836 
2837 Returns:       number of characters written
2838 */
2839 
2840 static int
pchar(uint32_t c,BOOL utf,FILE * f)2841 pchar(uint32_t c, BOOL utf, FILE *f)
2842 {
2843 int n = 0;
2844 char tempbuffer[16];
2845 
2846 if (PRINTOK(c))
2847   {
2848   if (f != NULL) fprintf(f, "%c", c);
2849   return 1;
2850   }
2851 
2852 if (c < 0x100)
2853   {
2854   if (utf)
2855     {
2856     if (f != NULL) fprintf(f, "\\x{%02x}", c);
2857     return 6;
2858     }
2859   else
2860     {
2861     if (f != NULL) fprintf(f, "\\x%02x", c);
2862     return 4;
2863     }
2864   }
2865 
2866 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2867   else n = sprintf(tempbuffer, "\\x{%02x}", c);
2868 
2869 return n >= 0 ? n : 0;
2870 }
2871 
2872 
2873 
2874 #ifdef SUPPORT_PCRE2_16
2875 /*************************************************
2876 *    Find length of 0-terminated 16-bit string   *
2877 *************************************************/
2878 
/* Count the 16-bit code units that precede the terminating zero unit.

Argument:   pointer to a zero-terminated 16-bit string
Returns:    the number of code units, not counting the terminator

The pointer difference is converted straight to the size_t return type rather
than being narrowed through int on the way. */

static size_t strlen16(PCRE2_SPTR16 p)
{
PCRE2_SPTR16 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
2885 #endif  /* SUPPORT_PCRE2_16 */
2886 
2887 
2888 
2889 #ifdef SUPPORT_PCRE2_32
2890 /*************************************************
2891 *    Find length of 0-terminated 32-bit string   *
2892 *************************************************/
2893 
/* Count the 32-bit code units that precede the terminating zero unit.

Argument:   pointer to a zero-terminated 32-bit string
Returns:    the number of code units, not counting the terminator

The pointer difference is converted straight to the size_t return type rather
than being narrowed through int on the way. */

static size_t strlen32(PCRE2_SPTR32 p)
{
PCRE2_SPTR32 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
2900 #endif  /* SUPPORT_PCRE2_32 */
2901 
2902 
2903 #ifdef SUPPORT_PCRE2_8
2904 /*************************************************
2905 *         Print 8-bit character string           *
2906 *************************************************/
2907 
2908 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2909 For printing *MARK strings, a negative length is given. If handed a NULL file,
2910 just counts chars without printing (because pchar() does that). */
2911 
static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
{
int total = 0;
int remaining = length;

/* A negative length means the real length is in the code unit that precedes
the string. */

if (remaining < 0) remaining = p[-1];

while (remaining > 0)
  {
  uint32_t ch = 0;
  int used = 0;

  /* In UTF mode try to decode a whole character, but accept it only if it is
  valid and does not run over the end of the string. */

  if (utf)
    {
    used = utf82ord(p, &ch);
    if (used <= 0 || used > remaining) used = 0;
    }

  /* Otherwise (non-UTF, or the decode was rejected) output a single byte. */

  if (used == 0)
    {
    ch = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  total += pchar(ch, utf, f);
  }

return total;
}
2937 #endif
2938 
2939 
2940 #ifdef SUPPORT_PCRE2_16
2941 /*************************************************
2942 *           Print 16-bit character string        *
2943 *************************************************/
2944 
2945 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2946 For printing *MARK strings, a negative length is given. If handed a NULL file,
2947 just counts chars without printing. */
2948 
static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
{
int total = 0;
int remaining = length;

/* A negative length means the real length is in the code unit that precedes
the string. */

if (remaining < 0) remaining = p[-1];

for (; remaining > 0; remaining--)
  {
  uint32_t ch = *p++ & 0xffff;

  /* In UTF mode, a high surrogate that is followed (within the string) by a
  low surrogate is combined with it into one code point. An unpaired surrogate
  is output as it stands. */

  if (utf && remaining > 1 && (ch & 0xfc00) == 0xd800)
    {
    uint32_t low = *p & 0xffff;
    if ((low & 0xfc00) == 0xdc00)
      {
      ch = ((ch & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
      p++;
      remaining--;
      }
    }

  total += pchar(ch, utf, f);
  }

return total;
}
2970 #endif  /* SUPPORT_PCRE2_16 */
2971 
2972 
2973 
2974 #ifdef SUPPORT_PCRE2_32
2975 /*************************************************
2976 *           Print 32-bit character string        *
2977 *************************************************/
2978 
2979 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2980 For printing *MARK strings, a negative length is given. If handed a NULL file,
2981 just counts chars without printing. */
2982 
static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{
int total = 0;
int remaining = length;

/* A negative length means the real length is in the code unit that precedes
the string. */

if (remaining < 0) remaining = p[-1];

/* Every 32-bit code unit is a complete character, so no decoding is needed;
utf is just passed on to pchar(). */

for (; remaining > 0; remaining--)
  {
  uint32_t ch = *p++;
  total += pchar(ch, utf, f);
  }

return total;
}
2996 #endif  /* SUPPORT_PCRE2_32 */
2997 
2998 
2999 
3000 
3001 #ifdef SUPPORT_PCRE2_8
3002 /*************************************************
3003 *       Convert character value to UTF-8         *
3004 *************************************************/
3005 
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes.

Arguments:
  cvalue     the character value
  utf8bytes  pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer, or -1 if cvalue is
             greater than 0x7fffffff
*/
3015 
3016 static int
ord2utf8(uint32_t cvalue,uint8_t * utf8bytes)3017 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
3018 {
3019 int i, j;
3020 if (cvalue > 0x7fffffffu)
3021   return -1;
3022 for (i = 0; i < utf8_table1_size; i++)
3023   if (cvalue <= (uint32_t)utf8_table1[i]) break;
3024 utf8bytes += i;
3025 for (j = i; j > 0; j--)
3026  {
3027  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
3028  cvalue >>= 6;
3029  }
3030 *utf8bytes = utf8_table2[i] | cvalue;
3031 return i + 1;
3032 }
3033 #endif  /* SUPPORT_PCRE2_8 */
3034 
3035 
3036 
3037 #ifdef SUPPORT_PCRE2_16
3038 /*************************************************
3039 *           Convert string to 16-bit             *
3040 *************************************************/
3041 
3042 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3043 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3044 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3045 limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
3046 UTF-8 if the utf8_input modifier is set, but an error is generated for values
3047 greater than 0xffff.
3048 
3049 If all the input bytes are ASCII, the space needed for a 16-bit string is
3050 exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
3051 is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8
3052 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes
3053 in UTF-16. The result is always left in pbuffer16. Impose a minimum size to
3054 save repeated re-sizing.
3055 
3056 Note that this function does not object to surrogate values. This is
3057 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
3058 for the purpose of testing that they are correctly faulted.
3059 
3060 Arguments:
3061   p          points to a byte string
3062   utf        true in UTF mode
3063   lenptr     points to number of bytes in the string (excluding trailing zero)
3064 
3065 Returns:     0 on success, with the length updated to the number of 16-bit
3066                data items used (excluding the trailing zero)
3067              OR -1 if a UTF-8 string is malformed
3068              OR -2 if a value > 0x10ffff is encountered in UTF mode
3069              OR -3 if a value > 0xffff is encountered when not in UTF mode
3070 */
3071 
3072 static PCRE2_SIZE
to16(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3073 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3074 {
3075 uint16_t *pp;
3076 PCRE2_SIZE len = *lenptr;
3077 
3078 if (pbuffer16_size < 2*len + 2)
3079   {
3080   if (pbuffer16 != NULL) free(pbuffer16);
3081   pbuffer16_size = 2*len + 2;
3082   if (pbuffer16_size < 4096) pbuffer16_size = 4096;
3083   pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
3084   if (pbuffer16 == NULL)
3085     {
3086     fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
3087       SIZ_CAST pbuffer16_size);
3088     exit(1);
3089     }
3090   }
3091 
3092 pp = pbuffer16;
3093 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3094   {
3095   for (; len > 0; len--) *pp++ = *p++;
3096   }
3097 else while (len > 0)
3098   {
3099   uint32_t c;
3100   int chlen = utf82ord(p, &c);
3101   if (chlen <= 0) return -1;
3102   if (!utf && c > 0xffff) return -3;
3103   if (c > 0x10ffff) return -2;
3104   p += chlen;
3105   len -= chlen;
3106   if (c < 0x10000) *pp++ = c; else
3107     {
3108     c -= 0x10000;
3109     *pp++ = 0xD800 | (c >> 10);
3110     *pp++ = 0xDC00 | (c & 0x3ff);
3111     }
3112   }
3113 
3114 *pp = 0;
3115 *lenptr = pp - pbuffer16;
3116 return 0;
3117 }
3118 #endif
3119 
3120 
3121 
3122 #ifdef SUPPORT_PCRE2_32
3123 /*************************************************
3124 *           Convert string to 32-bit             *
3125 *************************************************/
3126 
3127 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3128 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3129 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3130 limit of 0x10ffff cause an error.
3131 
3132 In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
3133 is set, and no limit is imposed. There is special interpretation of the 0xff
3134 byte (which is illegal in UTF-8) in this case: it causes the top bit of the
3135 next character to be set. This provides a way of generating 32-bit characters
3136 greater than 0x7fffffff.
3137 
3138 If all the input bytes are ASCII, the space needed for a 32-bit string is
3139 exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
3140 string is no more than four times, because the number of characters must be
3141 less than the number of bytes. The result is always left in pbuffer32. Impose a
3142 minimum size to save repeated re-sizing.
3143 
3144 Note that this function does not object to surrogate values. This is
3145 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
3146 for the purpose of testing that they are correctly faulted.
3147 
3148 Arguments:
3149   p          points to a byte string
3150   utf        true in UTF mode
3151   lenptr     points to number of bytes in the string (excluding trailing zero)
3152 
3153 Returns:     0 on success, with the length updated to the number of 32-bit
3154                data items used (excluding the trailing zero)
3155              OR -1 if a UTF-8 string is malformed
3156              OR -2 if a value > 0x10ffff is encountered in UTF mode
3157 */
3158 
3159 static PCRE2_SIZE
to32(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3160 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3161 {
3162 uint32_t *pp;
3163 PCRE2_SIZE len = *lenptr;
3164 
3165 if (pbuffer32_size < 4*len + 4)
3166   {
3167   if (pbuffer32 != NULL) free(pbuffer32);
3168   pbuffer32_size = 4*len + 4;
3169   if (pbuffer32_size < 8192) pbuffer32_size = 8192;
3170   pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
3171   if (pbuffer32 == NULL)
3172     {
3173     fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
3174       SIZ_CAST pbuffer32_size);
3175     exit(1);
3176     }
3177   }
3178 
3179 pp = pbuffer32;
3180 
3181 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3182   {
3183   for (; len > 0; len--) *pp++ = *p++;
3184   }
3185 
3186 else while (len > 0)
3187   {
3188   int chlen;
3189   uint32_t c;
3190   uint32_t topbit = 0;
3191   if (!utf && *p == 0xff && len > 1)
3192     {
3193     topbit = 0x80000000u;
3194     p++;
3195     len--;
3196     }
3197   chlen = utf82ord(p, &c);
3198   if (chlen <= 0) return -1;
3199   if (utf && c > 0x10ffff) return -2;
3200   p += chlen;
3201   len -= chlen;
3202   *pp++ = c | topbit;
3203   }
3204 
3205 *pp = 0;
3206 *lenptr = pp - pbuffer32;
3207 return 0;
3208 }
3209 #endif /* SUPPORT_PCRE2_32 */
3210 
3211 
3212 
3213 /*************************************************
3214 *         Move back by so many characters        *
3215 *************************************************/
3216 
3217 /* Given a code unit offset in a subject string, move backwards by a number of
3218 characters, and return the resulting offset.
3219 
3220 Arguments:
3221   subject   pointer to the string
3222   offset    start offset
3223   count     count to move back by
3224   utf       TRUE if in UTF mode
3225 
3226 Returns:   a possibly changed offset
3227 */
3228 
3229 static PCRE2_SIZE
backchars(uint8_t * subject,PCRE2_SIZE offset,uint32_t count,BOOL utf)3230 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
3231 {
3232 if (!utf || test_mode == PCRE32_MODE)
3233   return (count >= offset)? 0 : (offset - count);
3234 
3235 else if (test_mode == PCRE8_MODE)
3236   {
3237   PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
3238   for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
3239     {
3240     pp--;
3241     while ((*pp & 0xc0) == 0x80) pp--;
3242     }
3243   return pp - (PCRE2_SPTR8)subject;
3244   }
3245 
3246 else  /* 16-bit mode */
3247   {
3248   PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
3249   for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
3250     {
3251     pp--;
3252     if ((*pp & 0xfc00) == 0xdc00) pp--;
3253     }
3254   return pp - (PCRE2_SPTR16)subject;
3255   }
3256 }
3257 
3258 
3259 
3260 /*************************************************
3261 *           Expand input buffers                 *
3262 *************************************************/
3263 
3264 /* This function doubles the size of the input buffer and the buffer for
3265 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
3266 the new ones.
3267 
3268 Arguments: none
3269 Returns:   nothing (aborts if malloc() fails)
3270 */
3271 
3272 static void
expand_input_buffers(void)3273 expand_input_buffers(void)
3274 {
3275 int new_pbuffer8_size = 2*pbuffer8_size;
3276 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3277 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3278 
3279 if (new_buffer == NULL || new_pbuffer8 == NULL)
3280   {
3281   fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3282   exit(1);
3283   }
3284 
3285 memcpy(new_buffer, buffer, pbuffer8_size);
3286 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3287 
3288 pbuffer8_size = new_pbuffer8_size;
3289 
3290 free(buffer);
3291 free(pbuffer8);
3292 
3293 buffer = new_buffer;
3294 pbuffer8 = new_pbuffer8;
3295 }
3296 
3297 
3298 
3299 /*************************************************
3300 *        Read or extend an input line            *
3301 *************************************************/
3302 
3303 /* Input lines are read into buffer, but both patterns and data lines can be
3304 continued over multiple input lines. In addition, if the buffer fills up, we
3305 want to automatically expand it so as to be able to handle extremely large
3306 lines that are needed for certain stress tests, although this is less likely
3307 now that there are repetition features for both patterns and data. When the
input buffer is expanded, the buffer that holds an 8-bit copy of patterns
(pbuffer8) must also be expanded likewise, and its contents, which are a copy
of the input for callouts, must be preserved (for when expansion happens for a
data line). This is not the most optimal way of handling this, but hey, this is
just a test program!
3312 
3313 Arguments:
3314   f            the file to read
3315   start        where in buffer to start (this *must* be within buffer)
3316   prompt       for stdin or readline()
3317 
3318 Returns:       pointer to the start of new data
3319                could be a copy of start, or could be moved
3320                NULL if no data read and EOF reached
3321 */
3322 
3323 static uint8_t *
extend_inputline(FILE * f,uint8_t * start,const char * prompt)3324 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3325 {
3326 uint8_t *here = start;
3327 
3328 for (;;)
3329   {
3330   size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3331 
3332   if (rlen > 1000)
3333     {
3334     size_t dlen;
3335 
3336     /* If libreadline or libedit support is required, use readline() to read a
3337     line if the input is a terminal. Note that readline() removes the trailing
3338     newline, so we must put it back again, to be compatible with fgets(). */
3339 
3340 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3341     if (INTERACTIVE(f))
3342       {
3343       size_t len;
3344       char *s = readline(prompt);
3345       if (s == NULL) return (here == start)? NULL : start;
3346       len = strlen(s);
3347       if (len > 0) add_history(s);
3348       if (len > rlen - 1) len = rlen - 1;
3349       memcpy(here, s, len);
3350       here[len] = '\n';
3351       here[len+1] = 0;
3352       free(s);
3353       }
3354     else
3355 #endif
3356 
3357     /* Read the next line by normal means, prompting if the file is a tty. */
3358 
3359       {
3360       if (INTERACTIVE(f)) printf("%s", prompt);
3361       if (fgets((char *)here, rlen,  f) == NULL)
3362         return (here == start)? NULL : start;
3363       }
3364 
3365     dlen = strlen((char *)here);
3366     here += dlen;
3367 
3368     /* Check for end of line reached. Take care not to read data from before
3369     start (dlen will be zero for a file starting with a binary zero). */
3370 
3371     if (here > start && here[-1] == '\n') return start;
3372 
3373     /* If we have not read a newline when reading a file, we have either filled
3374     the buffer or reached the end of the file. We can detect the former by
3375     checking that the string fills the buffer, and the latter by feof(). If
3376     neither of these is true, it means we read a binary zero which has caused
3377     strlen() to give a short length. This is a hard error because pcre2test
3378     expects to work with C strings. */
3379 
3380     if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3381       {
3382       fprintf(outfile, "** Binary zero encountered in input\n");
3383       fprintf(outfile, "** pcre2test run abandoned\n");
3384       exit(1);
3385       }
3386     }
3387 
3388   else
3389     {
3390     size_t start_offset = start - buffer;
3391     size_t here_offset = here - buffer;
3392     expand_input_buffers();
3393     start = buffer + start_offset;
3394     here = buffer + here_offset;
3395     }
3396   }
3397 
3398 /* Control never gets here */
3399 }
3400 
3401 
3402 
3403 /*************************************************
3404 *         Case-independent strncmp() function    *
3405 *************************************************/
3406 
3407 /*
3408 Arguments:
3409   s         first string
3410   t         second string
3411   n         number of characters to compare
3412 
3413 Returns:    < 0, = 0, or > 0, according to the comparison
3414 */
3415 
static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
/* Compare at most n characters, ignoring case. As with strncmp(), the
comparison also ends when both strings reach their terminating zero at the same
position, so nothing beyond the end of a short string is ever read. A zero or
negative n compares nothing and yields 0. */

for (; n > 0; n--)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;
  if (cs == 0) break;             /* Both strings have ended */
  }
return 0;
}
3426 
3427 
3428 
3429 /*************************************************
3430 *          Scan the main modifier list           *
3431 *************************************************/
3432 
3433 /* This function searches the modifier list for a long modifier name.
3434 
3435 Argument:
3436   p         start of the name
  len       length of the name
3438 
3439 Returns:    an index in the modifier list, or -1 on failure
3440 */
3441 
3442 static int
scan_modifiers(const uint8_t * p,unsigned int len)3443 scan_modifiers(const uint8_t *p, unsigned int len)
3444 {
3445 int bot = 0;
3446 int top = MODLISTCOUNT;
3447 
3448 while (top > bot)
3449   {
3450   int mid = (bot + top)/2;
3451   unsigned int mlen = strlen(modlist[mid].name);
3452   int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3453   if (c == 0)
3454     {
3455     if (len == mlen) return mid;
3456     c = (int)len - (int)mlen;
3457     }
3458   if (c > 0) bot = mid + 1; else top = mid;
3459   }
3460 
3461 return -1;
3462 
3463 }
3464 
3465 
3466 
3467 /*************************************************
*       Check a modifier and find its field      *
3469 *************************************************/
3470 
3471 /* This function is called when a modifier has been identified. We check that
3472 it is allowed here and find the field that is to be changed.
3473 
3474 Arguments:
3475   m          the modifier list entry
3476   ctx        CTX_PAT     => pattern context
3477              CTX_POPPAT  => pattern context for popped pattern
3478              CTX_DEFPAT  => default pattern context
3479              CTX_DAT     => data context
3480              CTX_DEFDAT  => default data context
3481   pctl       point to pattern control block
3482   dctl       point to data control block
3483   c          a single character or 0
3484 
3485 Returns:     a field pointer or NULL
3486 */
3487 
3488 static void *
check_modifier(modstruct * m,int ctx,patctl * pctl,datctl * dctl,uint32_t c)3489 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3490 {
3491 void *field = NULL;
3492 PCRE2_SIZE offset = m->offset;
3493 
3494 if (restrict_for_perl_test) switch(m->which)
3495   {
3496   case MOD_PNDP:
3497   case MOD_PATP:
3498   case MOD_PDP:
3499   break;
3500 
3501   default:
3502   fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3503     m->name);
3504   return NULL;
3505   }
3506 
3507 switch (m->which)
3508   {
3509   case MOD_CTC:  /* Compile context modifier */
3510   if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3511     else if (ctx == CTX_PAT) field = PTR(pat_context);
3512   break;
3513 
3514   case MOD_CTM:  /* Match context modifier */
3515   if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3516     else if (ctx == CTX_DAT) field = PTR(dat_context);
3517   break;
3518 
3519   case MOD_DAT:  /* Data line modifier */
3520   if (dctl != NULL) field = dctl;
3521   break;
3522 
3523   case MOD_PAT:    /* Pattern modifier */
3524   case MOD_PATP:   /* Allowed for Perl test */
3525   if (pctl != NULL) field = pctl;
3526   break;
3527 
3528   case MOD_PD:   /* Pattern or data line modifier */
3529   case MOD_PDP:  /* Ditto, allowed for Perl test */
3530   case MOD_PND:  /* Ditto, but not default pattern */
3531   case MOD_PNDP: /* Ditto, allowed for Perl test */
3532   if (dctl != NULL) field = dctl;
3533     else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
3534              ctx != CTX_DEFPAT))
3535       field = pctl;
3536   break;
3537   }
3538 
3539 if (field == NULL)
3540   {
3541   if (c == 0)
3542     fprintf(outfile, "** '%s' is not valid here\n", m->name);
3543   else
3544     fprintf(outfile, "** /%c is not valid here\n", c);
3545   return NULL;
3546   }
3547 
3548 return (char *)field + offset;
3549 }
3550 
3551 
3552 
3553 /*************************************************
3554 *            Decode a modifier list              *
3555 *************************************************/
3556 
3557 /* A pointer to a control block is NULL when called in cases when that block is
3558 not relevant. They are never all relevant in one call. At least one of patctl
3559 and datctl is NULL. The second argument specifies which context to use for
3560 modifiers that apply to contexts.
3561 
3562 Arguments:
3563   p          point to modifier string
3564   ctx        CTX_PAT     => pattern context
3565              CTX_POPPAT  => pattern context for popped pattern
3566              CTX_DEFPAT  => default pattern context
3567              CTX_DAT     => data context
3568              CTX_DEFDAT  => default data context
3569   pctl       point to pattern control block
3570   dctl       point to data control block
3571 
3572 Returns: TRUE if successful decode, FALSE otherwise
3573 */
3574 
3575 static BOOL
decode_modifiers(uint8_t * p,int ctx,patctl * pctl,datctl * dctl)3576 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3577 {
3578 uint8_t *ep, *pp;
3579 long li;
3580 unsigned long uli;
3581 BOOL first = TRUE;
3582 
3583 for (;;)
3584   {
3585   void *field;
3586   modstruct *m;
3587   BOOL off = FALSE;
3588   unsigned int i, len;
3589   int index;
3590   char *endptr;
3591 
3592   /* Skip white space and commas. */
3593 
3594   while (isspace(*p) || *p == ',') p++;
3595   if (*p == 0) break;
3596 
3597   /* Find the end of the item; lose trailing whitespace at end of line. */
3598 
3599   for (ep = p; *ep != 0 && *ep != ','; ep++);
3600   if (*ep == 0)
3601     {
3602     while (ep > p && isspace(ep[-1])) ep--;
3603     *ep = 0;
3604     }
3605 
3606   /* Remember if the first character is '-'. */
3607 
3608   if (*p == '-')
3609     {
3610     off = TRUE;
3611     p++;
3612     }
3613 
3614   /* Find the length of a full-length modifier name, and scan for it. */
3615 
3616   pp = p;
3617   while (pp < ep && *pp != '=') pp++;
3618   index = scan_modifiers(p, pp - p);
3619 
3620   /* If the first modifier is unrecognized, try to interpret it as a sequence
3621   of single-character abbreviated modifiers. None of these modifiers have any
3622   associated data. They just set options or control bits. */
3623 
3624   if (index < 0)
3625     {
3626     uint32_t cc;
3627     uint8_t *mp = p;
3628 
3629     if (!first)
3630       {
3631       fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3632       if (ep - p == 1)
3633         fprintf(outfile, "** Single-character modifiers must come first\n");
3634       return FALSE;
3635       }
3636 
3637     for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3638       {
3639       for (i = 0; i < C1MODLISTCOUNT; i++)
3640         if (cc == c1modlist[i].onechar) break;
3641 
3642       if (i >= C1MODLISTCOUNT)
3643         {
3644         fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3645           *p, (int)(ep-mp), mp);
3646         return FALSE;
3647         }
3648 
3649       if (c1modlist[i].index >= 0)
3650         {
3651         index = c1modlist[i].index;
3652         }
3653 
3654       else
3655         {
3656         index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3657           strlen(c1modlist[i].fullname));
3658         if (index < 0)
3659           {
3660           fprintf(outfile, "** Internal error: single-character equivalent "
3661             "modifier '%s' not found\n", c1modlist[i].fullname);
3662           return FALSE;
3663           }
3664         c1modlist[i].index = index;     /* Cache for next time */
3665         }
3666 
3667       field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3668       if (field == NULL) return FALSE;
3669 
3670       /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
3671       PCRE2_EXTENDED_MORE. */
3672 
3673       if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
3674         {
3675         *((uint32_t *)field) &= ~PCRE2_EXTENDED;
3676         *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
3677         }
3678       else
3679         *((uint32_t *)field) |= modlist[index].value;
3680       }
3681 
3682     continue;    /* With tne next (fullname) modifier */
3683     }
3684 
3685   /* We have a match on a full-name modifier. Check for the existence of data
3686   when needed. */
3687 
3688   m = modlist + index;      /* Save typing */
3689   if (m->type != MOD_CTL && m->type != MOD_OPT &&
3690       (m->type != MOD_IND || *pp == '='))
3691     {
3692     if (*pp++ != '=')
3693       {
3694       fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3695       return FALSE;
3696       }
3697     if (off)
3698       {
3699       fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3700       return FALSE;
3701       }
3702     }
3703 
3704   /* These on/off types have no data. */
3705 
3706   else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3707     {
3708     fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3709     return FALSE;
3710     }
3711 
3712   /* Set the data length for those types that have data. Then find the field
3713   that is to be set. If check_modifier() returns NULL, it has already output an
3714   error message. */
3715 
3716   len = ep - pp;
3717   field = check_modifier(m, ctx, pctl, dctl, 0);
3718   if (field == NULL) return FALSE;
3719 
3720   /* Process according to data type. */
3721 
3722   switch (m->type)
3723     {
3724     case MOD_CTL:
3725     case MOD_OPT:
3726     if (off) *((uint32_t *)field) &= ~m->value;
3727       else *((uint32_t *)field) |= m->value;
3728     break;
3729 
3730     case MOD_BSR:
3731     if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3732       {
3733 #ifdef BSR_ANYCRLF
3734       *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3735 #else
3736       *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3737 #endif
3738       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
3739         else dctl->control2 &= ~CTL2_BSR_SET;
3740       }
3741     else
3742       {
3743       if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3744         *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3745       else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3746         *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3747       else goto INVALID_VALUE;
3748       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
3749         else dctl->control2 |= CTL2_BSR_SET;
3750       }
3751     pp = ep;
3752     break;
3753 
3754     case MOD_CHR:  /* A single character */
3755     *((uint32_t *)field) = *pp++;
3756     break;
3757 
3758     case MOD_CON:  /* A convert type/options list */
3759     for (;; pp++)
3760       {
3761       uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
3762       len = ((colon != NULL && colon < ep)? colon:ep) - pp;
3763       for (i = 0; i < convertlistcount; i++)
3764         {
3765         if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
3766           {
3767           if (*((uint32_t *)field) == CONVERT_UNSET)
3768             *((uint32_t *)field) = convertlist[i].option;
3769           else
3770             *((uint32_t *)field) |= convertlist[i].option;
3771           break;
3772           }
3773         }
3774       if (i >= convertlistcount) goto INVALID_VALUE;
3775       pp += len;
3776       if (*pp != ':') break;
3777       }
3778     break;
3779 
3780     case MOD_IN2:    /* One or two unsigned integers */
3781     if (!isdigit(*pp)) goto INVALID_VALUE;
3782     uli = strtoul((const char *)pp, &endptr, 10);
3783     if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3784     ((uint32_t *)field)[0] = (uint32_t)uli;
3785     if (*endptr == ':')
3786       {
3787       uli = strtoul((const char *)endptr+1, &endptr, 10);
3788       if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3789       ((uint32_t *)field)[1] = (uint32_t)uli;
3790       }
3791     else ((uint32_t *)field)[1] = 0;
3792     pp = (uint8_t *)endptr;
3793     break;
3794 
3795     /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3796     less than ULONG_MAX. So first test for overflowing the long int, and then
3797     test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3798 
3799     case MOD_SIZ:    /* PCRE2_SIZE value */
3800     if (!isdigit(*pp)) goto INVALID_VALUE;
3801     uli = strtoul((const char *)pp, &endptr, 10);
3802     if (uli == ULONG_MAX) goto INVALID_VALUE;
3803 #if ULONG_MAX > PCRE2_SIZE_MAX
3804     if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3805 #endif
3806     *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
3807     pp = (uint8_t *)endptr;
3808     break;
3809 
3810     case MOD_IND:    /* Unsigned integer with default */
3811     if (len == 0)
3812       {
3813       *((uint32_t *)field) = (uint32_t)(m->value);
3814       break;
3815       }
3816     /* Fall through */
3817 
3818     case MOD_INT:    /* Unsigned integer */
3819     if (!isdigit(*pp)) goto INVALID_VALUE;
3820     uli = strtoul((const char *)pp, &endptr, 10);
3821     if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3822     *((uint32_t *)field) = (uint32_t)uli;
3823     pp = (uint8_t *)endptr;
3824     break;
3825 
3826     case MOD_INS:   /* Signed integer */
3827     if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
3828     li = strtol((const char *)pp, &endptr, 10);
3829     if (S32OVERFLOW(li)) goto INVALID_VALUE;
3830     *((int32_t *)field) = (int32_t)li;
3831     pp = (uint8_t *)endptr;
3832     break;
3833 
3834     case MOD_NL:
3835     for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
3836       if (len == strlen(newlines[i]) &&
3837         strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
3838     if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
3839     if (i == 0)
3840       {
3841       *((uint16_t *)field) = NEWLINE_DEFAULT;
3842       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
3843         else dctl->control2 &= ~CTL2_NL_SET;
3844       }
3845     else
3846       {
3847       *((uint16_t *)field) = i;
3848       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
3849         else dctl->control2 |= CTL2_NL_SET;
3850       }
3851     pp = ep;
3852     break;
3853 
3854     case MOD_NN:              /* Name or (signed) number; may be several */
3855     if (isdigit(*pp) || *pp == '-')
3856       {
3857       int ct = MAXCPYGET - 1;
3858       int32_t value;
3859       li = strtol((const char *)pp, &endptr, 10);
3860       if (S32OVERFLOW(li)) goto INVALID_VALUE;
3861       value = (int32_t)li;
3862       field = (char *)field - m->offset + m->value;      /* Adjust field ptr */
3863       if (value >= 0)                                    /* Add new number */
3864         {
3865         while (*((int32_t *)field) >= 0 && ct-- > 0)   /* Skip previous */
3866           field = (char *)field + sizeof(int32_t);
3867         if (ct <= 0)
3868           {
3869           fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
3870           return FALSE;
3871           }
3872         }
3873       *((int32_t *)field) = value;
3874       if (ct > 0) ((int32_t *)field)[1] = -1;
3875       pp = (uint8_t *)endptr;
3876       }
3877 
3878     /* Multiple strings are put end to end. */
3879 
3880     else
3881       {
3882       char *nn = (char *)field;
3883       if (len > 0)                    /* Add new name */
3884         {
3885         if (len > MAX_NAME_SIZE)
3886           {
3887           fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
3888           return FALSE;
3889           }
3890         while (*nn != 0) nn += strlen(nn) + 1;
3891         if (nn + len + 2 - (char *)field > LENCPYGET)
3892           {
3893           fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
3894             m->name);
3895           return FALSE;
3896           }
3897         memcpy(nn, pp, len);
3898         }
3899       nn[len] = 0 ;
3900       nn[len+1] = 0;
3901       pp = ep;
3902       }
3903     break;
3904 
3905     case MOD_STR:
3906     if (len + 1 > m->value)
3907       {
3908       fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
3909         m->name, m->value - 1);
3910       return FALSE;
3911       }
3912     memcpy(field, pp, len);
3913     ((uint8_t *)field)[len] = 0;
3914     pp = ep;
3915     break;
3916     }
3917 
3918   if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3919     {
3920     fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
3921     return FALSE;
3922     }
3923 
3924   p = pp;
3925   first = FALSE;
3926 
3927   if (ctx == CTX_POPPAT &&
3928      (pctl->options != 0 ||
3929       pctl->tables_id != 0 ||
3930       pctl->locale[0] != 0 ||
3931       (pctl->control & NOTPOP_CONTROLS) != 0))
3932     {
3933     fprintf(outfile, "** '%s' is not valid here\n", m->name);
3934     return FALSE;
3935     }
3936   }
3937 
3938 return TRUE;
3939 
3940 INVALID_VALUE:
3941 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
3942 return FALSE;
3943 }
3944 
3945 
3946 /*************************************************
3947 *             Get info from a pattern            *
3948 *************************************************/
3949 
3950 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
3951 pattern.
3952 
3953 Arguments:
3954   what        code for the required information
3955   where       where to put the answer
3956   unsetok     PCRE2_ERROR_UNSET is an "expected" result
3957 
3958 Returns:      the return from pcre2_pattern_info()
3959 */
3960 
3961 static int
pattern_info(int what,void * where,BOOL unsetok)3962 pattern_info(int what, void *where, BOOL unsetok)
3963 {
3964 int rc;
3965 PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL);  /* Exercise the code */
3966 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
3967 if (rc >= 0) return 0;
3968 if (rc != PCRE2_ERROR_UNSET || !unsetok)
3969   {
3970   fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
3971     what);
3972   if (rc == PCRE2_ERROR_BADMODE)
3973     fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3974       "%d-bit mode\n", test_mode,
3975       8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
3976   }
3977 return rc;
3978 }
3979 
3980 
3981 
3982 #ifdef SUPPORT_PCRE2_8
3983 /*************************************************
3984 *             Show something in a list           *
3985 *************************************************/
3986 
3987 /* This function just helps to keep the code that uses it tidier. It's used for
3988 various lists of things where there needs to be introductory text before the
3989 first item. As these calls are all in the POSIX-support code, they happen only
3990 when 8-bit mode is supported. */
3991 
3992 static void
prmsg(const char ** msg,const char * s)3993 prmsg(const char **msg, const char *s)
3994 {
3995 fprintf(outfile, "%s %s", *msg, s);
3996 *msg = "";
3997 }
3998 #endif  /* SUPPORT_PCRE2_8 */
3999 
4000 
4001 
4002 /*************************************************
4003 *                Show control bits               *
4004 *************************************************/
4005 
4006 /* Called for mutually exclusive controls and for unsupported POSIX controls.
4007 Because the bits are unique, this can be used for both pattern and data control
4008 words.
4009 
4010 Arguments:
4011   controls    control bits
4012   controls2   more control bits
4013   before      text to print before
4014 
4015 Returns:      nothing
4016 */
4017 
4018 static void
show_controls(uint32_t controls,uint32_t controls2,const char * before)4019 show_controls(uint32_t controls, uint32_t controls2, const char *before)
4020 {
4021 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4022   before,
4023   ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
4024   ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
4025   ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
4026   ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
4027   ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
4028   ((controls & CTL_BINCODE) != 0)? " bincode" : "",
4029   ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
4030   ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
4031   ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
4032   ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
4033   ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
4034   ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
4035   ((controls & CTL_DFA) != 0)? " dfa" : "",
4036   ((controls & CTL_EXPAND) != 0)? " expand" : "",
4037   ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
4038   ((controls & CTL_FRAMESIZE) != 0)? " framesize" : "",
4039   ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
4040   ((controls & CTL_GETALL) != 0)? " getall" : "",
4041   ((controls & CTL_GLOBAL) != 0)? " global" : "",
4042   ((controls & CTL_HEXPAT) != 0)? " hex" : "",
4043   ((controls & CTL_INFO) != 0)? " info" : "",
4044   ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
4045   ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
4046   ((controls & CTL_MARK) != 0)? " mark" : "",
4047   ((controls & CTL_MEMORY) != 0)? " memory" : "",
4048   ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
4049   ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
4050   ((controls & CTL_POSIX) != 0)? " posix" : "",
4051   ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
4052   ((controls & CTL_PUSH) != 0)? " push" : "",
4053   ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
4054   ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
4055   ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
4056   ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
4057   ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
4058   ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
4059   ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
4060   ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
4061   ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
4062   ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
4063 }
4064 
4065 
4066 
4067 /*************************************************
4068 *                Show compile options            *
4069 *************************************************/
4070 
4071 /* Called from show_pattern_info() and for unsupported POSIX options.
4072 
4073 Arguments:
4074   options     an options word
4075   before      text to print before
4076   after       text to print after
4077 
4078 Returns:      nothing
4079 */
4080 
4081 static void
show_compile_options(uint32_t options,const char * before,const char * after)4082 show_compile_options(uint32_t options, const char *before, const char *after)
4083 {
4084 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4085 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4086   before,
4087   ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
4088   ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
4089   ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
4090   ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
4091   ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4092   ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
4093   ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
4094   ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4095   ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
4096   ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
4097   ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4098   ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
4099   ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
4100   ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
4101   ((options & PCRE2_LITERAL) != 0)? " literal" : "",
4102   ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
4103   ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
4104   ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
4105   ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
4106   ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
4107   ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4108   ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
4109   ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
4110   ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4111   ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4112   ((options & PCRE2_UCP) != 0)? " ucp" : "",
4113   ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
4114   ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
4115   ((options & PCRE2_UTF) != 0)? " utf" : "",
4116   after);
4117 }
4118 
4119 
4120 /*************************************************
4121 *           Show compile extra options           *
4122 *************************************************/
4123 
4124 /* Called from show_pattern_info() and for unsupported POSIX options.
4125 
4126 Arguments:
4127   options     an options word
4128   before      text to print before
4129   after       text to print after
4130 
4131 Returns:      nothing
4132 */
4133 
4134 static void
show_compile_extra_options(uint32_t options,const char * before,const char * after)4135 show_compile_extra_options(uint32_t options, const char *before,
4136   const char *after)
4137 {
4138 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4139 else fprintf(outfile, "%s%s%s%s%s%s",
4140   before,
4141   ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
4142   ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
4143   ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
4144   ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
4145   after);
4146 }
4147 
4148 
4149 
4150 #ifdef SUPPORT_PCRE2_8
4151 /*************************************************
4152 *                Show match options              *
4153 *************************************************/
4154 
4155 /* Called for unsupported POSIX options. */
4156 
4157 static void
show_match_options(uint32_t options)4158 show_match_options(uint32_t options)
4159 {
4160 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s",
4161   ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4162   ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
4163   ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
4164   ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4165   ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4166   ((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
4167   ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
4168   ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
4169   ((options & PCRE2_NOTEOL) != 0)? " noteol" : "",
4170   ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
4171   ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
4172 }
4173 #endif  /* SUPPORT_PCRE2_8 */
4174 
4175 
4176 
4177 /*************************************************
4178 *      Show memory usage info for a pattern      *
4179 *************************************************/
4180 
4181 static void
show_memory_info(void)4182 show_memory_info(void)
4183 {
4184 uint32_t name_count, name_entry_size;
4185 size_t size, cblock_size;
4186 
4187 /* One of the test_mode values will always be true, but to stop a compiler
4188 warning we must initialize cblock_size. */
4189 
4190 cblock_size = 0;
4191 #ifdef SUPPORT_PCRE2_8
4192 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
4193 #endif
4194 #ifdef SUPPORT_PCRE2_16
4195 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
4196 #endif
4197 #ifdef SUPPORT_PCRE2_32
4198 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
4199 #endif
4200 
4201 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
4202 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
4203 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
4204 fprintf(outfile, "Memory allocation (code space): %d\n",
4205   (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
4206 if (pat_patctl.jit != 0)
4207   {
4208   (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
4209   fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
4210   }
4211 }
4212 
4213 
4214 
4215 /*************************************************
4216 *       Show frame size info for a pattern       *
4217 *************************************************/
4218 
4219 static void
show_framesize(void)4220 show_framesize(void)
4221 {
4222 size_t frame_size;
4223 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
4224 fprintf(outfile, "Frame size for pcre2_match(): %d\n", (int)frame_size);
4225 }
4226 
4227 
4228 
4229 /*************************************************
4230 *         Get and output an error message        *
4231 *************************************************/
4232 
4233 static BOOL
print_error_message(int errorcode,const char * before,const char * after)4234 print_error_message(int errorcode, const char *before, const char *after)
4235 {
4236 int len;
4237 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4238 if (len < 0)
4239   {
4240   fprintf(outfile, "\n** pcre2test internal error: cannot interpret error "
4241     "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len);
4242   }
4243 else
4244   {
4245   fprintf(outfile, "%s", before);
4246   PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4247   fprintf(outfile, "%s", after);
4248   }
4249 return len >= 0;
4250 }
4251 
4252 
4253 /*************************************************
4254 *     Callback function for callout enumeration  *
4255 *************************************************/
4256 
/* The only differences in the callout enumeration block for different code
4258 unit widths are that the pointers to the subject, the most recent MARK, and a
4259 callout argument string point to strings of the appropriate width. Casts can be
4260 used to deal with this.
4261 
4262 Argument:
4263   cb            pointer to enumerate block
4264   callout_data  user data
4265 
4266 Returns:    0
4267 */
4268 
static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
  void *callout_data)
{
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
int item_length =
  (int)((cb->next_item_length == 0)? 1 : cb->next_item_length);

(void)callout_data;  /* Not currently displayed */

fprintf(outfile, "Callout ");

/* A callout without a string argument is identified by its number. */

if (cb->callout_string == NULL)
  {
  fprintf(outfile, "%d  ", cb->callout_number);
  }

/* A string argument is shown between its delimiters. The opening delimiter
is the code unit just before the string; the closing one is the entry in
callout_end_delims that pairs with it, or the same character again if it is
not found in callout_start_delims. */

else
  {
  uint32_t k = 0;
  uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);

  fprintf(outfile, "%c", delimiter);
  PCHARSV(cb->callout_string, 0,
    cb->callout_string_length, utf, outfile);

  while (callout_start_delims[k] != 0 &&
         callout_start_delims[k] != delimiter)
    k++;
  if (callout_start_delims[k] != 0) delimiter = callout_end_delims[k];

  fprintf(outfile, "%c  ", delimiter);
  }

/* Finish with the pattern text of the item that follows the callout, using
a single character when the item length is given as zero. */

fprintf(outfile, "%.*s\n", item_length, pbuffer8 + cb->pattern_position);

return 0;
}
4300 
4301 
4302 
4303 /*************************************************
4304 *        Show information about a pattern        *
4305 *************************************************/
4306 
4307 /* This function is called after a pattern has been compiled if any of the
4308 information-requesting controls have been set.
4309 
4310 Arguments:  none
4311 
4312 Returns:    PR_OK     continue processing next line
4313             PR_SKIP   skip to a blank line
4314             PR_ABEND  abort the pcre2test run
4315 */
4316 
static int
show_pattern_info(void)
{
uint32_t compile_options, overall_options, extra_options;

/* Output is produced in up to three independent stages, each selected by
control bits in pat_patctl.control: the compiled code (bincode/fullbincode),
the information report (info), and the list of callouts (callout_info). */

if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
  {
  fprintf(outfile, "------------------------------------------------------------------\n");
  PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
  }

if ((pat_patctl.control & CTL_INFO) != 0)
  {
  int rc;
  void *nametable;
  uint8_t *start_bits;
  BOOL heap_limit_set, match_limit_set, depth_limit_set;
  uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
    hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
    depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
    newline_convention;

  /* Exercise the error route. */

  PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL);
  (void)rc;

  /* These info requests may return PCRE2_ERROR_UNSET. For each one, remember
  whether a value was obtained; any other error abandons the run. */

  switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
    {
    case 0:
    heap_limit_set = TRUE;
    break;

    case PCRE2_ERROR_UNSET:
    heap_limit_set = FALSE;
    break;

    default:
    return PR_ABEND;
    }

  switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
    {
    case 0:
    match_limit_set = TRUE;
    break;

    case PCRE2_ERROR_UNSET:
    match_limit_set = FALSE;
    break;

    default:
    return PR_ABEND;
    }

  switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
    {
    case 0:
    depth_limit_set = TRUE;
    break;

    case PCRE2_ERROR_UNSET:
    depth_limit_set = FALSE;
    break;

    default:
    return PR_ABEND;
    }

  /* These info requests should always succeed. pattern_info() yields zero on
  success and a negative error code otherwise, so a non-zero sum means that at
  least one of them failed. */

  if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
      pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
      pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
      pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
      pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
      pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
      pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
      pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
      pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
      pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
      pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
      pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
      pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
      pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
      pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
      pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
      pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
      != 0)
    return PR_ABEND;

  fprintf(outfile, "Capturing subpattern count = %d\n", capture_count);

  if (backrefmax > 0)
    fprintf(outfile, "Max back reference = %d\n", backrefmax);

  /* Note that maxlookbehind is not fetched above; it is a variable that is
  set elsewhere in this file. */

  if (maxlookbehind > 0)
    fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);

  if (heap_limit_set)
    fprintf(outfile, "Heap limit = %u\n", heap_limit);

  if (match_limit_set)
    fprintf(outfile, "Match limit = %u\n", match_limit);

  if (depth_limit_set)
    fprintf(outfile, "Depth limit = %u\n", depth_limit);

  /* Each name table entry is read as a group number (two code units in 8-bit
  mode, one otherwise) followed by the name. Names are padded with spaces to
  the width of an entry, and successive entries are nameentrysize code units
  apart. */

  if (namecount > 0)
    {
    fprintf(outfile, "Named capturing subpatterns:\n");
    for (; namecount > 0; namecount--)
      {
      int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
      uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
      fprintf(outfile, "  ");
      PCHARSV(nametable, imm2_size, length, FALSE, outfile);
      while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
#ifdef SUPPORT_PCRE2_32
      if (test_mode == PCRE32_MODE)
        fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
#endif
#ifdef SUPPORT_PCRE2_16
      if (test_mode == PCRE16_MODE)
        fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
#endif
#ifdef SUPPORT_PCRE2_8
      if (test_mode == PCRE8_MODE)
        fprintf(outfile, "%3d\n", (int)(
        ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
#endif
      nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
      }
    }

  if (hascrorlf)     fprintf(outfile, "Contains explicit CR or LF match\n");
  if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
  if (match_empty)   fprintf(outfile, "May match empty string\n");

  /* The results of these three requests are not checked. */

  pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
  pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
  pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);

  /* Do not show NEVER_UTF or NEVER_UCP unless they are among the pattern's
  own options in pat_patctl (presumably they are otherwise present only
  because of #forbid_utf). This saves cluttering up the verification output of
  non-UTF test files. */

  if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
    {
    compile_options &= ~PCRE2_NEVER_UTF;
    overall_options &= ~PCRE2_NEVER_UTF;
    }

  if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
    {
    compile_options &= ~PCRE2_NEVER_UCP;
    overall_options &= ~PCRE2_NEVER_UCP;
    }

  /* Show a single options line when the two sets are the same; otherwise
  show both. Nothing is output when both are empty. */

  if ((compile_options|overall_options) != 0)
    {
    if (compile_options == overall_options)
      show_compile_options(compile_options, "Options:", "\n");
    else
      {
      show_compile_options(compile_options, "Compile options:", "\n");
      show_compile_options(overall_options, "Overall options:", "\n");
      }
    }

  if (extra_options != 0)
    show_compile_extra_options(extra_options, "Extra options:", "\n");

  if (jchanged) fprintf(outfile, "Duplicate name status changes\n");

  /* The \R convention is shown if it was set either by a modifier or in the
  compiled pattern's flags. */

  if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
      (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
    fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
      "any Unicode newline" : "CR, LF, or CRLF");

  /* The newline convention is shown only when the compiled pattern's flags
  record that it was set. */

  if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
    {
    switch (newline_convention)
      {
      case PCRE2_NEWLINE_CR:
      fprintf(outfile, "Forced newline is CR\n");
      break;

      case PCRE2_NEWLINE_LF:
      fprintf(outfile, "Forced newline is LF\n");
      break;

      case PCRE2_NEWLINE_CRLF:
      fprintf(outfile, "Forced newline is CRLF\n");
      break;

      case PCRE2_NEWLINE_ANYCRLF:
      fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
      break;

      case PCRE2_NEWLINE_ANY:
      fprintf(outfile, "Forced newline is any Unicode newline\n");
      break;

      case PCRE2_NEWLINE_NUL:
      fprintf(outfile, "Forced newline is NUL\n");
      break;

      default:
      break;
      }
    }

  /* Starting information: a first code type of 2 is reported as "at start or
  follows newline", and 1 as a specific first code unit. For any other value,
  the bitmap of possible starting code units is listed if there is one. */

  if (first_ctype == 2)
    {
    fprintf(outfile, "First code unit at start or follows newline\n");
    }
  else if (first_ctype == 1)
    {
    const char *caseless =
      ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
      "" : " (caseless)";
    if (PRINTOK(first_cunit))
      fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
    else
      {
      fprintf(outfile, "First code unit = ");
      pchar(first_cunit, FALSE, outfile);
      fprintf(outfile, "%s\n", caseless);
      }
    }
  else if (start_bits != NULL)
    {
    int i;
    int c = 24;

    /* c is a running count of the characters on the current output line; it
    is used to start a new, indented line once the count has passed 75. */

    fprintf(outfile, "Starting code units: ");
    for (i = 0; i < 256; i++)
      {
      if ((start_bits[i/8] & (1<<(i&7))) != 0)
        {
        if (c > 75)
          {
          fprintf(outfile, "\n  ");
          c = 2;
          }
        if (PRINTOK(i) && i != ' ')
          {
          fprintf(outfile, "%c ", i);
          c += 2;
          }
        else
          {
          fprintf(outfile, "\\x%02x ", i);
          c += 5;
          }
        }
      }
    fprintf(outfile, "\n");
    }

  /* Any non-zero last code type causes the last code unit to be shown. */

  if (last_ctype != 0)
    {
    const char *caseless =
      ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
      "" : " (caseless)";
    if (PRINTOK(last_cunit))
      fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
    else
      {
      fprintf(outfile, "Last code unit = ");
      pchar(last_cunit, FALSE, outfile);
      fprintf(outfile, "%s\n", caseless);
      }
    }

  fprintf(outfile, "Subject length lower bound = %d\n", minlength);

  /* The outcome of JIT compilation is reported only when both JIT and
  jitverify were requested. */

  if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
    {
    if (FLD(compiled_code, executable_jit) != NULL)
      fprintf(outfile, "JIT compilation was successful\n");
    else
      {
#ifdef SUPPORT_JIT
      fprintf(outfile, "JIT compilation was not successful");
      if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
        return PR_ABEND;
      fprintf(outfile, "\n");
#else
      fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
#endif
      }
    }
  }

/* List the callouts. A failure of the enumeration skips the rest of this
test; being unable to print the error message as well abandons the run. */

if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
  {
  int errorcode;
  PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
  if (errorcode != 0)
    {
    fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
    if (errorcode < 0 && !print_error_message(errorcode, "", "\n"))
      return PR_ABEND;
    return PR_SKIP;
    }
  }

return PR_OK;
}
4628 
4629 
4630 
4631 /*************************************************
4632 *              Handle serialization error        *
4633 *************************************************/
4634 
4635 /* Print an error message after a serialization failure.
4636 
4637 Arguments:
4638   rc         the error code
4639   msg        an initial message for what failed
4640 
4641 Returns:     FALSE if print_error_message() fails
4642 */
4643 
4644 static BOOL
serial_error(int rc,const char * msg)4645 serial_error(int rc, const char *msg)
4646 {
4647 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4648 return print_error_message(rc, "", "\n");
4649 }
4650 
4651 
4652 
4653 /*************************************************
4654 *        Open file for save/load commands        *
4655 *************************************************/
4656 
4657 /* This function decodes the file name and opens the file.
4658 
4659 Arguments:
4660   buffptr     point after the #command
4661   mode        open mode
4662   fptr        points to the FILE variable
4663 
4664 Returns:      PR_OK or PR_ABEND
4665 */
4666 
4667 static int
open_file(uint8_t * buffptr,const char * mode,FILE ** fptr)4668 open_file(uint8_t *buffptr, const char *mode, FILE **fptr)
4669 {
4670 char *endf;
4671 char *filename = (char *)buffptr;
4672 while (isspace(*filename)) filename++;
4673 endf = filename + strlen8(filename);
4674 while (endf > filename && isspace(endf[-1])) endf--;
4675 
4676 if (endf == filename)
4677   {
4678   fprintf(outfile, "** File name expected after #save\n");
4679   return PR_ABEND;
4680   }
4681 
4682 *endf = 0;
4683 *fptr = fopen((const char *)filename, mode);
4684 if (*fptr == NULL)
4685   {
4686   fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4687   return PR_ABEND;
4688   }
4689 
4690 return PR_OK;
4691 }
4692 
4693 
4694 
4695 /*************************************************
4696 *               Process command line             *
4697 *************************************************/
4698 
4699 /* This function is called for lines beginning with # and a character that is
4700 not ! or whitespace, when encountered between tests, which means that there is
4701 no compiled pattern (compiled_code is NULL). The line is in buffer.
4702 
4703 Arguments:  none
4704 
4705 Returns:    PR_OK     continue processing next line
4706             PR_SKIP   skip to a blank line
4707             PR_ABEND  abort the pcre2test run
4708 */
4709 
4710 static int
process_command(void)4711 process_command(void)
4712 {
4713 FILE *f;
4714 PCRE2_SIZE serial_size;
4715 size_t i;
4716 int rc, cmd, cmdlen, yield;
4717 uint16_t first_listed_newline;
4718 const char *cmdname;
4719 uint8_t *argptr, *serial;
4720 
4721 yield = PR_OK;
4722 cmd = CMD_UNKNOWN;
4723 cmdlen = 0;
4724 
4725 for (i = 0; i < cmdlistcount; i++)
4726   {
4727   cmdname = cmdlist[i].name;
4728   cmdlen = strlen(cmdname);
4729   if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4730       isspace(buffer[cmdlen+1]))
4731     {
4732     cmd = cmdlist[i].value;
4733     break;
4734     }
4735   }
4736 
4737 argptr = buffer + cmdlen + 1;
4738 
4739 if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT)
4740   {
4741   fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
4742   return PR_ABEND;
4743   }
4744 
4745 switch(cmd)
4746   {
4747   case CMD_UNKNOWN:
4748   fprintf(outfile, "** Unknown command: %s", buffer);
4749   break;
4750 
4751   case CMD_FORBID_UTF:
4752   forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4753   break;
4754 
4755   case CMD_PERLTEST:
4756   restrict_for_perl_test = TRUE;
4757   break;
4758 
4759   /* Set default pattern modifiers */
4760 
4761   case CMD_PATTERN:
4762   (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4763   if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4764     def_patctl.jit = 7;
4765   break;
4766 
4767   /* Set default subject modifiers */
4768 
4769   case CMD_SUBJECT:
4770   (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4771   break;
4772 
4773   /* Check the default newline, and if not one of those listed, set up the
4774   first one to be forced. An empty list unsets. */
4775 
4776   case CMD_NEWLINE_DEFAULT:
4777   local_newline_default = 0;   /* Unset */
4778   first_listed_newline = 0;
4779   for (;;)
4780     {
4781     while (isspace(*argptr)) argptr++;
4782     if (*argptr == 0) break;
4783     for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4784       {
4785       size_t nlen = strlen(newlines[i]);
4786       if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4787           isspace(argptr[nlen]))
4788         {
4789         if (i == NEWLINE_DEFAULT) return PR_OK;  /* Default is valid */
4790         if (first_listed_newline == 0) first_listed_newline = i;
4791         }
4792       }
4793     while (*argptr != 0 && !isspace(*argptr)) argptr++;
4794     }
4795   local_newline_default = first_listed_newline;
4796   break;
4797 
4798   /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4799   the compiled pattern (e.g. to give information) are permitted. The default
4800   pattern modifiers are ignored. */
4801 
4802   case CMD_POP:
4803   case CMD_POPCOPY:
4804   if (patstacknext <= 0)
4805     {
4806     fprintf(outfile, "** Can't pop off an empty stack\n");
4807     return PR_SKIP;
4808     }
4809   memset(&pat_patctl, 0, sizeof(patctl));   /* Completely unset */
4810   if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4811     return PR_SKIP;
4812 
4813   if (cmd == CMD_POP)
4814     {
4815     SET(compiled_code, patstack[--patstacknext]);
4816     }
4817   else
4818     {
4819     PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
4820     }
4821 
4822   if (pat_patctl.jit != 0)
4823     {
4824     PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
4825     }
4826   if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
4827   if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
4828   if ((pat_patctl.control & CTL_ANYINFO) != 0)
4829     {
4830     rc = show_pattern_info();
4831     if (rc != PR_OK) return rc;
4832     }
4833   break;
4834 
4835   /* Save the stack of compiled patterns to a file, then empty the stack. */
4836 
4837   case CMD_SAVE:
4838   if (patstacknext <= 0)
4839     {
4840     fprintf(outfile, "** No stacked patterns to save\n");
4841     return PR_OK;
4842     }
4843 
4844   rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f);
4845   if (rc != PR_OK) return rc;
4846 
4847   PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
4848     general_context);
4849   if (rc < 0)
4850     {
4851     fclose(f);
4852     if (!serial_error(rc, "Serialization")) return PR_ABEND;
4853     break;
4854     }
4855 
4856   /* Write the length at the start of the file to make it straightforward to
4857   get the right memory when re-loading. This saves having to read the file size
4858   in different operating systems. To allow for different endianness (even
4859   though reloading with the opposite endianness does not work), write the
4860   length byte-by-byte. */
4861 
4862   for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
4863   if (fwrite(serial, 1, serial_size, f) != serial_size)
4864     {
4865     fprintf(outfile, "** Wrong return from fwrite()\n");
4866     fclose(f);
4867     return PR_ABEND;
4868     }
4869 
4870   fclose(f);
4871   PCRE2_SERIALIZE_FREE(serial);
4872   while(patstacknext > 0)
4873     {
4874     SET(compiled_code, patstack[--patstacknext]);
4875     SUB1(pcre2_code_free, compiled_code);
4876     }
4877   SET(compiled_code, NULL);
4878   break;
4879 
4880   /* Load a set of compiled patterns from a file onto the stack */
4881 
4882   case CMD_LOAD:
4883   rc = open_file(argptr+1, BINARY_INPUT_MODE, &f);
4884   if (rc != PR_OK) return rc;
4885 
4886   serial_size = 0;
4887   for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
4888 
4889   serial = malloc(serial_size);
4890   if (serial == NULL)
4891     {
4892     fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
4893       SIZ_CAST serial_size);
4894     fclose(f);
4895     return PR_ABEND;
4896     }
4897 
4898   i = fread(serial, 1, serial_size, f);
4899   fclose(f);
4900 
4901   if (i != serial_size)
4902     {
4903     fprintf(outfile, "** Wrong return from fread()\n");
4904     yield = PR_ABEND;
4905     }
4906   else
4907     {
4908     PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
4909     if (rc < 0)
4910       {
4911       if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
4912       }
4913     else
4914       {
4915       if (rc + patstacknext > PATSTACKSIZE)
4916         {
4917         fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
4918           rc, (rc == 1)? "" : "s");
4919         rc = PATSTACKSIZE - patstacknext;
4920         fprintf(outfile, "** Decoding %d pattern%s\n", rc,
4921           (rc == 1)? "" : "s");
4922         }
4923       PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
4924         general_context);
4925       if (rc < 0)
4926         {
4927         if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
4928         }
4929       else patstacknext += rc;
4930       }
4931     }
4932 
4933   free(serial);
4934   break;
4935   }
4936 
4937 return yield;
4938 }
4939 
4940 
4941 
4942 /*************************************************
4943 *               Process pattern line             *
4944 *************************************************/
4945 
4946 /* This function is called when the input buffer contains the start of a
4947 pattern. The first character is known to be a valid delimiter. The pattern is
4948 read, modifiers are interpreted, and a suitable local context is set up for
4949 this test. The pattern is then compiled.
4950 
4951 Arguments:  none
4952 
4953 Returns:    PR_OK     continue processing next line
4954             PR_SKIP   skip to a blank line
4955             PR_ABEND  abort the pcre2test run
4956 */
4957 
4958 static int
process_pattern(void)4959 process_pattern(void)
4960 {
4961 BOOL utf;
4962 uint32_t k;
4963 uint8_t *p = buffer;
4964 unsigned int delimiter = *p++;
4965 int errorcode;
4966 void *use_pat_context;
4967 uint32_t use_forbid_utf = forbid_utf;
4968 PCRE2_SIZE patlen;
4969 PCRE2_SIZE valgrind_access_length;
4970 PCRE2_SIZE erroroffset;
4971 
4972 /* Initialize the context and pattern/data controls for this test from the
4973 defaults. */
4974 
4975 PATCTXCPY(pat_context, default_pat_context);
4976 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
4977 
4978 /* Find the end of the pattern, reading more lines if necessary. */
4979 
4980 for(;;)
4981   {
4982   while (*p != 0)
4983     {
4984     if (*p == '\\' && p[1] != 0) p++;
4985       else if (*p == delimiter) break;
4986     p++;
4987     }
4988   if (*p != 0) break;
4989   if ((p = extend_inputline(infile, p, "    > ")) == NULL)
4990     {
4991     fprintf(outfile, "** Unexpected EOF\n");
4992     return PR_ABEND;
4993     }
4994   if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
4995   }
4996 
4997 /* If the first character after the delimiter is backslash, make the pattern
4998 end with backslash. This is purely to provide a way of testing for the error
4999 message when a pattern ends with backslash. */
5000 
5001 if (p[1] == '\\') *p++ = '\\';
5002 
5003 /* Terminate the pattern at the delimiter, and compute the length. */
5004 
5005 *p++ = 0;
5006 patlen = p - buffer - 2;
5007 
5008 /* Look for modifiers and options after the final delimiter. */
5009 
5010 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
5011 utf = (pat_patctl.options & PCRE2_UTF) != 0;
5012 
5013 /* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
5014 exclusive with the utf modifier. */
5015 
5016 if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
5017   {
5018   if (test_mode == PCRE8_MODE)
5019     {
5020     fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
5021     return PR_SKIP;
5022     }
5023   if (utf)
5024     {
5025     fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
5026     return PR_SKIP;
5027     }
5028   }
5029 
5030 /* The convert and posix modifiers are mutually exclusive. */
5031 
5032 if (pat_patctl.convert_type != CONVERT_UNSET &&
5033     (pat_patctl.control & CTL_POSIX) != 0)
5034   {
5035   fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
5036   return PR_SKIP;
5037   }
5038 
5039 /* Check for mutually exclusive control modifiers. At present, these are all in
5040 the first control word. */
5041 
5042 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
5043   {
5044   uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
5045   if (c != 0 && c != (c & (~c+1)))
5046     {
5047     show_controls(c, 0, "** Not allowed together:");
5048     fprintf(outfile, "\n");
5049     return PR_SKIP;
5050     }
5051   }
5052 
5053 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
5054 specified. */
5055 
5056 if (pat_patctl.jit == 0 &&
5057     (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
5058   pat_patctl.jit = 7;
5059 
5060 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
5061 in callouts. Convert from hex if requested (literal strings in quotes may be
5062 present within the hexadecimal pairs). The result must necessarily be fewer
5063 characters so will always fit in pbuffer8. */
5064 
5065 if ((pat_patctl.control & CTL_HEXPAT) != 0)
5066   {
5067   uint8_t *pp, *pt;
5068   uint32_t c, d;
5069 
5070   pt = pbuffer8;
5071   for (pp = buffer + 1; *pp != 0; pp++)
5072     {
5073     if (isspace(*pp)) continue;
5074     c = *pp++;
5075 
5076     /* Handle a literal substring */
5077 
5078     if (c == '\'' || c == '"')
5079       {
5080       uint8_t *pq = pp;
5081       for (;; pp++)
5082         {
5083         d = *pp;
5084         if (d == 0)
5085           {
5086           fprintf(outfile, "** Missing closing quote in hex pattern: "
5087             "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
5088           return PR_SKIP;
5089           }
5090         if (d == c) break;
5091         *pt++ = d;
5092         }
5093       }
5094 
5095     /* Expect a hex pair */
5096 
5097     else
5098       {
5099       if (!isxdigit(c))
5100         {
5101         fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5102           PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
5103         return PR_SKIP;
5104         }
5105       if (*pp == 0)
5106         {
5107         fprintf(outfile, "** Odd number of digits in hex pattern\n");
5108         return PR_SKIP;
5109         }
5110       d = *pp;
5111       if (!isxdigit(d))
5112         {
5113         fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5114           PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
5115         return PR_SKIP;
5116         }
5117       c = toupper(c);
5118       d = toupper(d);
5119       *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
5120                (isdigit(d)? (d - '0') : (d - 'A' + 10));
5121       }
5122     }
5123   *pt = 0;
5124   patlen = pt - pbuffer8;
5125   }
5126 
5127 /* If not a hex string, process for repetition expansion if requested. */
5128 
5129 else if ((pat_patctl.control & CTL_EXPAND) != 0)
5130   {
5131   uint8_t *pp, *pt;
5132 
5133   pt = pbuffer8;
5134   for (pp = buffer + 1; *pp != 0; pp++)
5135     {
5136     uint8_t *pc = pp;
5137     uint32_t count = 1;
5138     size_t length = 1;
5139 
5140     /* Check for replication syntax; if not found, the defaults just set will
5141     prevail and one character will be copied. */
5142 
5143     if (pp[0] == '\\' && pp[1] == '[')
5144       {
5145       uint8_t *pe;
5146       for (pe = pp + 2; *pe != 0; pe++)
5147         {
5148         if (pe[0] == ']' && pe[1] == '{')
5149           {
5150           uint32_t clen = pe - pc - 2;
5151           uint32_t i = 0;
5152           unsigned long uli;
5153           char *endptr;
5154 
5155           pe += 2;
5156           uli = strtoul((const char *)pe, &endptr, 10);
5157           if (U32OVERFLOW(uli))
5158             {
5159             fprintf(outfile, "** Pattern repeat count too large\n");
5160             return PR_SKIP;
5161             }
5162 
5163           i = (uint32_t)uli;
5164           pe = (uint8_t *)endptr;
5165           if (*pe == '}')
5166             {
5167             if (i == 0)
5168               {
5169               fprintf(outfile, "** Zero repeat not allowed\n");
5170               return PR_SKIP;
5171               }
5172             pc += 2;
5173             count = i;
5174             length = clen;
5175             pp = pe;
5176             break;
5177             }
5178           }
5179         }
5180       }
5181 
5182     /* Add to output. If the buffer is too small expand it. The function for
5183     expanding buffers always keeps buffer and pbuffer8 in step as far as their
5184     size goes. */
5185 
5186     while (pt + count * length > pbuffer8 + pbuffer8_size)
5187       {
5188       size_t pc_offset = pc - buffer;
5189       size_t pp_offset = pp - buffer;
5190       size_t pt_offset = pt - pbuffer8;
5191       expand_input_buffers();
5192       pc = buffer + pc_offset;
5193       pp = buffer + pp_offset;
5194       pt = pbuffer8 + pt_offset;
5195       }
5196 
5197     for (; count > 0; count--)
5198       {
5199       memcpy(pt, pc, length);
5200       pt += length;
5201       }
5202     }
5203 
5204   *pt = 0;
5205   patlen = pt - pbuffer8;
5206 
5207   if ((pat_patctl.control & CTL_INFO) != 0)
5208     fprintf(outfile, "Expanded: %s\n", pbuffer8);
5209   }
5210 
5211 /* Neither hex nor expanded, just copy the input verbatim. */
5212 
5213 else
5214   {
5215   strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
5216   }
5217 
5218 /* Sort out character tables */
5219 
5220 if (pat_patctl.locale[0] != 0)
5221   {
5222   if (pat_patctl.tables_id != 0)
5223     {
5224     fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
5225     return PR_SKIP;
5226     }
5227   if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
5228     {
5229     fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
5230     return PR_SKIP;
5231     }
5232   if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
5233     {
5234     strcpy((char *)locale_name, (char *)pat_patctl.locale);
5235     if (locale_tables != NULL) free((void *)locale_tables);
5236     PCRE2_MAKETABLES(locale_tables);
5237     }
5238   use_tables = locale_tables;
5239   }
5240 
5241 else switch (pat_patctl.tables_id)
5242   {
5243   case 0: use_tables = NULL; break;
5244   case 1: use_tables = tables1; break;
5245   case 2: use_tables = tables2; break;
5246   default:
5247   fprintf(outfile, "** 'Tables' must specify 0, 1, or 2.\n");
5248   return PR_SKIP;
5249   }
5250 
5251 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
5252 
5253 /* Set up for the stackguard test. */
5254 
5255 if (pat_patctl.stackguard_test != 0)
5256   {
5257   PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
5258   }
5259 
5260 /* Handle compiling via the POSIX interface, which doesn't support the
5261 timing, showing, or debugging options, nor the ability to pass over
5262 local character tables. Neither does it have 16-bit or 32-bit support. */
5263 
5264 if ((pat_patctl.control & CTL_POSIX) != 0)
5265   {
5266 #ifdef SUPPORT_PCRE2_8
5267   int rc;
5268   int cflags = 0;
5269   const char *msg = "** Ignored with POSIX interface:";
5270 #endif
5271 
5272   if (test_mode != PCRE8_MODE)
5273     {
5274     fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
5275     return PR_SKIP;
5276     }
5277 
5278 #ifdef SUPPORT_PCRE2_8
5279   /* Check for features that the POSIX interface does not support. */
5280 
5281   if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
5282   if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
5283   if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
5284   if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
5285   if (timeit > 0) prmsg(&msg, "timing");
5286   if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
5287 
5288   if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
5289     {
5290     show_compile_options(
5291       pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
5292     msg = "";
5293     }
5294 
5295   if ((FLD(pat_context, extra_options) &
5296        ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0)
5297     {
5298     show_compile_extra_options(
5299       FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS,
5300         msg, "");
5301     msg = "";
5302     }
5303 
5304   if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5305       (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
5306     {
5307     show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
5308       pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
5309     msg = "";
5310     }
5311 
5312   if (local_newline_default != 0) prmsg(&msg, "#newline_default");
5313   if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
5314     prmsg(&msg, "max_pattern_length");
5315   if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
5316     prmsg(&msg, "parens_nest_limit");
5317 
5318   if (msg[0] == 0) fprintf(outfile, "\n");
5319 
5320   /* Translate PCRE2 options to POSIX options and then compile. */
5321 
5322   if (utf) cflags |= REG_UTF;
5323   if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
5324   if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
5325   if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
5326   if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
5327   if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
5328   if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
5329   if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
5330 
5331   if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
5332     {
5333     preg.re_endp = (char *)pbuffer8 + patlen;
5334     cflags |= REG_PEND;
5335     }
5336 
5337   rc = regcomp(&preg, (char *)pbuffer8, cflags);
5338 
5339   /* Compiling failed */
5340 
5341   if (rc != 0)
5342     {
5343     size_t bsize, usize;
5344     int psize;
5345 
5346     preg.re_pcre2_code = NULL;     /* In case something was left in there */
5347     preg.re_match_data = NULL;
5348 
5349     bsize = (pat_patctl.regerror_buffsize != 0)?
5350       pat_patctl.regerror_buffsize : pbuffer8_size;
5351     if (bsize + 8 < pbuffer8_size)
5352       memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
5353     usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
5354 
5355     /* Inside regerror(), snprintf() is used. If the buffer is too small, some
5356     versions of snprintf() put a zero byte at the end, but others do not.
5357     Therefore, we print a maximum of one less than the size of the buffer. */
5358 
5359     psize = (int)bsize - 1;
5360     fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
5361     if (usize > bsize)
5362       {
5363       fprintf(outfile, "** regerror() message truncated\n");
5364       if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
5365         fprintf(outfile, "** regerror() buffer overflow\n");
5366       }
5367     return PR_SKIP;
5368     }
5369 
5370   /* Compiling succeeded. Check that the values in the preg block are sensible.
5371   It can happen that pcre2test is accidentally linked with a different POSIX
5372   library which succeeds, but of course puts different things into preg. In
5373   this situation, calling regfree() may cause a segfault (or invalid free() in
5374   valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
5375   calling of regfree() on exit. */
5376 
5377   if (preg.re_pcre2_code == NULL ||
5378       ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
5379       ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
5380       preg.re_match_data == NULL ||
5381       preg.re_cflags != cflags)
5382     {
5383     fprintf(outfile,
5384       "** The regcomp() function returned zero (success), but the values set\n"
5385       "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
5386       "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
5387       "** some other POSIX regex library.\n**\n");
5388     preg.re_pcre2_code = NULL;
5389     return PR_ABEND;
5390     }
5391 
5392   return PR_OK;
5393 #endif  /* SUPPORT_PCRE2_8 */
5394   }
5395 
5396 /* Handle compiling via the native interface. Controls that act later are
5397 ignored with "push". Replacements are locked out. */
5398 
5399 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5400   {
5401   if (pat_patctl.replacement[0] != 0)
5402     {
5403     fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
5404     return PR_OK;
5405     }
5406   if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5407       (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
5408     {
5409     show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
5410                   pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
5411       "** Ignored when compiled pattern is stacked with 'push':");
5412     fprintf(outfile, "\n");
5413     }
5414   if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
5415       (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
5416     {
5417     show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
5418                   pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
5419       "** Applies only to compile when pattern is stacked with 'push':");
5420     fprintf(outfile, "\n");
5421     }
5422   }
5423 
5424 /* Convert the input in non-8-bit modes. */
5425 
5426 errorcode = 0;
5427 
5428 #ifdef SUPPORT_PCRE2_16
5429 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
5430 #endif
5431 
5432 #ifdef SUPPORT_PCRE2_32
5433 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
5434 #endif
5435 
5436 switch(errorcode)
5437   {
5438   case -1:
5439   fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
5440     "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
5441   return PR_SKIP;
5442 
5443   case -2:
5444   fprintf(outfile, "** Failed: character value greater than 0x10ffff "
5445     "cannot be converted to UTF\n");
5446   return PR_SKIP;
5447 
5448   case -3:
5449   fprintf(outfile, "** Failed: character value greater than 0xffff "
5450     "cannot be converted to 16-bit in non-UTF mode\n");
5451   return PR_SKIP;
5452 
5453   default:
5454   break;
5455   }
5456 
5457 /* The pattern is now in pbuffer[8|16|32], with the length in code units in
5458 patlen. If it is to be converted, copy the result back afterwards so that it
5459 ends up back in the usual place. */
5460 
5461 if (pat_patctl.convert_type != CONVERT_UNSET)
5462   {
5463   int rc;
5464   int convert_return = PR_OK;
5465   uint32_t convert_options = pat_patctl.convert_type;
5466   void *converted_pattern;
5467   PCRE2_SIZE converted_length;
5468 
5469   if (pat_patctl.convert_length != 0)
5470     {
5471     converted_length = pat_patctl.convert_length;
5472     converted_pattern = malloc(converted_length * code_unit_size);
5473     if (converted_pattern == NULL)
5474       {
5475       fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
5476       return PR_SKIP;
5477       }
5478     }
5479   else converted_pattern = NULL;  /* Let the library allocate */
5480 
5481   if (utf) convert_options |= PCRE2_CONVERT_UTF;
5482   if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
5483     convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
5484 
5485   CONCTXCPY(con_context, default_con_context);
5486 
5487   if (pat_patctl.convert_glob_escape != 0)
5488     {
5489     uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
5490       pat_patctl.convert_glob_escape;
5491     PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
5492     if (rc != 0)
5493       {
5494       fprintf(outfile, "** Invalid glob escape '%c'\n",
5495         pat_patctl.convert_glob_escape);
5496       convert_return = PR_SKIP;
5497       goto CONVERT_FINISH;
5498       }
5499     }
5500 
5501   if (pat_patctl.convert_glob_separator != 0)
5502     {
5503     PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
5504     if (rc != 0)
5505       {
5506       fprintf(outfile, "** Invalid glob separator '%c'\n",
5507         pat_patctl.convert_glob_separator);
5508       convert_return = PR_SKIP;
5509       goto CONVERT_FINISH;
5510       }
5511     }
5512 
5513   PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
5514     &converted_pattern, &converted_length, con_context);
5515 
5516   if (rc != 0)
5517     {
5518     fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
5519       SIZ_CAST converted_length);
5520     convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
5521     }
5522 
5523   /* Output the converted pattern, then copy it. */
5524 
5525   else
5526     {
5527     PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
5528     fprintf(outfile, "\n");
5529     patlen = converted_length;
5530     CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);
5531     }
5532 
5533   /* Free the converted pattern. */
5534 
5535   CONVERT_FINISH:
5536   if (pat_patctl.convert_length != 0)
5537     free(converted_pattern);
5538   else
5539     PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
5540 
5541   /* Return if conversion was unsuccessful. */
5542 
5543   if (convert_return != PR_OK) return convert_return;
5544   }
5545 
5546 /* By default we pass a zero-terminated pattern, but a length is passed if
5547 "use_length" was specified or this is a hex pattern (which might contain binary
5548 zeros). When valgrind is supported, arrange for the unused part of the buffer
5549 to be marked as no access. */
5550 
5551 valgrind_access_length = patlen;
5552 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
5553   {
5554   patlen = PCRE2_ZERO_TERMINATED;
5555   valgrind_access_length += 1;  /* For the terminating zero */
5556   }
5557 
5558 #ifdef SUPPORT_VALGRIND
5559 #ifdef SUPPORT_PCRE2_8
5560 if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
5561   {
5562   VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
5563     pbuffer8_size - valgrind_access_length);
5564   }
5565 #endif
5566 #ifdef SUPPORT_PCRE2_16
5567 if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
5568   {
5569   VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
5570     pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
5571   }
5572 #endif
5573 #ifdef SUPPORT_PCRE2_32
5574 if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
5575   {
5576   VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
5577     pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
5578   }
5579 #endif
5580 #else  /* Valgrind not supported */
5581 (void)valgrind_access_length;  /* Avoid compiler warning */
5582 #endif
5583 
5584 /* If #newline_default has been used and the library was not compiled with an
5585 appropriate default newline setting, local_newline_default will be non-zero. We
5586 use this if there is no explicit newline modifier. */
5587 
5588 if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
5589   {
5590   SETFLD(pat_context, newline_convention, local_newline_default);
5591   }
5592 
5593 /* The null_context modifier is used to test calling pcre2_compile() with a
5594 NULL context. */
5595 
5596 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
5597   NULL : PTR(pat_context);
5598 
5599 /* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
5600 and PCRE2_NEVER_UCP are invalid with it. */
5601 
5602 if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
5603 
5604 /* Compile many times when timing. */
5605 
5606 if (timeit > 0)
5607   {
5608   int i;
5609   clock_t time_taken = 0;
5610   for (i = 0; i < timeit; i++)
5611     {
5612     clock_t start_time = clock();
5613     PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5614       pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5615         use_pat_context);
5616     time_taken += clock() - start_time;
5617     if (TEST(compiled_code, !=, NULL))
5618       { SUB1(pcre2_code_free, compiled_code); }
5619     }
5620   total_compile_time += time_taken;
5621   fprintf(outfile, "Compile time %.4f milliseconds\n",
5622     (((double)time_taken * 1000.0) / (double)timeit) /
5623       (double)CLOCKS_PER_SEC);
5624   }
5625 
5626 /* A final compile that is used "for real". */
5627 
5628 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|use_forbid_utf,
5629   &errorcode, &erroroffset, use_pat_context);
5630 
5631 /* Call the JIT compiler if requested. When timing, we must free and recompile
5632 the pattern each time because that is the only way to free the JIT compiled
5633 code. We know that compilation will always succeed. */
5634 
5635 if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
5636   {
5637   if (timeit > 0)
5638     {
5639     int i;
5640     clock_t time_taken = 0;
5641     for (i = 0; i < timeit; i++)
5642       {
5643       clock_t start_time;
5644       SUB1(pcre2_code_free, compiled_code);
5645       PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5646         pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5647         use_pat_context);
5648       start_time = clock();
5649       PCRE2_JIT_COMPILE(jitrc,compiled_code, pat_patctl.jit);
5650       time_taken += clock() - start_time;
5651       }
5652     total_jit_compile_time += time_taken;
5653     fprintf(outfile, "JIT compile  %.4f milliseconds\n",
5654       (((double)time_taken * 1000.0) / (double)timeit) /
5655         (double)CLOCKS_PER_SEC);
5656     }
5657   else
5658     {
5659     PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5660     }
5661   }
5662 
5663 /* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
5664 and 32-bit buffers can be marked completely undefined, but we must leave the
5665 pattern in the 8-bit buffer defined because it may be read from a callout
5666 during matching. */
5667 
5668 #ifdef SUPPORT_VALGRIND
5669 #ifdef SUPPORT_PCRE2_8
5670 if (test_mode == PCRE8_MODE)
5671   {
5672   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
5673     pbuffer8_size - valgrind_access_length);
5674   }
5675 #endif
5676 #ifdef SUPPORT_PCRE2_16
5677 if (test_mode == PCRE16_MODE)
5678   {
5679   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
5680   }
5681 #endif
5682 #ifdef SUPPORT_PCRE2_32
5683 if (test_mode == PCRE32_MODE)
5684   {
5685   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
5686   }
5687 #endif
5688 #endif
5689 
5690 /* Compilation failed; go back for another re, skipping to blank line
5691 if non-interactive. */
5692 
5693 if (TEST(compiled_code, ==, NULL))
5694   {
5695   fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
5696     (int)erroroffset);
5697   if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
5698   return PR_SKIP;
5699   }
5700 
5701 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
5702 locked out at compile time, but we must also check for occurrences of \P, \p,
5703 and \X, which are only supported when Unicode is supported. */
5704 
5705 if (forbid_utf != 0)
5706   {
5707   if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
5708     {
5709     fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
5710       "#forbid_utf command\n");
5711     return PR_SKIP;
5712     }
5713   }
5714 
5715 /* Remember the maximum lookbehind, for partial matching. */
5716 
5717 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
5718   return PR_ABEND;
5719 
5720 /* If an explicit newline modifier was given, set the information flag in the
5721 pattern so that it is preserved over push/pop. */
5722 
5723 if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
5724   {
5725   SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
5726   }
5727 
5728 /* Output code size and other information if requested. */
5729 
5730 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5731 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
5732 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5733   {
5734   int rc = show_pattern_info();
5735   if (rc != PR_OK) return rc;
5736   }
5737 
5738 /* The "push" control requests that the compiled pattern be remembered on a
5739 stack. This is mainly for testing the serialization functionality. */
5740 
5741 if ((pat_patctl.control & CTL_PUSH) != 0)
5742   {
5743   if (patstacknext >= PATSTACKSIZE)
5744     {
5745     fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5746     return PR_ABEND;
5747     }
5748   patstack[patstacknext++] = PTR(compiled_code);
5749   SET(compiled_code, NULL);
5750   }
5751 
5752 /* The "pushcopy" and "pushtablescopy" controls are similar, but push a
5753 copy of the pattern, the latter with a copy of its character tables. This tests
5754 the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
5755 
5756 if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5757   {
5758   if (patstacknext >= PATSTACKSIZE)
5759     {
5760     fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5761     return PR_ABEND;
5762     }
5763   if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
5764     {
5765     PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
5766     }
5767   else
5768     {
5769     PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
5770       compiled_code); }
5771   }
5772 
5773 return PR_OK;
5774 }
5775 
5776 
5777 
5778 /*************************************************
5779 *          Check heap, match or depth limit      *
5780 *************************************************/
5781 
5782 /* This is used for DFA, normal, and JIT fast matching. For DFA matching it
5783 should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
5784 
5785 Arguments:
5786   pp        the subject string
5787   ulen      length of subject or PCRE2_ZERO_TERMINATED
5788   errnumber defines which limit to test
5789   msg       string to include in final message
5790 
5791 Returns:    the return from the final match function call
5792 */
5793 
5794 static int
check_match_limit(uint8_t * pp,PCRE2_SIZE ulen,int errnumber,const char * msg)5795 check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
5796 {
5797 int capcount;
5798 uint32_t min = 0;
5799 uint32_t mid = 64;
5800 uint32_t max = UINT32_MAX;
5801 
5802 PCRE2_SET_MATCH_LIMIT(dat_context, max);
5803 PCRE2_SET_DEPTH_LIMIT(dat_context, max);
5804 PCRE2_SET_HEAP_LIMIT(dat_context, max);
5805 
5806 for (;;)
5807   {
5808   uint32_t stack_start = 0;
5809 
5810   if (errnumber == PCRE2_ERROR_HEAPLIMIT)
5811     {
5812     PCRE2_SET_HEAP_LIMIT(dat_context, mid);
5813     }
5814   else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
5815     {
5816     PCRE2_SET_MATCH_LIMIT(dat_context, mid);
5817     }
5818   else
5819     {
5820     PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
5821     }
5822 
5823   if ((dat_datctl.control & CTL_DFA) != 0)
5824     {
5825     stack_start = DFA_START_RWS_SIZE/1024;
5826     if (dfa_workspace == NULL)
5827       dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5828     if (dfa_matched++ == 0)
5829       dfa_workspace[0] = -1;  /* To catch bad restart */
5830     PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5831       dat_datctl.options, match_data,
5832       PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
5833     }
5834 
5835   else if ((pat_patctl.control & CTL_JITFAST) != 0)
5836     PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5837       dat_datctl.options, match_data, PTR(dat_context));
5838 
5839   else
5840     {
5841     stack_start = START_FRAMES_SIZE/1024;
5842     PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5843       dat_datctl.options, match_data, PTR(dat_context));
5844     }
5845 
5846   if (capcount == errnumber)
5847     {
5848     if ((mid & 0x80000000u) != 0)
5849       {
5850       fprintf(outfile, "Can't find minimum %s limit: check pattern for "
5851         "restriction\n", msg);
5852       break;
5853       }
5854 
5855     min = mid;
5856     mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
5857     }
5858   else if (capcount >= 0 ||
5859            capcount == PCRE2_ERROR_NOMATCH ||
5860            capcount == PCRE2_ERROR_PARTIAL)
5861     {
5862     /* If we've not hit the error with a heap limit less than the size of the
5863     initial stack frame vector (for pcre2_match()) or the initial stack
5864     workspace vector (for pcre2_dfa_match()), the heap is not being used, so
5865     the minimum limit is zero; there's no need to go on. The other limits are
5866     always greater than zero. */
5867 
5868     if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
5869       {
5870       fprintf(outfile, "Minimum %s limit = 0\n", msg);
5871       break;
5872       }
5873     if (mid == min + 1)
5874       {
5875       fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
5876       break;
5877       }
5878     max = mid;
5879     mid = (min + max)/2;
5880     }
5881   else break;    /* Some other error */
5882   }
5883 
5884 return capcount;
5885 }
5886 
5887 
5888 
5889 /*************************************************
5890 *              Callout function                  *
5891 *************************************************/
5892 
5893 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
5894 we are in the match (unless suppressed). Yield zero unless more callouts than
5895 the fail count, or the callout data is not zero. The only differences in the
5896 callout block for different code unit widths are that the pointers to the
5897 subject, the most recent MARK, and a callout argument string point to strings
5898 of the appropriate width. Casts can be used to deal with this.
5899 
5900 Argument:  a pointer to a callout block
5901 Return:
5902 */
5903 
5904 static int
callout_function(pcre2_callout_block_8 * cb,void * callout_data_ptr)5905 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
5906 {
5907 FILE *f, *fdefault;
5908 uint32_t i, pre_start, post_start, subject_length;
5909 PCRE2_SIZE current_position;
5910 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
5911 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
5912 BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
5913 
5914 /* The FILE f is used for echoing the subject string if it is non-NULL. This
5915 happens only once in simple cases, but we want to repeat after any additional
5916 output caused by CALLOUT_EXTRA. */
5917 
5918 fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
5919   NULL : outfile;
5920 
5921 if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
5922   {
5923   f = outfile;
5924   switch (cb->callout_flags)
5925     {
5926     case PCRE2_CALLOUT_BACKTRACK:
5927     fprintf(f, "Backtrack\n");
5928     break;
5929 
5930     case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
5931     fprintf(f, "Backtrack\nNo other matching paths\n");
5932     /* Fall through */
5933 
5934     case PCRE2_CALLOUT_STARTMATCH:
5935     fprintf(f, "New match attempt\n");
5936     break;
5937 
5938     default:
5939     f = fdefault;
5940     break;
5941     }
5942   }
5943 else f = fdefault;
5944 
5945 /* For a callout with a string argument, show the string first because there
5946 isn't a tidy way to fit it in the rest of the data. */
5947 
5948 if (cb->callout_string != NULL)
5949   {
5950   uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
5951   fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
5952     SIZ_CAST cb->callout_string_offset, delimiter);
5953   PCHARSV(cb->callout_string, 0,
5954     cb->callout_string_length, utf, outfile);
5955   for (i = 0; callout_start_delims[i] != 0; i++)
5956     if (delimiter == callout_start_delims[i])
5957       {
5958       delimiter = callout_end_delims[i];
5959       break;
5960       }
5961   fprintf(outfile, "%c", delimiter);
5962   if (!callout_capture) fprintf(outfile, "\n");
5963   }
5964 
5965 /* Show captured strings if required */
5966 
5967 if (callout_capture)
5968   {
5969   if (cb->callout_string == NULL)
5970     fprintf(outfile, "Callout %d:", cb->callout_number);
5971   fprintf(outfile, " last capture = %d\n", cb->capture_last);
5972   for (i = 2; i < cb->capture_top * 2; i += 2)
5973     {
5974     fprintf(outfile, "%2d: ", i/2);
5975     if (cb->offset_vector[i] == PCRE2_UNSET)
5976       fprintf(outfile, "<unset>");
5977     else
5978       {
5979       PCHARSV(cb->subject, cb->offset_vector[i],
5980         cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
5981       }
5982     fprintf(outfile, "\n");
5983     }
5984   }
5985 
5986 /* Unless suppressed, re-print the subject in canonical form (with escapes for
5987 non-printing characters), the first time, or if giving full details. On
5988 subsequent calls in the same match, we use PCHARS() just to find the printed
5989 lengths of the substrings. */
5990 
5991 if (callout_where)
5992   {
5993   if (f != NULL) fprintf(f, "--->");
5994 
5995   /* The subject before the match start. */
5996 
5997   PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
5998 
5999   /* If a lookbehind is involved, the current position may be earlier than the
6000   match start. If so, use the match start instead. */
6001 
6002   current_position = (cb->current_position >= cb->start_match)?
6003     cb->current_position : cb->start_match;
6004 
6005   /* The subject between the match start and the current position. */
6006 
6007   PCHARS(post_start, cb->subject, cb->start_match,
6008     current_position - cb->start_match, utf, f);
6009 
6010   /* Print from the current position to the end. */
6011 
6012   PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
6013     utf, f);
6014 
6015   /* Calculate the total subject printed length (no print). */
6016 
6017   PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
6018 
6019   if (f != NULL) fprintf(f, "\n");
6020 
6021   /* For automatic callouts, show the pattern offset. Otherwise, for a
6022   numerical callout whose number has not already been shown with captured
6023   strings, show the number here. A callout with a string argument has been
6024   displayed above. */
6025 
6026   if (cb->callout_number == 255)
6027     {
6028     fprintf(outfile, "%+3d ", (int)cb->pattern_position);
6029     if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
6030     }
6031   else
6032     {
6033     if (callout_capture || cb->callout_string != NULL) fprintf(outfile, "    ");
6034       else fprintf(outfile, "%3d ", cb->callout_number);
6035     }
6036 
6037   /* Now show position indicators */
6038 
6039   for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
6040   fprintf(outfile, "^");
6041 
6042   if (post_start > 0)
6043     {
6044     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
6045     fprintf(outfile, "^");
6046     }
6047 
6048   for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
6049     fprintf(outfile, " ");
6050 
6051   if (cb->next_item_length != 0)
6052     fprintf(outfile, "%.*s", (int)(cb->next_item_length),
6053       pbuffer8 + cb->pattern_position);
6054   else
6055     fprintf(outfile, "End of pattern");
6056 
6057   fprintf(outfile, "\n");
6058   }
6059 
6060 first_callout = FALSE;
6061 
6062 /* Show any mark info */
6063 
6064 if (cb->mark != last_callout_mark)
6065   {
6066   if (cb->mark == NULL)
6067     fprintf(outfile, "Latest Mark: <unset>\n");
6068   else
6069     {
6070     fprintf(outfile, "Latest Mark: ");
6071     PCHARSV(cb->mark, 0, -1, utf, outfile);
6072     putc('\n', outfile);
6073     }
6074   last_callout_mark = cb->mark;
6075   }
6076 
6077 /* Show callout data */
6078 
6079 if (callout_data_ptr != NULL)
6080   {
6081   int callout_data = *((int32_t *)callout_data_ptr);
6082   if (callout_data != 0)
6083     {
6084     fprintf(outfile, "Callout data = %d\n", callout_data);
6085     return callout_data;
6086     }
6087   }
6088 
6089 /* Keep count and give the appropriate return code */
6090 
6091 callout_count++;
6092 
6093 if (cb->callout_number == dat_datctl.cerror[0] &&
6094     callout_count >= dat_datctl.cerror[1])
6095   return PCRE2_ERROR_CALLOUT;
6096 
6097 if (cb->callout_number == dat_datctl.cfail[0] &&
6098     callout_count >= dat_datctl.cfail[1])
6099   return 1;
6100 
6101 return 0;
6102 }
6103 
6104 
6105 
6106 /*************************************************
6107 *       Handle *MARK and copy/get tests          *
6108 *************************************************/
6109 
6110 /* This function is called after complete and partial matches. It runs the
6111 tests for substring extraction.
6112 
6113 Arguments:
6114   utf       TRUE for utf
6115   capcount  return from pcre2_match()
6116 
6117 Returns:    FALSE if print_error_message() fails
6118 */
6119 
6120 static BOOL
copy_and_get(BOOL utf,int capcount)6121 copy_and_get(BOOL utf, int capcount)
6122 {
6123 int i;
6124 uint8_t *nptr;
6125 
6126 /* Test copy strings by number */
6127 
6128 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
6129   {
6130   int rc;
6131   PCRE2_SIZE length, length2;
6132   uint32_t copybuffer[256];
6133   uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
6134   length = sizeof(copybuffer)/code_unit_size;
6135   PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
6136   if (rc < 0)
6137     {
6138     fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
6139     if (!print_error_message(rc, "", "\n")) return FALSE;
6140     }
6141   else
6142     {
6143     PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
6144     if (rc < 0)
6145       {
6146       fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
6147       if (!print_error_message(rc, "", "\n")) return FALSE;
6148       }
6149     else if (length2 != length)
6150       {
6151       fprintf(outfile, "Mismatched substring lengths: %"
6152         SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2);
6153       }
6154     fprintf(outfile, "%2dC ", n);
6155     PCHARSV(copybuffer, 0, length, utf, outfile);
6156     fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length);
6157     }
6158   }
6159 
6160 /* Test copy strings by name */
6161 
6162 nptr = dat_datctl.copy_names;
6163 for (;;)
6164   {
6165   int rc;
6166   int groupnumber;
6167   PCRE2_SIZE length, length2;
6168   uint32_t copybuffer[256];
6169   int namelen = strlen((const char *)nptr);
6170 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6171   PCRE2_SIZE cnl = namelen;
6172 #endif
6173   if (namelen == 0) break;
6174 
6175 #ifdef SUPPORT_PCRE2_8
6176   if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6177 #endif
6178 #ifdef SUPPORT_PCRE2_16
6179   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6180 #endif
6181 #ifdef SUPPORT_PCRE2_32
6182   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6183 #endif
6184 
6185   PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6186   if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6187     fprintf(outfile, "Number not found for group '%s'\n", nptr);
6188 
6189   length = sizeof(copybuffer)/code_unit_size;
6190   PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
6191   if (rc < 0)
6192     {
6193     fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
6194     if (!print_error_message(rc, "", "\n")) return FALSE;
6195     }
6196   else
6197     {
6198     PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
6199     if (rc < 0)
6200       {
6201       fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
6202       if (!print_error_message(rc, "", "\n")) return FALSE;
6203       }
6204     else if (length2 != length)
6205       {
6206       fprintf(outfile, "Mismatched substring lengths: %"
6207         SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2);
6208       }
6209     fprintf(outfile, "  C ");
6210     PCHARSV(copybuffer, 0, length, utf, outfile);
6211     fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr);
6212     if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6213       else fprintf(outfile, " (non-unique)\n");
6214     }
6215   nptr += namelen + 1;
6216   }
6217 
6218 /* Test get strings by number */
6219 
6220 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
6221   {
6222   int rc;
6223   PCRE2_SIZE length;
6224   void *gotbuffer;
6225   uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
6226   PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
6227   if (rc < 0)
6228     {
6229     fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
6230     if (!print_error_message(rc, "", "\n")) return FALSE;
6231     }
6232   else
6233     {
6234     fprintf(outfile, "%2dG ", n);
6235     PCHARSV(gotbuffer, 0, length, utf, outfile);
6236     fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length);
6237     PCRE2_SUBSTRING_FREE(gotbuffer);
6238     }
6239   }
6240 
6241 /* Test get strings by name */
6242 
6243 nptr = dat_datctl.get_names;
6244 for (;;)
6245   {
6246   PCRE2_SIZE length;
6247   void *gotbuffer;
6248   int rc;
6249   int groupnumber;
6250   int namelen = strlen((const char *)nptr);
6251 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6252   PCRE2_SIZE cnl = namelen;
6253 #endif
6254   if (namelen == 0) break;
6255 
6256 #ifdef SUPPORT_PCRE2_8
6257   if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6258 #endif
6259 #ifdef SUPPORT_PCRE2_16
6260   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6261 #endif
6262 #ifdef SUPPORT_PCRE2_32
6263   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6264 #endif
6265 
6266   PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6267   if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6268     fprintf(outfile, "Number not found for group '%s'\n", nptr);
6269 
6270   PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
6271   if (rc < 0)
6272     {
6273     fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
6274     if (!print_error_message(rc, "", "\n")) return FALSE;
6275     }
6276   else
6277     {
6278     fprintf(outfile, "  G ");
6279     PCHARSV(gotbuffer, 0, length, utf, outfile);
6280     fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr);
6281     if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6282       else fprintf(outfile, " (non-unique)\n");
6283     PCRE2_SUBSTRING_FREE(gotbuffer);
6284     }
6285   nptr += namelen + 1;
6286   }
6287 
6288 /* Test getting the complete list of captured strings. */
6289 
6290 if ((dat_datctl.control & CTL_GETALL) != 0)
6291   {
6292   int rc;
6293   void **stringlist;
6294   PCRE2_SIZE *lengths;
6295   PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
6296   if (rc < 0)
6297     {
6298     fprintf(outfile, "get substring list failed (%d): ", rc);
6299     if (!print_error_message(rc, "", "\n")) return FALSE;
6300     }
6301   else
6302     {
6303     for (i = 0; i < capcount; i++)
6304       {
6305       fprintf(outfile, "%2dL ", i);
6306       PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
6307       putc('\n', outfile);
6308       }
6309     if (stringlist[i] != NULL)
6310       fprintf(outfile, "string list not terminated by NULL\n");
6311     PCRE2_SUBSTRING_LIST_FREE(stringlist);
6312     }
6313   }
6314 
6315 return TRUE;
6316 }
6317 
6318 
6319 
6320 /*************************************************
6321 *               Process a data line              *
6322 *************************************************/
6323 
6324 /* The line is in buffer; it will not be empty.
6325 
6326 Arguments:  none
6327 
6328 Returns:    PR_OK     continue processing next line
6329             PR_SKIP   skip to a blank line
6330             PR_ABEND  abort the pcre2test run
6331 */
6332 
6333 static int
process_data(void)6334 process_data(void)
6335 {
6336 PCRE2_SIZE len, ulen, arg_ulen;
6337 uint32_t gmatched;
6338 uint32_t c, k;
6339 uint32_t g_notempty = 0;
6340 uint8_t *p, *pp, *start_rep;
6341 size_t needlen;
6342 void *use_dat_context;
6343 BOOL utf;
6344 BOOL subject_literal;
6345 PCRE2_SIZE ovecsave[3];
6346 
6347 #ifdef SUPPORT_PCRE2_8
6348 uint8_t *q8 = NULL;
6349 #endif
6350 #ifdef SUPPORT_PCRE2_16
6351 uint16_t *q16 = NULL;
6352 #endif
6353 #ifdef SUPPORT_PCRE2_32
6354 uint32_t *q32 = NULL;
6355 #endif
6356 
6357 subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
6358 
6359 /* Copy the default context and data control blocks to the active ones. Then
6360 copy from the pattern the controls that can be set in either the pattern or the
6361 data. This allows them to be overridden in the data line. We do not do this for
6362 options because those that are common apply separately to compiling and
6363 matching. */
6364 
6365 DATCTXCPY(dat_context, default_dat_context);
6366 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
6367 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
6368 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
6369 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
6370 if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
6371 
6372 /* Initialize for scanning the data line. */
6373 
6374 #ifdef SUPPORT_PCRE2_8
6375 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
6376   ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
6377   FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
6378 #else
6379 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6380 #endif
6381 
6382 start_rep = NULL;
6383 len = strlen((const char *)buffer);
6384 while (len > 0 && isspace(buffer[len-1])) len--;
6385 buffer[len] = 0;
6386 p = buffer;
6387 while (isspace(*p)) p++;
6388 
6389 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
6390 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
6391 
6392 if (utf)
6393   {
6394   uint8_t *q;
6395   uint32_t cc;
6396   int n = 1;
6397   for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
6398   if (n <= 0)
6399     {
6400     fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
6401       "in UTF mode\n");
6402     return PR_OK;
6403     }
6404   }
6405 
6406 #ifdef SUPPORT_VALGRIND
6407 /* Mark the dbuffer as addressable but undefined again. */
6408 if (dbuffer != NULL)
6409   {
6410   VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
6411   }
6412 #endif
6413 
6414 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
6415 the number of code units that will be needed (though the buffer may have to be
6416 extended if replication is involved). */
6417 
6418 needlen = (size_t)((len+1) * code_unit_size);
6419 if (dbuffer == NULL || needlen >= dbuffer_size)
6420   {
6421   while (needlen >= dbuffer_size) dbuffer_size *= 2;
6422   dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6423   if (dbuffer == NULL)
6424     {
6425     fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6426     exit(1);
6427     }
6428   }
6429 SETCASTPTR(q, dbuffer);  /* Sets q8, q16, or q32, as appropriate. */
6430 
6431 /* Scan the data line, interpreting data escapes, and put the result into a
6432 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
6433 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
6434 */
6435 
6436 while ((c = *p++) != 0)
6437   {
6438   int32_t i = 0;
6439   size_t replen;
6440 
6441   /* ] may mark the end of a replicated sequence */
6442 
6443   if (c == ']' && start_rep != NULL)
6444     {
6445     long li;
6446     char *endptr;
6447     size_t qoffset = CAST8VAR(q) - dbuffer;
6448     size_t rep_offset = start_rep - dbuffer;
6449 
6450     if (*p++ != '{')
6451       {
6452       fprintf(outfile, "** Expected '{' after \\[....]\n");
6453       return PR_OK;
6454       }
6455 
6456     li = strtol((const char *)p, &endptr, 10);
6457     if (S32OVERFLOW(li))
6458       {
6459       fprintf(outfile, "** Repeat count too large\n");
6460       return PR_OK;
6461       }
6462 
6463     p = (uint8_t *)endptr;
6464     if (*p++ != '}')
6465       {
6466       fprintf(outfile, "** Expected '}' after \\[...]{...\n");
6467       return PR_OK;
6468       }
6469 
6470     i = (int32_t)li;
6471     if (i-- == 0)
6472       {
6473       fprintf(outfile, "** Zero repeat not allowed\n");
6474       return PR_OK;
6475       }
6476 
6477     replen = CAST8VAR(q) - start_rep;
6478     needlen += replen * i;
6479 
6480     if (needlen >= dbuffer_size)
6481       {
6482       while (needlen >= dbuffer_size) dbuffer_size *= 2;
6483       dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6484       if (dbuffer == NULL)
6485         {
6486         fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6487         exit(1);
6488         }
6489       SETCASTPTR(q, dbuffer + qoffset);
6490       start_rep = dbuffer + rep_offset;
6491       }
6492 
6493     while (i-- > 0)
6494       {
6495       memcpy(CAST8VAR(q), start_rep, replen);
6496       SETPLUS(q, replen/code_unit_size);
6497       }
6498 
6499     start_rep = NULL;
6500     continue;
6501     }
6502 
6503   /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
6504   set, do the fudge for setting the top bit. */
6505 
6506   if (c != '\\' || subject_literal)
6507     {
6508     uint32_t topbit = 0;
6509     if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
6510       {
6511       topbit = 0x80000000;
6512       c = *p++;
6513       }
6514     if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
6515       HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
6516     c |= topbit;
6517     }
6518 
6519   /* Handle backslash escapes */
6520 
6521   else switch ((c = *p++))
6522     {
6523     case '\\': break;
6524     case 'a': c = CHAR_BEL; break;
6525     case 'b': c = '\b'; break;
6526     case 'e': c = CHAR_ESC; break;
6527     case 'f': c = '\f'; break;
6528     case 'n': c = '\n'; break;
6529     case 'r': c = '\r'; break;
6530     case 't': c = '\t'; break;
6531     case 'v': c = '\v'; break;
6532 
6533     case '0': case '1': case '2': case '3':
6534     case '4': case '5': case '6': case '7':
6535     c -= '0';
6536     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
6537       c = c * 8 + *p++ - '0';
6538     break;
6539 
6540     case 'o':
6541     if (*p == '{')
6542       {
6543       uint8_t *pt = p;
6544       c = 0;
6545       for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
6546         {
6547         if (++i == 12)
6548           fprintf(outfile, "** Too many octal digits in \\o{...} item; "
6549                            "using only the first twelve.\n");
6550         else c = c * 8 + *pt - '0';
6551         }
6552       if (*pt == '}') p = pt + 1;
6553         else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
6554       }
6555     break;
6556 
6557     case 'x':
6558     if (*p == '{')
6559       {
6560       uint8_t *pt = p;
6561       c = 0;
6562 
6563       /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
6564       when isxdigit() is a macro that refers to its argument more than
6565       once. This is banned by the C Standard, but apparently happens in at
6566       least one MacOS environment. */
6567 
6568       for (pt++; isxdigit(*pt); pt++)
6569         {
6570         if (++i == 9)
6571           fprintf(outfile, "** Too many hex digits in \\x{...} item; "
6572                            "using only the first eight.\n");
6573         else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
6574         }
6575       if (*pt == '}')
6576         {
6577         p = pt + 1;
6578         break;
6579         }
6580       /* Not correct form for \x{...}; fall through */
6581       }
6582 
6583     /* \x without {} always defines just one byte in 8-bit mode. This
6584     allows UTF-8 characters to be constructed byte by byte, and also allows
6585     invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
6586     Otherwise, pass it down as data. */
6587 
6588     c = 0;
6589     while (i++ < 2 && isxdigit(*p))
6590       {
6591       c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
6592       p++;
6593       }
6594 #if defined SUPPORT_PCRE2_8
6595     if (utf && (test_mode == PCRE8_MODE))
6596       {
6597       *q8++ = c;
6598       continue;
6599       }
6600 #endif
6601     break;
6602 
6603     case 0:     /* \ followed by EOF allows for an empty line */
6604     p--;
6605     continue;
6606 
6607     case '=':   /* \= terminates the data, starts modifiers */
6608     goto ENDSTRING;
6609 
6610     case '[':   /* \[ introduces a replicated character sequence */
6611     if (start_rep != NULL)
6612       {
6613       fprintf(outfile, "** Nested replication is not supported\n");
6614       return PR_OK;
6615       }
6616     start_rep = CAST8VAR(q);
6617     continue;
6618 
6619     default:
6620     if (isalnum(c))
6621       {
6622       fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
6623       return PR_OK;
6624       }
6625     }
6626 
6627   /* We now have a character value in c that may be greater than 255.
6628   In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
6629   than 127 in UTF mode must have come from \x{...} or octal constructs
6630   because values from \x.. get this far only in non-UTF mode. */
6631 
6632 #ifdef SUPPORT_PCRE2_8
6633   if (test_mode == PCRE8_MODE)
6634     {
6635     if (utf)
6636       {
6637       if (c > 0x7fffffff)
6638         {
6639         fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
6640           "and so cannot be converted to UTF-8\n", c);
6641         return PR_OK;
6642         }
6643       q8 += ord2utf8(c, q8);
6644       }
6645     else
6646       {
6647       if (c > 0xffu)
6648         {
6649         fprintf(outfile, "** Character \\x{%x} is greater than 255 "
6650           "and UTF-8 mode is not enabled.\n", c);
6651         fprintf(outfile, "** Truncation will probably give the wrong "
6652           "result.\n");
6653         }
6654       *q8++ = c;
6655       }
6656     }
6657 #endif
6658 #ifdef SUPPORT_PCRE2_16
6659   if (test_mode == PCRE16_MODE)
6660     {
6661     if (utf)
6662       {
6663       if (c > 0x10ffffu)
6664         {
6665         fprintf(outfile, "** Failed: character \\x{%x} is greater than "
6666           "0x10ffff and so cannot be converted to UTF-16\n", c);
6667         return PR_OK;
6668         }
6669       else if (c >= 0x10000u)
6670         {
6671         c-= 0x10000u;
6672         *q16++ = 0xD800 | (c >> 10);
6673         *q16++ = 0xDC00 | (c & 0x3ff);
6674         }
6675       else
6676         *q16++ = c;
6677       }
6678     else
6679       {
6680       if (c > 0xffffu)
6681         {
6682         fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
6683           "and UTF-16 mode is not enabled.\n", c);
6684         fprintf(outfile, "** Truncation will probably give the wrong "
6685           "result.\n");
6686         }
6687 
6688       *q16++ = c;
6689       }
6690     }
6691 #endif
6692 #ifdef SUPPORT_PCRE2_32
6693   if (test_mode == PCRE32_MODE)
6694     {
6695     *q32++ = c;
6696     }
6697 #endif
6698   }
6699 
6700 ENDSTRING:
6701 SET(*q, 0);
6702 len = CASTVAR(uint8_t *, q) - dbuffer;    /* Length in bytes */
6703 ulen = len/code_unit_size;                /* Length in code units */
6704 arg_ulen = ulen;                          /* Value to use in match arg */
6705 
6706 /* If the string was terminated by \= we must now interpret modifiers. */
6707 
6708 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
6709   return PR_OK;
6710 
6711 /* Check for mutually exclusive modifiers. At present, these are all in the
6712 first control word. */
6713 
6714 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
6715   {
6716   c = dat_datctl.control & exclusive_dat_controls[k];
6717   if (c != 0 && c != (c & (~c+1)))
6718     {
6719     show_controls(c, 0, "** Not allowed together:");
6720     fprintf(outfile, "\n");
6721     return PR_OK;
6722     }
6723   }
6724 
6725 if (pat_patctl.replacement[0] != 0 &&
6726     (dat_datctl.control & CTL_NULLCONTEXT) != 0)
6727   {
6728   fprintf(outfile, "** Replacement text is not supported with null_context.\n");
6729   return PR_OK;
6730   }
6731 
6732 /* We now have the subject in dbuffer, with len containing the byte length, and
6733 ulen containing the code unit length, with a copy in arg_ulen for use in match
6734 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
6735 zero_terminate modifier is present).
6736 
6737 Move the data to the end of the buffer so that a read over the end can be
6738 caught by valgrind or other means. If we have explicit valgrind support, mark
6739 the unused start of the buffer unaddressable. If we are using the POSIX
6740 interface, or testing zero-termination, we must include the terminating zero in
6741 the usable data. */
6742 
6743 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
6744                        (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
6745 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
6746 #ifdef SUPPORT_VALGRIND
6747   VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
6748 #endif
6749 
6750 /* Now pp points to the subject string. POSIX matching is only possible in
6751 8-bit mode, and it does not support timing or other fancy features. Some were
6752 checked at compile time, but we need to check the match-time settings here. */
6753 
6754 #ifdef SUPPORT_PCRE2_8
6755 if ((pat_patctl.control & CTL_POSIX) != 0)
6756   {
6757   int rc;
6758   int eflags = 0;
6759   regmatch_t *pmatch = NULL;
6760   const char *msg = "** Ignored with POSIX interface:";
6761 
6762   if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
6763     prmsg(&msg, "callout_error");
6764   if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
6765     prmsg(&msg, "callout_fail");
6766   if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
6767     prmsg(&msg, "copy");
6768   if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
6769     prmsg(&msg, "get");
6770   if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
6771   if (dat_datctl.offset != 0) prmsg(&msg, "offset");
6772 
6773   if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
6774     {
6775     fprintf(outfile, "%s", msg);
6776     show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
6777     msg = "";
6778     }
6779   if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
6780       (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
6781     {
6782     show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
6783                   dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
6784     msg = "";
6785     }
6786 
6787   if (msg[0] == 0) fprintf(outfile, "\n");
6788 
6789   if (dat_datctl.oveccount > 0)
6790     {
6791     pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
6792     if (pmatch == NULL)
6793       {
6794       fprintf(outfile, "** Failed to get memory for recording matching "
6795         "information (size set = %du)\n", dat_datctl.oveccount);
6796       return PR_OK;
6797       }
6798     }
6799 
6800   if (dat_datctl.startend[0] != CFORE_UNSET)
6801     {
6802     pmatch[0].rm_so = dat_datctl.startend[0];
6803     pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
6804       dat_datctl.startend[1] : len;
6805     eflags |= REG_STARTEND;
6806     }
6807 
6808   if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
6809   if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
6810   if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
6811 
6812   rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
6813   if (rc != 0)
6814     {
6815     (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
6816     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
6817     }
6818   else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
6819     fprintf(outfile, "Matched with REG_NOSUB\n");
6820   else if (dat_datctl.oveccount == 0)
6821     fprintf(outfile, "Matched without capture\n");
6822   else
6823     {
6824     size_t i, j;
6825     size_t last_printed = (size_t)dat_datctl.oveccount;
6826     for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
6827       {
6828       if (pmatch[i].rm_so >= 0)
6829         {
6830         PCRE2_SIZE start = pmatch[i].rm_so;
6831         PCRE2_SIZE end = pmatch[i].rm_eo;
6832         for (j = last_printed + 1; j < i; j++)
6833           fprintf(outfile, "%2d: <unset>\n", (int)j);
6834         last_printed = i;
6835         if (start > end)
6836           {
6837           start = pmatch[i].rm_eo;
6838           end = pmatch[i].rm_so;
6839           fprintf(outfile, "Start of matched string is beyond its end - "
6840             "displaying from end to start.\n");
6841           }
6842         fprintf(outfile, "%2d: ", (int)i);
6843         PCHARSV(pp, start, end - start, utf, outfile);
6844         fprintf(outfile, "\n");
6845 
6846         if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
6847             (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
6848           {
6849           fprintf(outfile, "%2d+ ", (int)i);
6850           /* Note: don't use the start/end variables here because we want to
6851           show the text from what is reported as the end. */
6852           PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
6853           fprintf(outfile, "\n"); }
6854         }
6855       }
6856     }
6857   free(pmatch);
6858   return PR_OK;
6859   }
6860 #endif  /* SUPPORT_PCRE2_8 */
6861 
6862  /* Handle matching via the native interface. Check for consistency of
6863 modifiers. */
6864 
6865 if (dat_datctl.startend[0] != CFORE_UNSET)
6866   fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
6867 
6868 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
6869 matching, even if the JIT compiler was used. */
6870 
6871 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
6872     FLD(compiled_code, executable_jit) != NULL)
6873   {
6874   fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
6875   dat_datctl.control &= ~CTL_ALLUSEDTEXT;
6876   }
6877 
6878 /* Handle passing the subject as zero-terminated. */
6879 
6880 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
6881   arg_ulen = PCRE2_ZERO_TERMINATED;
6882 
6883 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
6884 NULL context. */
6885 
6886 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
6887   NULL : PTR(dat_context);
6888 
6889 /* Enable display of malloc/free if wanted. We can do this only if either the
6890 pattern or the subject is processed with a context. */
6891 
6892 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
6893 
6894 if (show_memory &&
6895     (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
6896   fprintf(outfile, "** \\=memory requires either a pattern or a subject "
6897     "context: ignored\n");
6898 
6899 /* Create and assign a JIT stack if requested. */
6900 
6901 if (dat_datctl.jitstack != 0)
6902   {
6903   if (dat_datctl.jitstack != jit_stack_size)
6904     {
6905     PCRE2_JIT_STACK_FREE(jit_stack);
6906     PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
6907     jit_stack_size = dat_datctl.jitstack;
6908     }
6909   PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
6910   }
6911 
6912 /* Or de-assign */
6913 
6914 else if (jit_stack != NULL)
6915   {
6916   PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
6917   PCRE2_JIT_STACK_FREE(jit_stack);
6918   jit_stack = NULL;
6919   jit_stack_size = 0;
6920   }
6921 
6922 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
6923 if we want to verify that JIT was actually used. */
6924 
6925 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
6926    {
6927    PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
6928    }
6929 
6930 /* Adjust match_data according to size of offsets required. A size of zero
6931 causes a new match data block to be obtained that exactly fits the pattern. */
6932 
6933 if (dat_datctl.oveccount == 0)
6934   {
6935   PCRE2_MATCH_DATA_FREE(match_data);
6936   PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL);
6937   PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
6938   }
6939 else if (dat_datctl.oveccount <= max_oveccount)
6940   {
6941   SETFLD(match_data, oveccount, dat_datctl.oveccount);
6942   }
6943 else
6944   {
6945   max_oveccount = dat_datctl.oveccount;
6946   PCRE2_MATCH_DATA_FREE(match_data);
6947   PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
6948   }
6949 
6950 if (CASTVAR(void *, match_data) == NULL)
6951   {
6952   fprintf(outfile, "** Failed to get memory for recording matching "
6953     "information (size requested: %d)\n", dat_datctl.oveccount);
6954   max_oveccount = 0;
6955   return PR_OK;
6956   }
6957 
6958 /* Replacement processing is ignored for DFA matching. */
6959 
6960 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
6961   {
6962   fprintf(outfile, "** Ignored for DFA matching: replace\n");
6963   dat_datctl.replacement[0] = 0;
6964   }
6965 
6966 /* If a replacement string is provided, call pcre2_substitute() instead of one
6967 of the matching functions. First we have to convert the replacement string to
6968 the appropriate width. */
6969 
6970 if (dat_datctl.replacement[0] != 0)
6971   {
6972   int rc;
6973   uint8_t *pr;
6974   uint8_t rbuffer[REPLACE_BUFFSIZE];
6975   uint8_t nbuffer[REPLACE_BUFFSIZE];
6976   uint32_t xoptions;
6977   PCRE2_SIZE rlen, nsize, erroroffset;
6978   BOOL badutf = FALSE;
6979 
6980 #ifdef SUPPORT_PCRE2_8
6981   uint8_t *r8 = NULL;
6982 #endif
6983 #ifdef SUPPORT_PCRE2_16
6984   uint16_t *r16 = NULL;
6985 #endif
6986 #ifdef SUPPORT_PCRE2_32
6987   uint32_t *r32 = NULL;
6988 #endif
6989 
6990   if (timeitm)
6991     fprintf(outfile, "** Timing is not supported with replace: ignored\n");
6992 
6993   if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
6994     fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
6995 
6996   xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
6997                 PCRE2_SUBSTITUTE_GLOBAL) |
6998              (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
6999                 PCRE2_SUBSTITUTE_EXTENDED) |
7000              (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
7001                 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
7002              (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
7003                 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
7004              (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
7005                 PCRE2_SUBSTITUTE_UNSET_EMPTY);
7006 
7007   SETCASTPTR(r, rbuffer);  /* Sets r8, r16, or r32, as appropriate. */
7008   pr = dat_datctl.replacement;
7009 
  /* If the replacement starts with '[<number>]' we interpret that as a length
7011   value for the replacement buffer. */
7012 
7013   nsize = REPLACE_BUFFSIZE/code_unit_size;
7014   if (*pr == '[')
7015     {
7016     PCRE2_SIZE n = 0;
7017     while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
7018     if (*pr++ != ']')
7019       {
7020       fprintf(outfile, "Bad buffer size in replacement string\n");
7021       return PR_OK;
7022       }
7023     if (n > nsize)
7024       {
7025       fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too "
7026         "large (max %" SIZ_FORM ")\n", SIZ_CAST n, SIZ_CAST nsize);
7027       return PR_OK;
7028       }
7029     nsize = n;
7030     }
7031 
7032   /* Now copy the replacement string to a buffer of the appropriate width. No
7033   escape processing is done for replacements. In UTF mode, check for an invalid
7034   UTF-8 input string, and if it is invalid, just copy its code units without
7035   UTF interpretation. This provides a means of checking that an invalid string
7036   is detected. Otherwise, UTF-8 can be used to include wide characters in a
7037   replacement. */
7038 
7039   if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
7040 
7041   /* Not UTF or invalid UTF-8: just copy the code units. */
7042 
7043   if (!utf || badutf)
7044     {
7045     while ((c = *pr++) != 0)
7046       {
7047 #ifdef SUPPORT_PCRE2_8
7048       if (test_mode == PCRE8_MODE) *r8++ = c;
7049 #endif
7050 #ifdef SUPPORT_PCRE2_16
7051       if (test_mode == PCRE16_MODE) *r16++ = c;
7052 #endif
7053 #ifdef SUPPORT_PCRE2_32
7054       if (test_mode == PCRE32_MODE) *r32++ = c;
7055 #endif
7056       }
7057     }
7058 
7059   /* Valid UTF-8 replacement string */
7060 
7061   else while ((c = *pr++) != 0)
7062     {
7063     if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
7064 
7065 #ifdef SUPPORT_PCRE2_8
7066     if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
7067 #endif
7068 
7069 #ifdef SUPPORT_PCRE2_16
7070     if (test_mode == PCRE16_MODE)
7071       {
7072       if (c >= 0x10000u)
7073         {
7074         c-= 0x10000u;
7075         *r16++ = 0xD800 | (c >> 10);
7076         *r16++ = 0xDC00 | (c & 0x3ff);
7077         }
7078       else *r16++ = c;
7079       }
7080 #endif
7081 
7082 #ifdef SUPPORT_PCRE2_32
7083     if (test_mode == PCRE32_MODE) *r32++ = c;
7084 #endif
7085     }
7086 
7087   SET(*r, 0);
7088   if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7089     rlen = PCRE2_ZERO_TERMINATED;
7090   else
7091     rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
7092   PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7093     dat_datctl.options|xoptions, match_data, dat_context,
7094     rbuffer, rlen, nbuffer, &nsize);
7095 
7096   if (rc < 0)
7097     {
7098     fprintf(outfile, "Failed: error %d", rc);
7099     if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
7100       fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
7101     fprintf(outfile, ": ");
7102     if (!print_error_message(rc, "", "")) return PR_ABEND;
7103     if (rc == PCRE2_ERROR_NOMEMORY &&
7104         (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
7105       fprintf(outfile, ": %ld code units are needed", (long int)nsize);
7106     }
7107   else
7108     {
7109     fprintf(outfile, "%2d: ", rc);
7110     PCHARSV(nbuffer, 0, nsize, utf, outfile);
7111     }
7112 
7113   fprintf(outfile, "\n");
7114   show_memory = FALSE;
7115   return PR_OK;
7116   }   /* End of substitution handling */
7117 
7118 /* When a replacement string is not provided, run a loop for global matching
7119 with one of the basic matching functions. For altglobal (or first time round
7120 the loop), set an "unset" value for the previous match info. */
7121 
7122 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
7123 
7124 for (gmatched = 0;; gmatched++)
7125   {
7126   PCRE2_SIZE j;
7127   int capcount;
7128   PCRE2_SIZE *ovector;
7129 
7130   ovector = FLD(match_data, ovector);
7131 
7132   /* Fill the ovector with junk to detect elements that do not get set
7133   when they should be. */
7134 
7135   for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
7136 
7137   /* When matching is via pcre2_match(), we will detect the use of JIT via the
7138   stack callback function. */
7139 
7140   jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
7141 
7142   /* Do timing if required. */
7143 
7144   if (timeitm > 0)
7145     {
7146     int i;
7147     clock_t start_time, time_taken;
7148 
7149     if ((dat_datctl.control & CTL_DFA) != 0)
7150       {
7151       if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
7152         {
7153         fprintf(outfile, "Timing DFA restarts is not supported\n");
7154         return PR_OK;
7155         }
7156       if (dfa_workspace == NULL)
7157         dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7158       start_time = clock();
7159       for (i = 0; i < timeitm; i++)
7160         {
7161         PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7162           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7163           use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7164         }
7165       }
7166 
7167     else if ((pat_patctl.control & CTL_JITFAST) != 0)
7168       {
7169       start_time = clock();
7170       for (i = 0; i < timeitm; i++)
7171         {
7172         PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
7173           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7174           use_dat_context);
7175         }
7176       }
7177 
7178     else
7179       {
7180       start_time = clock();
7181       for (i = 0; i < timeitm; i++)
7182         {
7183         PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
7184           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7185           use_dat_context);
7186         }
7187       }
7188     total_match_time += (time_taken = clock() - start_time);
7189     fprintf(outfile, "Match time %.4f milliseconds\n",
7190       (((double)time_taken * 1000.0) / (double)timeitm) /
7191         (double)CLOCKS_PER_SEC);
7192     }
7193 
7194   /* Find the heap, match and depth limits if requested. The depth and heap
7195   limits are not relevant for JIT. The return from check_match_limit() is the
7196   return from the final call to pcre2_match() or pcre2_dfa_match(). */
7197 
7198   if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
7199     {
7200     capcount = 0;  /* This stops compiler warnings */
7201 
7202     if (FLD(compiled_code, executable_jit) == NULL ||
7203           (dat_datctl.options & PCRE2_NO_JIT) != 0)
7204       {
7205       (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
7206       }
7207 
7208     capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
7209       "match");
7210 
7211     if (FLD(compiled_code, executable_jit) == NULL ||
7212         (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
7213         (dat_datctl.control & CTL_DFA) != 0)
7214       {
7215       capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
7216         "depth");
7217       }
7218 
7219     if (capcount == 0)
7220       {
7221       fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7222       capcount = dat_datctl.oveccount;
7223       }
7224     }
7225 
7226   /* Otherwise just run a single match, setting up a callout if required (the
7227   default). There is a copy of the pattern in pbuffer8 for use by callouts. */
7228 
7229   else
7230     {
7231     if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
7232       {
7233       PCRE2_SET_CALLOUT(dat_context, callout_function,
7234         (void *)(&dat_datctl.callout_data));
7235       first_callout = TRUE;
7236       last_callout_mark = NULL;
7237       callout_count = 0;
7238       }
7239     else
7240       {
7241       PCRE2_SET_CALLOUT(dat_context, NULL, NULL);  /* No callout */
7242       }
7243 
7244     /* Run a single DFA or NFA match. */
7245 
7246     if ((dat_datctl.control & CTL_DFA) != 0)
7247       {
7248       if (dfa_workspace == NULL)
7249         dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7250       if (dfa_matched++ == 0)
7251         dfa_workspace[0] = -1;  /* To catch bad restart */
7252       PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7253         dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7254         use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7255       if (capcount == 0)
7256         {
7257         fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7258         capcount = dat_datctl.oveccount;
7259         }
7260       }
7261     else
7262       {
7263       if ((pat_patctl.control & CTL_JITFAST) != 0)
7264         PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7265           dat_datctl.options | g_notempty, match_data, use_dat_context);
7266       else
7267         PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7268           dat_datctl.options | g_notempty, match_data, use_dat_context);
7269       if (capcount == 0)
7270         {
7271         fprintf(outfile, "Matched, but too many substrings\n");
7272         capcount = dat_datctl.oveccount;
7273         }
7274       }
7275     }
7276 
7277   /* The result of the match is now in capcount. First handle a successful
7278   match. */
7279 
7280   if (capcount >= 0)
7281     {
7282     int i;
7283     uint32_t oveccount;
7284 
7285     /* This is a check against a lunatic return value. */
7286 
7287     PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
7288     if (capcount > (int)oveccount)
7289       {
7290       fprintf(outfile,
7291         "** PCRE2 error: returned count %d is too big for ovector count %d\n",
7292         capcount, oveccount);
7293       capcount = oveccount;
7294       if ((dat_datctl.control & CTL_ANYGLOB) != 0)
7295         {
7296         fprintf(outfile, "** Global loop abandoned\n");
7297         dat_datctl.control &= ~CTL_ANYGLOB;        /* Break g/G loop */
7298         }
7299       }
7300 
7301     /* If this is not the first time round a global loop, check that the
7302     returned string has changed. If it has not, check for an empty string match
7303     at different starting offset from the previous match. This is a failed test
7304     retry for null-matching patterns that don't match at their starting offset,
7305     for example /(?<=\G.)/. A repeated match at the same point is not such a
7306     pattern, and must be discarded, and we then proceed to seek a non-null
7307     match at the current point. For any other repeated match, there is a bug
7308     somewhere and we must break the loop because it will go on for ever. We
7309     know that there are always at least two elements in the ovector. */
7310 
7311     if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
7312       {
7313       if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
7314         {
7315         g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7316         ovecsave[2] = dat_datctl.offset;
7317         continue;    /* Back to the top of the loop */
7318         }
7319       fprintf(outfile,
7320         "** PCRE2 error: global repeat returned the same string as previous\n");
7321       fprintf(outfile, "** Global loop abandoned\n");
7322       dat_datctl.control &= ~CTL_ANYGLOB;        /* Break g/G loop */
7323       }
7324 
7325     /* "allcaptures" requests showing of all captures in the pattern, to check
7326     unset ones at the end. It may be set on the pattern or the data. Implement
7327     by setting capcount to the maximum. This is not relevant for DFA matching,
7328     so ignore it. */
7329 
7330     if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7331       {
7332       uint32_t maxcapcount;
7333       if ((dat_datctl.control & CTL_DFA) != 0)
7334         {
7335         fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
7336         }
7337       else
7338         {
7339         if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
7340           return PR_SKIP;
7341         capcount = maxcapcount + 1;   /* Allow for full match */
7342         if (capcount > (int)oveccount) capcount = oveccount;
7343         }
7344       }
7345 
7346     /* Output the captured substrings. Note that, for the matched string,
7347     the use of \K in an assertion can make the start later than the end. */
7348 
7349     for (i = 0; i < 2*capcount; i += 2)
7350       {
7351       PCRE2_SIZE lleft, lmiddle, lright;
7352       PCRE2_SIZE start = ovector[i];
7353       PCRE2_SIZE end = ovector[i+1];
7354 
7355       if (start > end)
7356         {
7357         start = ovector[i+1];
7358         end = ovector[i];
7359         fprintf(outfile, "Start of matched string is beyond its end - "
7360           "displaying from end to start.\n");
7361         }
7362 
7363       fprintf(outfile, "%2d: ", i/2);
7364 
7365       /* Check for an unset group */
7366 
7367       if (start == PCRE2_UNSET)
7368         {
7369         fprintf(outfile, "<unset>\n");
7370         continue;
7371         }
7372 
7373       /* Check for silly offsets, in particular, values that have not been
7374       set when they should have been. */
7375 
7376       if (start > ulen || end > ulen)
7377         {
7378         fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
7379           (unsigned long int)start, (unsigned long int)end);
7380         continue;
7381         }
7382 
      /* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
7384       JIT, it is disabled above, with a comment.) When the match is done by the
7385       interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
7386       set, and if the leftmost consulted character is before the start of the
7387       match or the rightmost consulted character is past the end of the match,
7388       we want to show all consulted characters for the main matched string, and
7389       indicate which were lookarounds. */
7390 
7391       if (i == 0)
7392         {
7393         BOOL showallused;
7394         PCRE2_SIZE leftchar, rightchar;
7395 
7396         if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7397           {
7398           leftchar = FLD(match_data, leftchar);
7399           rightchar = FLD(match_data, rightchar);
7400           showallused = i == 0 && (leftchar < start || rightchar > end);
7401           }
7402         else showallused = FALSE;
7403 
7404         if (showallused)
7405           {
7406           PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
7407           PCHARS(lmiddle, pp, start, end - start, utf, outfile);
7408           PCHARS(lright, pp, end, rightchar - end, utf, outfile);
7409           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7410             fprintf(outfile, " (JIT)");
7411           fprintf(outfile, "\n    ");
7412           for (j = 0; j < lleft; j++) fprintf(outfile, "<");
7413           for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
7414           for (j = 0; j < lright; j++) fprintf(outfile, ">");
7415           }
7416 
7417         /* When a pattern contains \K, the start of match position may be
7418         different to the start of the matched string. When this is the case,
7419         show it when requested. */
7420 
7421         else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
7422           {
7423           PCRE2_SIZE startchar;
7424           PCRE2_GET_STARTCHAR(startchar, match_data);
7425           PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
7426           PCHARSV(pp, start, end - start, utf, outfile);
7427           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7428             fprintf(outfile, " (JIT)");
7429           if (startchar != start)
7430             {
7431             fprintf(outfile, "\n    ");
7432             for (j = 0; j < lleft; j++) fprintf(outfile, "^");
7433             }
7434           }
7435 
7436         /* Otherwise, just show the matched string. */
7437 
7438         else
7439           {
7440           PCHARSV(pp, start, end - start, utf, outfile);
7441           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7442             fprintf(outfile, " (JIT)");
7443           }
7444         }
7445 
7446       /* Not the main matched string. Just show it unadorned. */
7447 
7448       else
7449         {
7450         PCHARSV(pp, start, end - start, utf, outfile);
7451         }
7452 
7453       fprintf(outfile, "\n");
7454 
7455       /* Note: don't use the start/end variables here because we want to
7456       show the text from what is reported as the end. */
7457 
7458       if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
7459           (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
7460         {
7461         fprintf(outfile, "%2d+ ", i/2);
7462         PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
7463         fprintf(outfile, "\n");
7464         }
7465       }
7466 
7467     /* Output (*MARK) data if requested */
7468 
7469     if ((dat_datctl.control & CTL_MARK) != 0 &&
7470          TESTFLD(match_data, mark, !=, NULL))
7471       {
7472       fprintf(outfile, "MK: ");
7473       PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
7474       fprintf(outfile, "\n");
7475       }
7476 
7477     /* Process copy/get strings */
7478 
7479     if (!copy_and_get(utf, capcount)) return PR_ABEND;
7480 
7481     }    /* End of handling a successful match */
7482 
7483   /* There was a partial match. The value of ovector[0] is the bumpalong point,
7484   that is, startchar, not any \K point that might have been passed. */
7485 
7486   else if (capcount == PCRE2_ERROR_PARTIAL)
7487     {
7488     PCRE2_SIZE poffset;
7489     int backlength;
7490     int rubriclength = 0;
7491 
7492     fprintf(outfile, "Partial match");
7493     if ((dat_datctl.control & CTL_MARK) != 0 &&
7494          TESTFLD(match_data, mark, !=, NULL))
7495       {
7496       fprintf(outfile, ", mark=");
7497       PCHARS(rubriclength, CASTFLD(void *, match_data, mark), 0, -1, utf,
7498         outfile);
7499       rubriclength += 7;
7500       }
7501     fprintf(outfile, ": ");
7502     rubriclength += 15;
7503 
7504     poffset = backchars(pp, ovector[0], maxlookbehind, utf);
7505     PCHARS(backlength, pp, poffset, ovector[0] - poffset, utf, outfile);
7506     PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
7507 
7508     if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7509       fprintf(outfile, " (JIT)");
7510     fprintf(outfile, "\n");
7511 
7512     if (backlength != 0)
7513       {
7514       int i;
7515       for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
7516       for (i = 0; i < backlength; i++) fprintf(outfile, "<");
7517       fprintf(outfile, "\n");
7518       }
7519 
7520     /* Process copy/get strings */
7521 
7522     if (!copy_and_get(utf, 1)) return PR_ABEND;
7523 
7524     break;  /* Out of the /g loop */
7525     }       /* End of handling partial match */
7526 
7527   /* Failed to match. If this is a /g or /G loop, we might previously have
7528   set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
7529   If that is the case, this is not necessarily the end. We want to advance the
7530   start offset, and continue. We won't be at the end of the string - that was
7531   checked before setting g_notempty. We achieve the effect by pretending that a
7532   single character was matched.
7533 
7534   Complication arises in the case when the newline convention is "any", "crlf",
7535   or "anycrlf". If the previous match was at the end of a line terminated by
7536   CRLF, an advance of one character just passes the CR, whereas we should
7537   prefer the longer newline sequence, as does the code in pcre2_match().
7538 
7539   Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
7540   character, not one byte. */
7541 
7542   else if (g_notempty != 0)   /* There was a previous null match */
7543     {
7544     uint16_t nl = FLD(compiled_code, newline_convention);
7545     PCRE2_SIZE start_offset = dat_datctl.offset;    /* Where the match was */
7546     PCRE2_SIZE end_offset = start_offset + 1;
7547 
7548     if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
7549          nl == PCRE2_NEWLINE_ANYCRLF) &&
7550         start_offset < ulen - 1 &&
7551         CODE_UNIT(pp, start_offset) == '\r' &&
7552         CODE_UNIT(pp, end_offset) == '\n')
7553       end_offset++;
7554 
7555     else if (utf && test_mode != PCRE32_MODE)
7556       {
7557       if (test_mode == PCRE8_MODE)
7558         {
7559         for (; end_offset < ulen; end_offset++)
7560           if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
7561         }
7562       else  /* 16-bit mode */
7563         {
7564         for (; end_offset < ulen; end_offset++)
7565           if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
7566         }
7567       }
7568 
7569     SETFLDVEC(match_data, ovector, 0, start_offset);
7570     SETFLDVEC(match_data, ovector, 1, end_offset);
7571     }  /* End of handling null match in a global loop */
7572 
7573   /* A "normal" match failure. There will be a negative error number in
7574   capcount. */
7575 
7576   else
7577     {
7578     switch(capcount)
7579       {
7580       case PCRE2_ERROR_NOMATCH:
7581       if (gmatched == 0)
7582         {
7583         fprintf(outfile, "No match");
7584         if ((dat_datctl.control & CTL_MARK) != 0 &&
7585              TESTFLD(match_data, mark, !=, NULL))
7586           {
7587           fprintf(outfile, ", mark = ");
7588           PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
7589           }
7590         if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7591           fprintf(outfile, " (JIT)");
7592         fprintf(outfile, "\n");
7593         }
7594       break;
7595 
7596       case PCRE2_ERROR_BADUTFOFFSET:
7597       fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
7598       break;
7599 
7600       default:
7601       fprintf(outfile, "Failed: error %d: ", capcount);
7602       if (!print_error_message(capcount, "", "")) return PR_ABEND;
7603       if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
7604           capcount >= PCRE2_ERROR_UTF32_ERR2)
7605         {
7606         PCRE2_SIZE startchar;
7607         PCRE2_GET_STARTCHAR(startchar, match_data);
7608         fprintf(outfile, " at offset %" SIZ_FORM, SIZ_CAST startchar);
7609         }
7610       fprintf(outfile, "\n");
7611       break;
7612       }
7613 
7614     break;  /* Out of the /g loop */
7615     }       /* End of failed match handling */
7616 
7617   /* Control reaches here in two circumstances: (a) after a match, and (b)
7618   after a non-match that immediately followed a match on an empty string when
7619   doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
7620   PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
7621   of one character. So effectively we get here only after a match. If we
7622   are not doing a global search, we are done. */
7623 
7624   if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
7625     {
7626     PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
7627     PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
7628 
7629     /* We must now set up for the next iteration of a global search. If we have
7630     matched an empty string, first check to see if we are at the end of the
7631     subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
7632     does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
7633     at the same point. If this fails it will be picked up above, where a fake
7634     match is set up so that at this point we advance to the next character.
7635 
7636     However, in order to cope with patterns that never match at their starting
7637     offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
7638     than the starting offset. This means there will be a retry with the
7639     starting offset at the match offset. If this returns the same match again,
7640     it is picked up above and ignored, and the special action is then taken. */
7641 
7642     if (match_offset == end_offset)
7643       {
7644       if (end_offset == ulen) break;           /* End of subject */
7645       if (match_offset <= dat_datctl.offset)
7646         g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7647       }
7648 
7649     /* However, even after matching a non-empty string, there is still one
7650     tricky case. If a pattern contains \K within a lookbehind assertion at the
7651     start, the end of the matched string can be at the offset where the match
7652     started. In the case of a normal /g iteration without special action, this
7653     leads to a loop that keeps on returning the same substring. The loop would
7654     be caught above, but we really want to move on to the next match. */
7655 
7656     else
7657       {
7658       g_notempty = 0;   /* Set for a "normal" repeat */
7659       if ((dat_datctl.control & CTL_GLOBAL) != 0)
7660         {
7661         PCRE2_SIZE startchar;
7662         PCRE2_GET_STARTCHAR(startchar, match_data);
7663         if (end_offset <= startchar)
7664           {
7665           if (startchar >= ulen) break;       /* End of subject */
7666           end_offset = startchar + 1;
7667           if (utf && test_mode != PCRE32_MODE)
7668             {
7669             if (test_mode == PCRE8_MODE)
7670               {
7671               for (; end_offset < ulen; end_offset++)
7672                 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
7673               }
7674             else  /* 16-bit mode */
7675               {
7676               for (; end_offset < ulen; end_offset++)
7677                 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
7678               }
7679             }
7680           }
7681         }
7682       }
7683 
7684     /* For a normal global (/g) iteration, save the current ovector[0,1] and
7685     the starting offset so that we can check that they do change each time.
7686     Otherwise a matching bug that returns the same string causes an infinite
7687     loop. It has happened! Then update the start offset, leaving other
7688     parameters alone. */
7689 
7690     if ((dat_datctl.control & CTL_GLOBAL) != 0)
7691       {
7692       ovecsave[0] = ovector[0];
7693       ovecsave[1] = ovector[1];
7694       ovecsave[2] = dat_datctl.offset;
7695       dat_datctl.offset = end_offset;
7696       }
7697 
7698     /* For altglobal, just update the pointer and length. */
7699 
7700     else
7701       {
7702       pp += end_offset * code_unit_size;
7703       len -= end_offset * code_unit_size;
7704       ulen -= end_offset;
7705       if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset;
7706       }
7707     }
7708   }  /* End of global loop */
7709 
7710 show_memory = FALSE;
7711 return PR_OK;
7712 }
7713 
7714 
7715 
7716 
7717 /*************************************************
7718 *               Print PCRE2 version              *
7719 *************************************************/
7720 
7721 static void
print_version(FILE * f)7722 print_version(FILE *f)
7723 {
7724 VERSION_TYPE *vp;
7725 fprintf(f, "PCRE2 version ");
7726 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
7727 fprintf(f, "\n");
7728 }
7729 
7730 
7731 
7732 /*************************************************
7733 *               Print Unicode version            *
7734 *************************************************/
7735 
7736 static void
print_unicode_version(FILE * f)7737 print_unicode_version(FILE *f)
7738 {
7739 VERSION_TYPE *vp;
7740 fprintf(f, "Unicode version ");
7741 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
7742 }
7743 
7744 
7745 
7746 /*************************************************
7747 *               Print JIT target                 *
7748 *************************************************/
7749 
7750 static void
print_jit_target(FILE * f)7751 print_jit_target(FILE *f)
7752 {
7753 VERSION_TYPE *vp;
7754 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
7755 }
7756 
7757 
7758 
7759 /*************************************************
7760 *       Print newline configuration              *
7761 *************************************************/
7762 
7763 /* Output is always to stdout.
7764 
7765 Arguments:
  optval     the newline value obtained from PCRE2_CONFIG_NEWLINE
7767   isc        TRUE if called from "-C newline"
7768 Returns:     nothing
7769 */
7770 
7771 static void
print_newline_config(uint32_t optval,BOOL isc)7772 print_newline_config(uint32_t optval, BOOL isc)
7773 {
7774 if (!isc) printf("  Newline sequence is ");
7775 if (optval < sizeof(newlines)/sizeof(char *))
7776   printf("%s\n", newlines[optval]);
7777 else
7778   printf("a non-standard value: %d\n", optval);
7779 }
7780 
7781 
7782 
7783 /*************************************************
7784 *             Usage function                     *
7785 *************************************************/
7786 
static void
usage(void)
{
/* Writes the command-line help text to stdout. The text is fixed apart from
two build-time choices: which sentence about readline() is shown, and which of
the -8/-16/-32 lines appear (one for each SUPPORT_PCRE2_n macro defined).
Consecutive lines are grouped into single writes; the bytes written are the
same as if each line were written separately. */

fputs(
  "Usage:     pcre2test [options] [<input file> [<output file>]]\n\n"
  "Input and output default to stdin and stdout.\n",
  stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcre2test is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

/* Library width selectors: listed only for the widths that were built. */

#ifdef SUPPORT_PCRE2_8
fputs("  -8            use the 8-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_16
fputs("  -16           use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_32
fputs("  -32           use the 32-bit library\n", stdout);
#endif

/* Callout, bincode and -C options, including the list of -C arguments. */

fputs(
  "  -ac           set default pattern modifier PCRE2_AUTO_CALLOUT\n"
  "  -AC           as -ac, but also set subject 'callout_extra' modifier\n"
  "  -b            set default pattern modifier 'fullbincode'\n"
  "  -C            show PCRE2 compile-time options and exit\n"
  "  -C arg        show a specific compile-time option and exit with its\n"
  "                  value if numeric (else 0). The arg can be:\n",
  stdout);

fputs(
  "     backslash-C    use of \\C is enabled [0, 1]\n"
  "     bsr            \\R type [ANYCRLF, ANY]\n"
  "     ebcdic         compiled for EBCDIC character code [0,1]\n"
  "     ebcdic-nl      NL code if compiled for EBCDIC\n"
  "     jit            just-in-time compiler supported [0, 1]\n"
  "     linksize       internal link size [2, 3, 4]\n"
  "     newline        newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n"
  "     pcre2-8        8 bit library support enabled [0, 1]\n"
  "     pcre2-16       16 bit library support enabled [0, 1]\n"
  "     pcre2-32       32 bit library support enabled [0, 1]\n"
  "     unicode        Unicode and UTF support enabled [0, 1]\n",
  stdout);

/* The remaining options. */

fputs(
  "  -d            set default pattern modifier 'debug'\n"
  "  -dfa          set default subject modifier 'dfa'\n"
  "  -error <n,m,..>  show messages for error numbers, then exit\n"
  "  -help         show usage information\n"
  "  -i            set default pattern modifier 'info'\n"
  "  -jit          set default pattern modifier 'jit'\n"
  "  -jitverify    set default pattern modifier 'jitverify'\n"
  "  -LM           list pattern and subject modifiers, then exit\n"
  "  -q            quiet: do not output PCRE2 version number at start\n",
  stdout);

fputs(
  "  -pattern <s>  set default pattern modifier fields\n"
  "  -subject <s>  set default subject modifier fields\n"
  "  -S <n>        set stack size to <n> mebibytes\n"
  "  -t [<n>]      time compilation and execution, repeating <n> times\n"
  "  -tm [<n>]     time execution (matching) only, repeating <n> times\n"
  "  -T            same as -t, but show total times at the end\n"
  "  -TM           same as -tm, but show total time at the end\n"
  "  -version      show PCRE2 version and exit\n",
  stdout);
}
7842 
7843 
7844 
7845 /*************************************************
7846 *             Handle -C option                   *
7847 *************************************************/
7848 
7849 /* This option outputs configuration options and sets an appropriate return
7850 code when asked for a single option. The code is abstracted into a separate
7851 function because of its size. Use whichever pcre2_config() function is
7852 available.
7853 
7854 Argument:   an option name or NULL
7855 Returns:    the return code
7856 */
7857 
7858 static int
c_option(const char * arg)7859 c_option(const char *arg)
7860 {
7861 uint32_t optval;
7862 unsigned int i = COPTLISTCOUNT;
7863 int yield = 0;
7864 
7865 if (arg != NULL && arg[0] != CHAR_MINUS)
7866   {
7867   for (i = 0; i < COPTLISTCOUNT; i++)
7868     if (strcmp(arg, coptlist[i].name) == 0) break;
7869 
7870   if (i >= COPTLISTCOUNT)
7871     {
7872     fprintf(stderr, "** Unknown -C option '%s'\n", arg);
7873     return 0;
7874     }
7875 
7876   switch (coptlist[i].type)
7877     {
7878     case CONF_BSR:
7879     (void)PCRE2_CONFIG(coptlist[i].value, &optval);
7880     printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
7881     break;
7882 
7883     case CONF_FIX:
7884     yield = coptlist[i].value;
7885     printf("%d\n", yield);
7886     break;
7887 
7888     case CONF_FIZ:
7889     optval = coptlist[i].value;
7890     printf("%d\n", optval);
7891     break;
7892 
7893     case CONF_INT:
7894     (void)PCRE2_CONFIG(coptlist[i].value, &yield);
7895     printf("%d\n", yield);
7896     break;
7897 
7898     case CONF_NL:
7899     (void)PCRE2_CONFIG(coptlist[i].value, &optval);
7900     print_newline_config(optval, TRUE);
7901     break;
7902     }
7903 
7904 /* For VMS, return the value by setting a symbol, for certain values only. */
7905 
7906 #ifdef __VMS
7907   if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
7908     {
7909     char ucname[16];
7910     strcpy(ucname, coptlist[i].name);
7911     for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]];
7912     vms_setsymbol(ucname, 0, optval);
7913     }
7914 #endif
7915 
7916   return yield;
7917   }
7918 
7919 /* No argument for -C: output all configuration information. */
7920 
7921 print_version(stdout);
7922 printf("Compiled with\n");
7923 
7924 #ifdef EBCDIC
7925 printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
7926 #if defined NATIVE_ZOS
7927 printf("  EBCDIC code page %s or similar\n", pcrz_cpversion());
7928 #endif
7929 #endif
7930 
7931 (void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval);
7932 if (optval & 1) printf("  8-bit support\n");
7933 if (optval & 2) printf("  16-bit support\n");
7934 if (optval & 4) printf("  32-bit support\n");
7935 
7936 #ifdef SUPPORT_VALGRIND
7937 printf("  Valgrind support\n");
7938 #endif
7939 
7940 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
7941 if (optval != 0)
7942   {
7943   printf("  UTF and UCP support (");
7944   print_unicode_version(stdout);
7945   printf(")\n");
7946   }
7947 else printf("  No Unicode support\n");
7948 
7949 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
7950 if (optval != 0)
7951   {
7952   printf("  Just-in-time compiler support: ");
7953   print_jit_target(stdout);
7954   printf("\n");
7955   }
7956 else
7957   {
7958   printf("  No just-in-time compiler support\n");
7959   }
7960 
7961 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
7962 print_newline_config(optval, FALSE);
7963 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
7964 printf("  \\R matches %s\n",
7965   (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
7966                                  "all Unicode newlines");
7967 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
7968 printf("  \\C is %ssupported\n", optval? "not ":"");
7969 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
7970 printf("  Internal link size = %d\n", optval);
7971 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
7972 printf("  Parentheses nest limit = %d\n", optval);
7973 (void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
7974 printf("  Default heap limit = %d\n", optval);
7975 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
7976 printf("  Default match limit = %d\n", optval);
7977 (void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
7978 printf("  Default depth limit = %d\n", optval);
7979 return 0;
7980 }
7981 
7982 
7983 
7984 /*************************************************
7985 *              Display one modifier              *
7986 *************************************************/
7987 
7988 static void
display_one_modifier(modstruct * m,BOOL for_pattern)7989 display_one_modifier(modstruct *m, BOOL for_pattern)
7990 {
7991 uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))?
7992   '*' : ' ';
7993 printf("%c%s", c, m->name);
7994 }
7995 
7996 
7997 
7998 /*************************************************
7999 *       Display pattern or subject modifiers     *
8000 *************************************************/
8001 
8002 /* In order to print in two columns, first scan without printing to get a list
8003 of the modifiers that are required.
8004 
8005 Arguments:
8006   for_pattern   TRUE for pattern modifiers, FALSE for subject modifiers
8007   title         string to be used in title
8008 
8009 Returns:        nothing
8010 */
8011 
8012 static void
display_selected_modifiers(BOOL for_pattern,const char * title)8013 display_selected_modifiers(BOOL for_pattern, const char *title)
8014 {
8015 uint32_t i, j;
8016 uint32_t n = 0;
8017 uint32_t list[MODLISTCOUNT];
8018 
8019 for (i = 0; i < MODLISTCOUNT; i++)
8020   {
8021   BOOL is_pattern = TRUE;
8022   modstruct *m = modlist + i;
8023 
8024   switch (m->which)
8025     {
8026     case MOD_CTC:       /* Compile context */
8027     case MOD_PAT:       /* Pattern */
8028     case MOD_PATP:      /* Pattern, OK for Perl-compatible test */
8029     break;
8030 
8031     /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect
8032     subjects, but can be given with a pattern. We list them as subject
8033     modifiers, but marked with an asterisk.*/
8034 
8035     case MOD_CTM:       /* Match context */
8036     case MOD_DAT:       /* Subject line */
8037     case MOD_PND:       /* As PD, but not default pattern */
8038     case MOD_PNDP:      /* As PND, OK for Perl-compatible test */
8039     is_pattern = FALSE;
8040     break;
8041 
8042     default: printf("** Unknown type for modifier '%s'\n", m->name);
8043     /* Fall through */
8044     case MOD_PD:        /* Pattern or subject */
8045     case MOD_PDP:       /* As PD, OK for Perl-compatible test */
8046     is_pattern = for_pattern;
8047     break;
8048     }
8049 
8050   if (for_pattern == is_pattern) list[n++] = i;
8051   }
8052 
8053 /* Now print from the list in two columns. */
8054 
8055 printf("-------------- %s MODIFIERS --------------\n", title);
8056 
8057 for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++)
8058   {
8059   modstruct *m = modlist + list[i];
8060   display_one_modifier(m, for_pattern);
8061   if (j < n)
8062     {
8063     uint32_t k = 27 - strlen(m->name);
8064     while (k-- > 0) printf(" ");
8065     display_one_modifier(modlist + list[j], for_pattern);
8066     }
8067   printf("\n");
8068   }
8069 }
8070 
8071 
8072 
8073 /*************************************************
8074 *          Display the list of modifiers         *
8075 *************************************************/
8076 
8077 static void
display_modifiers(void)8078 display_modifiers(void)
8079 {
8080 printf(
8081   "An asterisk on a subject modifier means that it may be given on a pattern\n"
8082   "line, in order to apply to all subjects matched by that pattern. Modifiers\n"
8083   "that are listed for both patterns and subjects have different effects in\n"
8084   "each case.\n\n");
8085 display_selected_modifiers(TRUE, "PATTERN");
8086 printf("\n");
8087 display_selected_modifiers(FALSE, "SUBJECT");
8088 }
8089 
8090 
8091 
8092 /*************************************************
8093 *                Main Program                    *
8094 *************************************************/
8095 
8096 int
main(int argc,char ** argv)8097 main(int argc, char **argv)
8098 {
8099 uint32_t temp;
8100 uint32_t yield = 0;
8101 uint32_t op = 1;
8102 BOOL notdone = TRUE;
8103 BOOL quiet = FALSE;
8104 BOOL showtotaltimes = FALSE;
8105 BOOL skipping = FALSE;
8106 char *arg_subject = NULL;
8107 char *arg_pattern = NULL;
8108 char *arg_error = NULL;
8109 
8110 /* The offsets to the options and control bits fields of the pattern and data
8111 control blocks must be the same so that common options and controls such as
8112 "anchored" or "memory" can work for either of them from a single table entry.
8113 We cannot test this till runtime because "offsetof" does not work in the
8114 preprocessor. */
8115 
8116 if (PO(options) != DO(options) || PO(control) != DO(control) ||
8117     PO(control2) != DO(control2))
8118   {
8119   fprintf(stderr, "** Coding error: "
8120     "options and control offsets for pattern and data must be the same.\n");
8121   return 1;
8122   }
8123 
8124 /* Get the PCRE2 and Unicode version number and JIT target information, at the
8125 same time checking that a request for the length gives the same answer. Also
8126 check lengths for non-string items. */
8127 
8128 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
8129     PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
8130 
8131     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
8132     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
8133 
8134     PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
8135     PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
8136 
8137     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
8138     PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
8139   {
8140   fprintf(stderr, "** Error in pcre2_config(): bad length\n");
8141   return 1;
8142   }
8143 
8144 /* Check that bad options are diagnosed. */
8145 
8146 if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION ||
8147     PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION)
8148   {
8149   fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n");
8150   return 1;
8151   }
8152 
8153 /* This configuration option is now obsolete, but running a quick check ensures
8154 that its code is covered. */
8155 
8156 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp);
8157 
8158 /* Get buffers from malloc() so that valgrind will check their misuse when
8159 debugging. They grow automatically when very long lines are read. The 16-
8160 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
8161 
8162 buffer = (uint8_t *)malloc(pbuffer8_size);
8163 pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
8164 
8165 /* The following  _setmode() stuff is some Windows magic that tells its runtime
8166 library to translate CRLF into a single LF character. At least, that's what
8167 I've been told: never having used Windows I take this all on trust. Originally
8168 it set 0x8000, but then I was advised that _O_BINARY was better. */
8169 
8170 #if defined(_WIN32) || defined(WIN32)
8171 _setmode( _fileno( stdout ), _O_BINARY );
8172 #endif
8173 
8174 /* Initialization that does not depend on the running mode. */
8175 
8176 locale_name[0] = 0;
8177 
8178 memset(&def_patctl, 0, sizeof(patctl));
8179 def_patctl.convert_type = CONVERT_UNSET;
8180 
8181 memset(&def_datctl, 0, sizeof(datctl));
8182 def_datctl.oveccount = DEFAULT_OVECCOUNT;
8183 def_datctl.copy_numbers[0] = -1;
8184 def_datctl.get_numbers[0] = -1;
8185 def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
8186 def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
8187 def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
8188 
8189 /* Scan command line options. */
8190 
8191 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
8192   {
8193   char *endptr;
8194   char *arg = argv[op];
8195   unsigned long uli;
8196 
8197   /* List modifiers and exit. */
8198 
8199   if (strcmp(arg, "-LM") == 0)
8200     {
8201     display_modifiers();
8202     goto EXIT;
8203     }
8204 
8205   /* Display and/or set return code for configuration options. */
8206 
8207   if (strcmp(arg, "-C") == 0)
8208     {
8209     yield = c_option(argv[op + 1]);
8210     goto EXIT;
8211     }
8212 
8213   /* Select operating mode. Ensure that pcre2_config() is called in 16-bit
8214   and 32-bit modes because that won't happen naturally when 8-bit is also
8215   configured. Also call some other functions that are not otherwise used. This
8216   means that a coverage report won't claim there are uncalled functions. */
8217 
8218   if (strcmp(arg, "-8") == 0)
8219     {
8220 #ifdef SUPPORT_PCRE2_8
8221     test_mode = PCRE8_MODE;
8222     (void)pcre2_set_bsr_8(pat_context8, 999);
8223     (void)pcre2_set_newline_8(pat_context8, 999);
8224 #else
8225     fprintf(stderr,
8226       "** This version of PCRE2 was built without 8-bit support\n");
8227     exit(1);
8228 #endif
8229     }
8230 
8231   else if (strcmp(arg, "-16") == 0)
8232     {
8233 #ifdef SUPPORT_PCRE2_16
8234     test_mode = PCRE16_MODE;
8235     (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL);
8236     (void)pcre2_set_bsr_16(pat_context16, 999);
8237     (void)pcre2_set_newline_16(pat_context16, 999);
8238 #else
8239     fprintf(stderr,
8240       "** This version of PCRE2 was built without 16-bit support\n");
8241     exit(1);
8242 #endif
8243     }
8244 
8245   else if (strcmp(arg, "-32") == 0)
8246     {
8247 #ifdef SUPPORT_PCRE2_32
8248     test_mode = PCRE32_MODE;
8249     (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL);
8250     (void)pcre2_set_bsr_32(pat_context32, 999);
8251     (void)pcre2_set_newline_32(pat_context32, 999);
8252 #else
8253     fprintf(stderr,
8254       "** This version of PCRE2 was built without 32-bit support\n");
8255     exit(1);
8256 #endif
8257     }
8258 
8259   /* Set quiet (no version verification) */
8260 
8261   else if (strcmp(arg, "-q") == 0) quiet = TRUE;
8262 
8263   /* Set system stack size */
8264 
8265   else if (strcmp(arg, "-S") == 0 && argc > 2 &&
8266       ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
8267     {
8268 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
8269     fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
8270     exit(1);
8271 #else
8272     int rc;
8273     uint32_t stack_size;
8274     struct rlimit rlim;
8275     if (U32OVERFLOW(uli))
8276       {
8277       fprintf(stderr, "** Argument for -S is too big\n");
8278       exit(1);
8279       }
8280     stack_size = (uint32_t)uli;
8281     getrlimit(RLIMIT_STACK, &rlim);
8282     rlim.rlim_cur = stack_size * 1024 * 1024;
8283     if (rlim.rlim_cur > rlim.rlim_max)
8284       {
8285       fprintf(stderr,
8286         "pcre2test: requested stack size %luMiB is greater than hard limit "
8287           "%luMiB\n", (unsigned long int)stack_size,
8288           (unsigned long int)(rlim.rlim_max));
8289       exit(1);
8290       }
8291     rc = setrlimit(RLIMIT_STACK, &rlim);
8292     if (rc != 0)
8293       {
8294       fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n",
8295         (unsigned long int)stack_size, strerror(errno));
8296       exit(1);
8297       }
8298     op++;
8299     argc--;
8300 #endif
8301     }
8302 
8303   /* Set some common pattern and subject controls */
8304 
8305   else if (strcmp(arg, "-AC") == 0)
8306     {
8307     def_patctl.options |= PCRE2_AUTO_CALLOUT;
8308     def_datctl.control2 |= CTL2_CALLOUT_EXTRA;
8309     }
8310   else if (strcmp(arg, "-ac") == 0)  def_patctl.options |= PCRE2_AUTO_CALLOUT;
8311   else if (strcmp(arg, "-b") == 0)   def_patctl.control |= CTL_FULLBINCODE;
8312   else if (strcmp(arg, "-d") == 0)   def_patctl.control |= CTL_DEBUG;
8313   else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
8314   else if (strcmp(arg, "-i") == 0)   def_patctl.control |= CTL_INFO;
8315   else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0)
8316     {
8317     if (arg[4] != 0) def_patctl.control |= CTL_JITVERIFY;
8318     def_patctl.jit = 7;  /* full & partial */
8319 #ifndef SUPPORT_JIT
8320     fprintf(stderr, "** Warning: JIT support is not available: "
8321                     "-jit[verify] calls functions that do nothing.\n");
8322 #endif
8323     }
8324 
8325   /* Set timing parameters */
8326 
8327   else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
8328            strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
8329     {
8330     int both = arg[2] == 0;
8331     showtotaltimes = arg[1] == 'T';
8332     if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
8333       {
8334       if (U32OVERFLOW(uli))
8335         {
8336         fprintf(stderr, "** Argument for %s is too big\n", arg);
8337         exit(1);
8338         }
8339       timeitm = (int)uli;
8340       op++;
8341       argc--;
8342       }
8343     else timeitm = LOOPREPEAT;
8344     if (both) timeit = timeitm;
8345     }
8346 
8347   /* Give help */
8348 
8349   else if (strcmp(arg, "-help") == 0 ||
8350            strcmp(arg, "--help") == 0)
8351     {
8352     usage();
8353     goto EXIT;
8354     }
8355 
8356   /* Show version */
8357 
8358   else if (strcmp(arg, "-version") == 0 ||
8359            strcmp(arg, "--version") == 0)
8360     {
8361     print_version(stdout);
8362     goto EXIT;
8363     }
8364 
8365   /* The following options save their data for processing once we know what
8366   the running mode is. */
8367 
8368   else if (strcmp(arg, "-error") == 0)
8369     {
8370     arg_error = argv[op+1];
8371     goto CHECK_VALUE_EXISTS;
8372     }
8373 
8374   else if (strcmp(arg, "-subject") == 0)
8375     {
8376     arg_subject = argv[op+1];
8377     goto CHECK_VALUE_EXISTS;
8378     }
8379 
8380   else if (strcmp(arg, "-pattern") == 0)
8381     {
8382     arg_pattern = argv[op+1];
8383     CHECK_VALUE_EXISTS:
8384     if (argc <= 2)
8385       {
8386       fprintf(stderr, "** Missing value for %s\n", arg);
8387       yield = 1;
8388       goto EXIT;
8389       }
8390     op++;
8391     argc--;
8392     }
8393 
8394   /* Unrecognized option */
8395 
8396   else
8397     {
8398     fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
8399     usage();
8400     yield = 1;
8401     goto EXIT;
8402     }
8403   op++;
8404   argc--;
8405   }
8406 
8407 /* If -error was present, get the error numbers, show the messages, and exit.
8408 We wait to do this until we know which mode we are in. */
8409 
8410 if (arg_error != NULL)
8411   {
8412   int len;
8413   int errcode;
8414   char *endptr;
8415 
8416 /* Ensure the relevant non-8-bit buffer is available. Ensure that it is at
8417 least 128 code units, because it is used for retrieving error messages. */
8418 
8419 #ifdef SUPPORT_PCRE2_16
8420   if (test_mode == PCRE16_MODE)
8421     {
8422     pbuffer16_size = 256;
8423     pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
8424     if (pbuffer16 == NULL)
8425       {
8426       fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
8427         SIZ_CAST pbuffer16_size);
8428       yield = 1;
8429       goto EXIT;
8430       }
8431     }
8432 #endif
8433 
8434 #ifdef SUPPORT_PCRE2_32
8435   if (test_mode == PCRE32_MODE)
8436     {
8437     pbuffer32_size = 512;
8438     pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
8439     if (pbuffer32 == NULL)
8440       {
8441       fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
8442         SIZ_CAST pbuffer32_size);
8443       yield = 1;
8444       goto EXIT;
8445       }
8446     }
8447 #endif
8448 
8449   /* Loop along a list of error numbers. */
8450 
8451   for (;;)
8452     {
8453     errcode = strtol(arg_error, &endptr, 10);
8454     if (*endptr != 0 && *endptr != CHAR_COMMA)
8455       {
8456       fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
8457       yield = 1;
8458       goto EXIT;
8459       }
8460     printf("Error %d: ", errcode);
8461     PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
8462     if (len < 0)
8463       {
8464       switch (len)
8465         {
8466         case PCRE2_ERROR_BADDATA:
8467         printf("PCRE2_ERROR_BADDATA (unknown error number)");
8468         break;
8469 
8470         case PCRE2_ERROR_NOMEMORY:
8471         printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
8472         break;
8473 
8474         default:
8475         printf("Unexpected return (%d) from pcre2_get_error_message()", len);
8476         break;
8477         }
8478       }
8479     else
8480       {
8481       PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
8482       }
8483     printf("\n");
8484     if (*endptr == 0) goto EXIT;
8485     arg_error = endptr + 1;
8486     }
8487   /* Control never reaches here */
8488   }  /* End of -error handling */
8489 
8490 /* Initialize things that cannot be done until we know which test mode we are
8491 running in. Exercise the general context copying function, which is not
8492 otherwise used. */
8493 
8494 code_unit_size = test_mode/8;
8495 max_oveccount = DEFAULT_OVECCOUNT;
8496 
8497 /* Use macros to save a lot of duplication. */
8498 
/* CREATECONTEXTS builds every context object for the code unit width selected
by BITS (names are pasted together by G). In order, it creates the general
context using my_malloc/my_free, takes a copy of it, then for each of the
compile, match, and convert contexts creates a default instance from the
general context followed by a copy of that default. Finally it creates the
match data block, passing max_oveccount and the general context. The expansion
has no trailing semicolon; the invocation supplies it. */

#define CREATECONTEXTS \
  G(general_context,BITS) = \
    G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
  G(general_context_copy,BITS) = \
    G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
  G(default_pat_context,BITS) = \
    G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
  G(pat_context,BITS) = \
    G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
  G(default_dat_context,BITS) = \
    G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
  G(dat_context,BITS) = \
    G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
  G(default_con_context,BITS) = \
    G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
  G(con_context,BITS) = \
    G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
  G(match_data,BITS) = \
    G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
8509 
/* CONTEXTTESTS calls four context-setting functions for the width selected by
BITS, discarding each result: extra compile options and maximum pattern length
are set to 0 on the working compile context; the offset limit is set to 0 and
the recursion memory management functions are set to my_malloc/my_free on the
working match context. The expansion has no trailing semicolon. */

#define CONTEXTTESTS \
  (void)G(pcre2_set_compile_extra_options_,BITS)( \
    G(pat_context,BITS), 0); \
  (void)G(pcre2_set_max_pattern_length_,BITS)( \
    G(pat_context,BITS), 0); \
  (void)G(pcre2_set_offset_limit_,BITS)( \
    G(dat_context,BITS), 0); \
  (void)G(pcre2_set_recursion_memory_management_,BITS)( \
    G(dat_context,BITS), my_malloc, my_free, NULL)
8515 
8516 /* Call the appropriate functions for the current mode, and exercise some
8517 functions that are not otherwise called. */
8518 
8519 #ifdef SUPPORT_PCRE2_8
8520 #undef BITS
8521 #define BITS 8
8522 if (test_mode == PCRE8_MODE)
8523   {
8524   CREATECONTEXTS;
8525   CONTEXTTESTS;
8526   }
8527 #endif
8528 
8529 #ifdef SUPPORT_PCRE2_16
8530 #undef BITS
8531 #define BITS 16
8532 if (test_mode == PCRE16_MODE)
8533   {
8534   CREATECONTEXTS;
8535   CONTEXTTESTS;
8536   }
8537 #endif
8538 
8539 #ifdef SUPPORT_PCRE2_32
8540 #undef BITS
8541 #define BITS 32
8542 if (test_mode == PCRE32_MODE)
8543   {
8544   CREATECONTEXTS;
8545   CONTEXTTESTS;
8546   }
8547 #endif
8548 
8549 /* Set a default parentheses nest limit that is large enough to run the
8550 standard tests (this also exercises the function). */
8551 
8552 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
8553 
8554 /* Handle command line modifier settings, sending any error messages to
8555 stderr. We need to know the mode before modifying the context, and it is tidier
8556 to do them all in the same way. */
8557 
8558 outfile = stderr;
8559 if ((arg_pattern != NULL &&
8560     !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
8561     (arg_subject != NULL &&
8562     !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
8563   {
8564   yield = 1;
8565   goto EXIT;
8566   }
8567 
8568 /* Sort out the input and output files, defaulting to stdin/stdout. */
8569 
8570 infile = stdin;
8571 outfile = stdout;
8572 
8573 if (argc > 1 && strcmp(argv[op], "-") != 0)
8574   {
8575   infile = fopen(argv[op], INPUT_MODE);
8576   if (infile == NULL)
8577     {
8578     printf("** Failed to open '%s': %s\n", argv[op], strerror(errno));
8579     yield = 1;
8580     goto EXIT;
8581     }
8582   }
8583 
8584 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
8585 if (INTERACTIVE(infile)) using_history();
8586 #endif
8587 
8588 if (argc > 2)
8589   {
8590   outfile = fopen(argv[op+1], OUTPUT_MODE);
8591   if (outfile == NULL)
8592     {
8593     printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno));
8594     yield = 1;
8595     goto EXIT;
8596     }
8597   }
8598 
8599 /* Output a heading line unless quiet, then process input lines. */
8600 
8601 if (!quiet) print_version(outfile);
8602 
8603 SET(compiled_code, NULL);
8604 
8605 #ifdef SUPPORT_PCRE2_8
8606 preg.re_pcre2_code = NULL;
8607 preg.re_match_data = NULL;
8608 #endif
8609 
8610 while (notdone)
8611   {
8612   uint8_t *p;
8613   int rc = PR_OK;
8614   BOOL expectdata = TEST(compiled_code, !=, NULL);
8615 #ifdef SUPPORT_PCRE2_8
8616   expectdata |= preg.re_pcre2_code != NULL;
8617 #endif
8618 
8619   if (extend_inputline(infile, buffer, expectdata? "data> " : "  re> ") == NULL)
8620     break;
8621   if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
8622   fflush(outfile);
8623   p = buffer;
8624 
8625   /* If we have a pattern set up for testing, or we are skipping after a
8626   compile failure, a blank line terminates this test. */
8627 
8628   if (expectdata || skipping)
8629     {
8630     while (isspace(*p)) p++;
8631     if (*p == 0)
8632       {
8633 #ifdef SUPPORT_PCRE2_8
8634       if (preg.re_pcre2_code != NULL)
8635         {
8636         regfree(&preg);
8637         preg.re_pcre2_code = NULL;
8638         preg.re_match_data = NULL;
8639         }
8640 #endif  /* SUPPORT_PCRE2_8 */
8641       if (TEST(compiled_code, !=, NULL))
8642         {
8643         SUB1(pcre2_code_free, compiled_code);
8644         SET(compiled_code, NULL);
8645         }
8646       skipping = FALSE;
8647       setlocale(LC_CTYPE, "C");
8648       }
8649 
8650     /* Otherwise, if we are not skipping, and the line is not a data comment
8651     line starting with "\=", process a data line. */
8652 
8653     else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
8654       {
8655       rc = process_data();
8656       }
8657     }
8658 
8659   /* We do not have a pattern set up for testing. Lines starting with # are
8660   either comments or special commands. Blank lines are ignored. Otherwise, the
8661   line must start with a valid delimiter. It is then processed as a pattern
8662   line. A copy of the pattern is left in pbuffer8 for use by callouts. Under
8663   valgrind, make the unused part of the buffer undefined, to catch overruns. */
8664 
8665   else if (*p == '#')
8666     {
8667     if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
8668     rc = process_command();
8669     }
8670 
8671   else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
8672     {
8673     rc = process_pattern();
8674     dfa_matched = 0;
8675     }
8676 
8677   else
8678     {
8679     while (isspace(*p)) p++;
8680     if (*p != 0)
8681       {
8682       fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
8683         *buffer);
8684       rc = PR_SKIP;
8685       }
8686     }
8687 
8688   if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
8689   else if (rc == PR_ABEND)
8690     {
8691     fprintf(outfile, "** pcre2test run abandoned\n");
8692     yield = 1;
8693     goto EXIT;
8694     }
8695   }
8696 
8697 /* Finish off a normal run. */
8698 
8699 if (INTERACTIVE(infile)) fprintf(outfile, "\n");
8700 
8701 if (showtotaltimes)
8702   {
8703   const char *pad = "";
8704   fprintf(outfile, "--------------------------------------\n");
8705   if (timeit > 0)
8706     {
8707     fprintf(outfile, "Total compile time %.4f milliseconds\n",
8708       (((double)total_compile_time * 1000.0) / (double)timeit) /
8709         (double)CLOCKS_PER_SEC);
8710     if (total_jit_compile_time > 0)
8711       fprintf(outfile, "Total JIT compile  %.4f milliseconds\n",
8712         (((double)total_jit_compile_time * 1000.0) / (double)timeit) /
8713           (double)CLOCKS_PER_SEC);
8714     pad = "  ";
8715     }
8716   fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad,
8717     (((double)total_match_time * 1000.0) / (double)timeitm) /
8718       (double)CLOCKS_PER_SEC);
8719   }
8720 
8721 
8722 EXIT:
8723 
8724 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
8725 if (infile != NULL && INTERACTIVE(infile)) clear_history();
8726 #endif
8727 
8728 if (infile != NULL && infile != stdin) fclose(infile);
8729 if (outfile != NULL && outfile != stdout) fclose(outfile);
8730 
8731 free(buffer);
8732 free(dbuffer);
8733 free(pbuffer8);
8734 free(dfa_workspace);
8735 free((void *)locale_tables);
8736 PCRE2_MATCH_DATA_FREE(match_data);
8737 SUB1(pcre2_code_free, compiled_code);
8738 
8739 while(patstacknext-- > 0)
8740   {
8741   SET(compiled_code, patstack[patstacknext]);
8742   SUB1(pcre2_code_free, compiled_code);
8743   }
8744 
8745 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
8746 if (jit_stack != NULL)
8747   {
8748   PCRE2_JIT_STACK_FREE(jit_stack);
8749   }
8750 
/* FREECONTEXTS releases the eight context objects for the code unit width
selected by BITS: the general context and its copy, then the working and
default compile contexts, the working and default match contexts, and the
default and working convert contexts. The match data block is not freed here;
that is done separately with PCRE2_MATCH_DATA_FREE. The macro is invoked once
for every supported width, not just the current test mode, so contexts that
were never created are passed to the free functions as well.
NOTE(review): this presumably relies on those pointers being NULL and on the
free functions accepting NULL - confirm.

Like CREATECONTEXTS and CONTEXTTESTS, the expansion deliberately has no
trailing semicolon: each invocation supplies its own, which avoids a stray
empty statement at every use. */

#define FREECONTEXTS \
  G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
  G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
  G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
  G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
  G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
  G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \
  G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \
  G(pcre2_convert_context_free_,BITS)(G(con_context,BITS))
8760 
8761 #ifdef SUPPORT_PCRE2_8
8762 #undef BITS
8763 #define BITS 8
8764 if (preg.re_pcre2_code != NULL) regfree(&preg);
8765 FREECONTEXTS;
8766 #endif
8767 
8768 #ifdef SUPPORT_PCRE2_16
8769 #undef BITS
8770 #define BITS 16
8771 free(pbuffer16);
8772 FREECONTEXTS;
8773 #endif
8774 
8775 #ifdef SUPPORT_PCRE2_32
8776 #undef BITS
8777 #define BITS 32
8778 free(pbuffer32);
8779 FREECONTEXTS;
8780 #endif
8781 
8782 #if defined(__VMS)
8783   yield = SS$_NORMAL;  /* Return values via DCL symbols */
8784 #endif
8785 
8786 return yield;
8787 }
8788 
8789 /* End of pcre2test.c */
8790