1 /*************************************************
2 *               pcre2grep program                *
3 *************************************************/
4 
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9 
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15 
16            Copyright (c) 1997-2020 University of Cambridge
17 
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21 
22     * Redistributions of source code must retain the above copyright notice,
23       this list of conditions and the following disclaimer.
24 
25     * Redistributions in binary form must reproduce the above copyright
26       notice, this list of conditions and the following disclaimer in the
27       documentation and/or other materials provided with the distribution.
28 
29     * Neither the name of the University of Cambridge nor the names of its
30       contributors may be used to endorse or promote products derived from
31       this software without specific prior written permission.
32 
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46 
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50 
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57 
58 #include <sys/types.h>
59 #include <sys/stat.h>
60 
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62   && !defined WIN32 && !defined(__CYGWIN__)
63 #define WIN32
64 #endif
65 
/* Some versions of CMake still define WIN32 under Cygwin, so undefine it. */
67 #if defined(__CYGWIN__) && defined(WIN32)
68 #undef WIN32
69 #endif
70 
71 #ifdef __VMS
72 #include clidef
73 #include descrip
74 #include lib$routines
75 #endif
76 
77 #ifdef WIN32
78 #include <io.h>                /* For _setmode() */
79 #include <fcntl.h>             /* For _O_BINARY */
80 #endif
81 
82 #if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83 #ifdef WIN32
84 #include <process.h>
85 #else
86 #include <sys/wait.h>
87 #endif
88 #endif
89 
90 #ifdef HAVE_UNISTD_H
91 #include <unistd.h>
92 #endif
93 
94 #ifdef SUPPORT_LIBZ
95 #include <zlib.h>
96 #endif
97 
98 #ifdef SUPPORT_LIBBZ2
99 #include <bzlib.h>
100 #endif
101 
102 #define PCRE2_CODE_UNIT_WIDTH 8
103 #include "pcre2.h"
104 
105 /* Older versions of MSVC lack snprintf(). This define allows for
106 warning/error-free compilation and testing with MSVC compilers back to at least
107 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
108 
109 #if defined(_MSC_VER) && (_MSC_VER < 1900)
110 #define snprintf _snprintf
111 #endif
112 
/* VC and older compilers don't support %td or %zu, and even some that claim to
be C99 don't support them (hence DISABLE_PERCENT_ZT). */
115 
116 #if defined(_MSC_VER) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L || defined(DISABLE_PERCENT_ZT)
117 #define PTR_FORM "lu"
118 #define SIZ_FORM "lu"
119 #define SIZ_CAST (unsigned long int)
120 #else
121 #define PTR_FORM "td"
122 #define SIZ_FORM "zu"
123 #define SIZ_CAST
124 #endif
125 
126 #define FALSE 0
127 #define TRUE 1
128 
129 typedef int BOOL;
130 
131 #define DEFAULT_CAPTURE_MAX 50
132 
133 #if BUFSIZ > 8192
134 #define MAXPATLEN BUFSIZ
135 #else
136 #define MAXPATLEN 8192
137 #endif
138 
139 #define FNBUFSIZ 2048
140 #define ERRBUFSIZ 256
141 
142 /* Values for the "filenames" variable, which specifies options for file name
143 output. The order is important; it is assumed that a file name is wanted for
144 all values greater than FN_DEFAULT. */
145 
146 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
147 
148 /* File reading styles */
149 
150 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
151 
152 /* Actions for the -d and -D options */
153 
154 enum { dee_READ, dee_SKIP, dee_RECURSE };
155 enum { DEE_READ, DEE_SKIP };
156 
157 /* Actions for special processing options (flag bits) */
158 
159 #define PO_WORD_MATCH     0x0001
160 #define PO_LINE_MATCH     0x0002
161 #define PO_FIXED_STRINGS  0x0004
162 
163 /* Binary file options */
164 
165 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
166 
167 /* Return values from decode_dollar_escape() */
168 
169 enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
170 
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that tests the value and then does nothing with it. Oddly,
this does not also seem to apply to fprintf().

The test is wrapped in do ... while (0) so that the macro followed by a
semicolon forms exactly one statement. A bare "if (...) {}" expansion cannot be
used as the body of an if that has an else, because the semicolon after the
macro call ends the outer if before the else is reached. */

#define FWRITE_IGNORE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
178 
179 /* Under Windows, we have to set stdout to be binary, so that it does not
180 convert \r\n at the ends of output lines to \r\r\n. However, that means that
181 any messages written to stdout must have \r\n as their line terminator. This is
182 handled by using STDOUT_NL as the newline string. We also use a normal double
183 quote for the example, as single quotes aren't usually available. */
184 
185 #ifdef WIN32
186 #define STDOUT_NL     "\r\n"
187 #define STDOUT_NL_LEN  2
188 #define QUOT          "\""
189 #else
190 #define STDOUT_NL      "\n"
191 #define STDOUT_NL_LEN  1
192 #define QUOT           "'"
193 #endif
194 
195 /* This code is returned from decode_dollar_escape() when $n is encountered,
196 and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
197 point. */
198 
199 #define STDOUT_NL_CODE 0x7fffffffu
200 
201 
202 
203 /*************************************************
204 *               Global variables                 *
205 *************************************************/
206 
207 /* Jeffrey Friedl has some debugging requirements that are not part of the
208 regular code. */
209 
210 #ifdef JFRIEDL_DEBUG
211 static int S_arg = -1;
212 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
213 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
214 static const char *jfriedl_prefix = "";
215 static const char *jfriedl_postfix = "";
216 #endif
217 
218 static const char *colour_string = "1;31";
219 static const char *colour_option = NULL;
220 static const char *dee_option = NULL;
221 static const char *DEE_option = NULL;
222 static const char *locale = NULL;
223 static const char *newline_arg = NULL;
224 static const char *om_separator = NULL;
225 static const char *stdin_name = "(standard input)";
226 static const char *output_text = NULL;
227 
228 static char *main_buffer = NULL;
229 
230 static int after_context = 0;
231 static int before_context = 0;
232 static int binary_files = BIN_BINARY;
233 static int both_context = 0;
234 static int bufthird = PCRE2GREP_BUFSIZE;
235 static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
236 static int bufsize = 3*PCRE2GREP_BUFSIZE;
237 static int endlinetype;
238 
239 static int count_limit = -1;  /* Not long, so that it works with OP_NUMBER */
240 static unsigned long int counts_printed = 0;
241 static unsigned long int total_count = 0;
242 
243 #ifdef WIN32
244 static int dee_action = dee_SKIP;
245 #else
246 static int dee_action = dee_READ;
247 #endif
248 
249 static int DEE_action = DEE_READ;
250 static int error_count = 0;
251 static int filenames = FN_DEFAULT;
252 
253 #ifdef SUPPORT_PCRE2GREP_JIT
254 static BOOL use_jit = TRUE;
255 #else
256 static BOOL use_jit = FALSE;
257 #endif
258 
259 static const uint8_t *character_tables = NULL;
260 
261 static uint32_t pcre2_options = 0;
262 static uint32_t extra_options = 0;
263 static PCRE2_SIZE heap_limit = PCRE2_UNSET;
264 static uint32_t match_limit = 0;
265 static uint32_t depth_limit = 0;
266 
267 static pcre2_compile_context *compile_context;
268 static pcre2_match_context *match_context;
269 static pcre2_match_data *match_data;
270 static PCRE2_SIZE *offsets;
271 static uint32_t offset_size;
272 static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
273 
274 static BOOL count_only = FALSE;
275 static BOOL do_colour = FALSE;
276 #ifdef WIN32
277 static BOOL do_ansi = FALSE;
278 #endif
279 static BOOL file_offsets = FALSE;
280 static BOOL hyphenpending = FALSE;
281 static BOOL invert = FALSE;
282 static BOOL line_buffered = FALSE;
283 static BOOL line_offsets = FALSE;
284 static BOOL multiline = FALSE;
285 static BOOL number = FALSE;
286 static BOOL omit_zero_count = FALSE;
287 static BOOL resource_error = FALSE;
288 static BOOL quiet = FALSE;
289 static BOOL show_total_count = FALSE;
290 static BOOL silent = FALSE;
291 static BOOL utf = FALSE;
292 
293 static uint8_t utf8_buffer[8];
294 
295 
296 /* Structure for list of --only-matching capturing numbers. */
297 
typedef struct omstr {
  struct omstr *next;   /* Next item in the chain, or NULL */
  int groupnum;         /* Capturing group number for --only-matching */
} omstr;
302 
303 static omstr *only_matching = NULL;
304 static omstr *only_matching_last = NULL;
305 static int only_matching_count;
306 
307 /* Structure for holding the two variables that describe a number chain. */
308 
typedef struct omdatastr {
  omstr **anchor;       /* Address of the pointer to the first chain item */
  omstr **lastptr;      /* Address of the pointer to the last chain item */
} omdatastr;
313 
314 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
315 
316 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
317 
typedef struct fnstr {
  struct fnstr *next;   /* Next item in the chain, or NULL */
  char *name;           /* File name */
} fnstr;
322 
323 static fnstr *exclude_from = NULL;
324 static fnstr *exclude_from_last = NULL;
325 static fnstr *include_from = NULL;
326 static fnstr *include_from_last = NULL;
327 
328 static fnstr *file_lists = NULL;
329 static fnstr *file_lists_last = NULL;
330 static fnstr *pattern_files = NULL;
331 static fnstr *pattern_files_last = NULL;
332 
333 /* Structure for holding the two variables that describe a file name chain. */
334 
typedef struct fndatastr {
  fnstr **anchor;       /* Address of the pointer to the first chain item */
  fnstr **lastptr;      /* Address of the pointer to the last chain item */
} fndatastr;
339 
340 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
341 static fndatastr include_from_data = { &include_from, &include_from_last };
342 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
343 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
344 
345 /* Structure for pattern and its compiled form; used for matching patterns and
346 also for include/exclude patterns. */
347 
typedef struct patstr {
  struct patstr *next;    /* Next pattern in the chain, or NULL */
  char *string;           /* Pattern text */
  PCRE2_SIZE length;      /* Length of the pattern text */
  pcre2_code *compiled;   /* Compiled form of the pattern, or NULL */
} patstr;
354 
355 static patstr *patterns = NULL;
356 static patstr *patterns_last = NULL;
357 static patstr *include_patterns = NULL;
358 static patstr *include_patterns_last = NULL;
359 static patstr *exclude_patterns = NULL;
360 static patstr *exclude_patterns_last = NULL;
361 static patstr *include_dir_patterns = NULL;
362 static patstr *include_dir_patterns_last = NULL;
363 static patstr *exclude_dir_patterns = NULL;
364 static patstr *exclude_dir_patterns_last = NULL;
365 
366 /* Structure holding the two variables that describe a pattern chain. A pointer
367 to such structures is used for each appropriate option. */
368 
typedef struct patdatastr {
  patstr **anchor;      /* Address of the pointer to the first chain item */
  patstr **lastptr;     /* Address of the pointer to the last chain item */
} patdatastr;
373 
374 static patdatastr match_patdata = { &patterns, &patterns_last };
375 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
376 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
377 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
378 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
379 
380 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
381                                  &include_dir_patterns, &exclude_dir_patterns };
382 
383 static const char *incexname[4] = { "--include", "--exclude",
384                                     "--include-dir", "--exclude-dir" };
385 
386 /* Structure for options and list of them */
387 
388 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
389        OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
390 
typedef struct option_item {
  int type;               /* One of the OP_xxx values */
  int one_char;           /* Single-letter option character, or a negative
                             N_xxx value when there is no single-letter form */
  void *dataptr;          /* Address of the variable or chain descriptor that
                             belongs to the option, or NULL */
  const char *long_name;  /* Long option name, which may be an empty string */
  const char *help_text;  /* Description of the option */
} option_item;
398 
399 /* Options without a single-letter equivalent get a negative value. This can be
400 used to identify them. */
401 
402 #define N_COLOUR       (-1)
403 #define N_EXCLUDE      (-2)
404 #define N_EXCLUDE_DIR  (-3)
405 #define N_HELP         (-4)
406 #define N_INCLUDE      (-5)
407 #define N_INCLUDE_DIR  (-6)
408 #define N_LABEL        (-7)
409 #define N_LOCALE       (-8)
410 #define N_NULL         (-9)
411 #define N_LOFFSETS     (-10)
412 #define N_FOFFSETS     (-11)
413 #define N_LBUFFER      (-12)
414 #define N_H_LIMIT      (-13)
415 #define N_M_LIMIT      (-14)
416 #define N_M_LIMIT_DEP  (-15)
417 #define N_BUFSIZE      (-16)
418 #define N_NOJIT        (-17)
419 #define N_FILE_LIST    (-18)
420 #define N_BINARY_FILES (-19)
421 #define N_EXCLUDE_FROM (-20)
422 #define N_INCLUDE_FROM (-21)
423 #define N_OM_SEPARATOR (-22)
424 #define N_MAX_BUFSIZE  (-23)
425 #define N_OM_CAPTURE   (-24)
426 
427 static option_item optionlist[] = {
428   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
429   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
430   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
431   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
432   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
433   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
434   { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer starting size" },
435   { OP_NUMBER,     N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number",  "set processing buffer maximum size" },
436   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
437   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
438   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
439   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
440   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
441   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
442   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
443   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
444   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
445   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
446   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
447   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
448   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
449   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
450   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
451   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
452   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
453   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
454   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
455   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
456   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
457   { OP_SIZE,       N_H_LIMIT, &heap_limit,      "heap-limit=number",  "set PCRE2 heap limit option (kibibytes)" },
458   { OP_U32NUMBER,  N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE2 match limit option" },
459   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
460   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
461   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
462   { OP_NUMBER,     'm',      &count_limit,      "max-count=number", "stop after <number> matched lines" },
463   { OP_STRING,     'N',      &newline_arg,      "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
464   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
465 #ifdef SUPPORT_PCRE2GREP_JIT
466   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
467 #else
468   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcre2grep does not support JIT" },
469 #endif
470   { OP_STRING,     'O',      &output_text,       "output=text",   "show only this text (possibly expanded)" },
471   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
472   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
473   { OP_U32NUMBER,  N_OM_CAPTURE, &capture_max,  "om-capture=n",  "set capture count for --only-matching" },
474   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
475   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
476   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
477   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
478   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
479   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
480   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
481   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
482 #ifdef JFRIEDL_DEBUG
483   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
484 #endif
485   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
486   { OP_NODATA,    't',      NULL,              "total-count",   "print total count of matching lines" },
487   { OP_NODATA,    'u',      NULL,              "utf",           "use UTF mode" },
488   { OP_NODATA,    'U',      NULL,              "utf-allow-invalid", "use UTF mode, allow for invalid code units" },
489   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
490   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
491   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
492   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
493   { OP_NODATA,    0,        NULL,               NULL,            NULL }
494 };
495 
496 /* Table of names for newline types. Must be kept in step with the definitions
497 of PCRE2_NEWLINE_xx in pcre2.h. */
498 
499 static const char *newlines[] = {
500   "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
501 
502 /* UTF-8 tables  */
503 
504 const int utf8_table1[] =
505   { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
506 const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int);
507 
508 const int utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
509 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
510 
511 const char utf8_table4[] = {
512   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
513   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
514   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
515   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
516 
517 
518 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
519 /*************************************************
520 *    Emulated memmove() for systems without it   *
521 *************************************************/
522 
523 /* This function can make use of bcopy() if it is available. Otherwise do it by
524 steam, as there are some non-Unix environments that lack both memmove() and
525 bcopy(). */
526 
static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t remaining = n;

/* When the destination lies above the source, a forward copy could overwrite
source bytes before they have been read, so work downwards from the top of
both areas instead. */

if (to > from)
  {
  to += n;
  from += n;
  while (remaining-- > 0) *(--to) = *(--from);
  }
else
  {
  while (remaining-- > 0) *to++ = *from++;
  }

/* Whichever direction was used, the result is the original destination. */

return d;
#endif   /* not HAVE_BCOPY */
}
551 #undef memmove
552 #define memmove(d,s,n) emulated_memmove(d,s,n)
553 #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
554 
555 
556 
557 /*************************************************
558 *           Convert code point to UTF-8          *
559 *************************************************/
560 
561 /* A static buffer is used. Returns the number of bytes. */
562 
563 static int
ord2utf8(uint32_t value)564 ord2utf8(uint32_t value)
565 {
566 int i, j;
567 uint8_t *utf8bytes = utf8_buffer;
568 for (i = 0; i < utf8_table1_size; i++)
569   if (value <= (uint32_t)utf8_table1[i]) break;
570 utf8bytes += i;
571 for (j = i; j > 0; j--)
572   {
573   *utf8bytes-- = 0x80 | (value & 0x3f);
574   value >>= 6;
575   }
576 *utf8bytes = utf8_table2[i] | value;
577 return i + 1;
578 }
579 
580 
581 
582 /*************************************************
583 *         Case-independent string compare        *
584 *************************************************/
585 
/* Compare two strings, treating characters as equal when tolower() maps them
to the same value.

Arguments:
  str1       first string
  str2       second string

Returns:     0 if the strings are equal under this comparison
            -1 if the first differing character of str1 is the smaller
            +1 if the first differing character of str1 is the larger
*/

static int
strcmpic(const char *str1, const char *str2)
{
for (;; str1++, str2++)
  {
  int c1, c2;

  if (*str1 == '\0' && *str2 == '\0') return 0;

  /* The <ctype.h> functions require a value representable as unsigned char
  (or EOF); passing a negative plain char is undefined behaviour. If only one
  string has ended, its terminating zero differs from the other character, so
  the function returns before reading past the end. */

  c1 = tolower((unsigned char)*str1);
  c2 = tolower((unsigned char)*str2);
  if (c1 != c2) return (c1 > c2)? 1 : -1;
  }
}
598 
599 
600 /*************************************************
601 *         Parse GREP_COLORS                      *
602 *************************************************/
603 
604 /* Extract ms or mt from GREP_COLORS.
605 
606 Argument:  the string, possibly NULL
607 Returns:   the value of ms or mt, or NULL if neither present
608 */
609 
static char *
parse_grep_colors(const char *gc)
{
static char seq[16];
const char *val;
size_t len;

if (gc == NULL) return NULL;

/* "ms=" is preferred; "mt=" is used only when "ms=" is absent. */

val = strstr(gc, "ms=");
if (val == NULL) val = strstr(gc, "mt=");
if (val == NULL) return NULL;
val += 3;

/* The value runs up to the next colon or the end of the string, and is
truncated if it does not fit in the static buffer. */

len = strcspn(val, ":");
if (len > sizeof(seq) - 1) len = sizeof(seq) - 1;
memcpy(seq, val, len);
seq[len] = 0;
return seq;
}
627 
628 
629 /*************************************************
630 *         Exit from the program                  *
631 *************************************************/
632 
633 /* If there has been a resource error, give a suitable message.
634 
635 Argument:  the return code
636 Returns:   does not return
637 */
638 
639 static void
pcre2grep_exit(int rc)640 pcre2grep_exit(int rc)
641 {
642 /* VMS does exit codes differently: both exit(1) and exit(0) return with a
643 status of 1, which is not helpful. To help with this problem, define a symbol
644 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
645 therein. */
646 
647 #ifdef __VMS
648   char val_buf[4];
649   $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
650   $DESCRIPTOR(sym_val, val_buf);
651   sprintf(val_buf, "%d", rc);
652   sym_val.dsc$w_length = strlen(val_buf);
653   lib$set_symbol(&sym_nam, &sym_val);
654 #endif
655 
656 if (resource_error)
657   {
658   fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
659     "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
660     PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
661   fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
662   }
663 exit(rc);
664 }
665 
666 
667 /*************************************************
668 *          Add item to chain of patterns         *
669 *************************************************/
670 
671 /* Used to add an item onto a chain, or just return an unconnected item if the
672 "after" argument is NULL.
673 
674 Arguments:
675   s          pattern string to add
676   patlen     length of pattern
677   after      if not NULL points to item to insert after
678 
679 Returns:     new pattern block or NULL on error
680 */
681 
682 static patstr *
add_pattern(char * s,PCRE2_SIZE patlen,patstr * after)683 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
684 {
685 patstr *p = (patstr *)malloc(sizeof(patstr));
686 if (p == NULL)
687   {
688   fprintf(stderr, "pcre2grep: malloc failed\n");
689   pcre2grep_exit(2);
690   }
691 if (patlen > MAXPATLEN)
692   {
693   fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
694     MAXPATLEN);
695   free(p);
696   return NULL;
697   }
698 p->next = NULL;
699 p->string = s;
700 p->length = patlen;
701 p->compiled = NULL;
702 
703 if (after != NULL)
704   {
705   p->next = after->next;
706   after->next = p;
707   }
708 return p;
709 }
710 
711 
712 /*************************************************
713 *           Free chain of patterns               *
714 *************************************************/
715 
716 /* Used for several chains of patterns.
717 
718 Argument: pointer to start of chain
719 Returns:  nothing
720 */
721 
722 static void
free_pattern_chain(patstr * pc)723 free_pattern_chain(patstr *pc)
724 {
725 while (pc != NULL)
726   {
727   patstr *p = pc;
728   pc = p->next;
729   if (p->compiled != NULL) pcre2_code_free(p->compiled);
730   free(p);
731   }
732 }
733 
734 
735 /*************************************************
736 *           Free chain of file names             *
737 *************************************************/
738 
739 /*
740 Argument: pointer to start of chain
741 Returns:  nothing
742 */
743 
744 static void
free_file_chain(fnstr * fn)745 free_file_chain(fnstr *fn)
746 {
747 while (fn != NULL)
748   {
749   fnstr *f = fn;
750   fn = f->next;
751   free(f);
752   }
753 }
754 
755 
756 /*************************************************
757 *            OS-specific functions               *
758 *************************************************/
759 
760 /* These definitions are needed in all Windows environments, even those where
761 Unix-style directory scanning can be used (see below). */
762 
763 #ifdef WIN32
764 
765 #ifndef STRICT
766 # define STRICT
767 #endif
768 #ifndef WIN32_LEAN_AND_MEAN
769 # define WIN32_LEAN_AND_MEAN
770 #endif
771 
772 #include <windows.h>
773 
774 #define iswild(name) (strpbrk(name, "*?") != NULL)
775 
/* Convert ANSI BGR format to RGB used by Windows. Each use of the argument is
parenthesized so that an expression argument containing low-precedence
operators is converted correctly. Note that the argument is evaluated more than
once, so it must not have side effects. */
#define BGR_RGB(x) ((((x) & 1) ? 4 : 0) | ((x) & 2) | (((x) & 4) ? 1 : 0))
778 
779 static HANDLE hstdout;
780 static CONSOLE_SCREEN_BUFFER_INFO csbi;
781 static WORD match_colour;
782 
783 static WORD
decode_ANSI_colour(const char * cs)784 decode_ANSI_colour(const char *cs)
785 {
786 WORD result = csbi.wAttributes;
787 while (*cs)
788   {
789   if (isdigit(*cs))
790     {
791     int code = atoi(cs);
792     if (code == 1) result |= 0x08;
793     else if (code == 4) result |= 0x8000;
794     else if (code == 5) result |= 0x80;
795     else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
796     else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
797     else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
798     else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
799     /* aixterm high intensity colour codes */
800     else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
801     else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
802 
803     while (isdigit(*cs)) cs++;
804     }
805   if (*cs) cs++;
806   }
807 return result;
808 }
809 
810 
811 static void
init_colour_output()812 init_colour_output()
813 {
814 if (do_colour)
815   {
816   hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
817   /* This fails when redirected to con; try again if so. */
818   if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
819     {
820     HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
821       FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
822     GetConsoleScreenBufferInfo(hcon, &csbi);
823     CloseHandle(hcon);
824     }
825   match_colour = decode_ANSI_colour(colour_string);
826   /* No valid colour found - turn off colouring */
827   if (!match_colour) do_colour = FALSE;
828   }
829 }
830 
831 #endif  /* WIN32 */
832 
833 
834 /* The following sets of functions are defined so that they can be made system
835 specific. At present there are versions for Unix-style environments, Windows,
836 native z/OS, and "no support". */
837 
838 
839 /************* Directory scanning Unix-style and z/OS ***********/
840 
841 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
842 #include <sys/types.h>
843 #include <sys/stat.h>
844 #include <dirent.h>
845 
846 #if defined NATIVE_ZOS
847 /************* Directory and PDS/E scanning for z/OS ***********/
848 /************* z/OS looks mostly like Unix with USS ************/
849 /* However, z/OS needs the #include statements in this header */
850 #include "pcrzosfs.h"
851 /* That header is not included in the main PCRE distribution because
852    other apparatus is needed to compile pcre2grep for z/OS. The header
853    can be found in the special z/OS distribution, which is available
854    from www.zaconsultants.net or from www.cbttape.org. */
855 #endif
856 
857 typedef DIR directory_type;
858 #define FILESEP '/'
859 
/* Test whether a path names a directory. If stat() fails the answer is 0, in
the expectation that opening as a file will fail. */

static int
isdirectory(char *filename)
{
struct stat sb;
return (stat(filename, &sb) >= 0)? S_ISDIR(sb.st_mode) : 0;
}
868 
869 static directory_type *
opendirectory(char * filename)870 opendirectory(char *filename)
871 {
872 return opendir(filename);
873 }
874 
875 static char *
readdirectory(directory_type * dir)876 readdirectory(directory_type *dir)
877 {
878 for (;;)
879   {
880   struct dirent *dent = readdir(dir);
881   if (dent == NULL) return NULL;
882   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
883     return dent->d_name;
884   }
885 /* Control never reaches here */
886 }
887 
888 static void
closedirectory(directory_type * dir)889 closedirectory(directory_type *dir)
890 {
891 closedir(dir);
892 }
893 
894 
895 /************* Test for regular file, Unix-style **********/
896 
/* Test whether a path names a regular file. If stat() fails the answer is 1,
in the expectation that opening as a file will fail. */

static int
isregfile(char *filename)
{
struct stat sb;
return (stat(filename, &sb) >= 0)? S_ISREG(sb.st_mode) : 1;
}
905 
906 
907 #if defined NATIVE_ZOS
908 /************* Test for a terminal in z/OS **********/
909 /* isatty() does not work in a TSO environment, so always give FALSE.*/
910 
static BOOL
is_stdout_tty(void)
{
return FALSE;   /* stdout is never reported as a terminal in this build */
}
916 
917 static BOOL
is_file_tty(FILE * f)918 is_file_tty(FILE *f)
919 {
920 return FALSE;
921 }
922 
923 
924 /************* Test for a terminal, Unix-style **********/
925 
926 #else
927 static BOOL
is_stdout_tty(void)928 is_stdout_tty(void)
929 {
930 return isatty(fileno(stdout));
931 }
932 
933 static BOOL
is_file_tty(FILE * f)934 is_file_tty(FILE *f)
935 {
936 return isatty(fileno(f));
937 }
938 #endif
939 
940 
941 /************* Print optionally coloured match Unix-style and z/OS **********/
942 
943 static void
print_match(const void * buf,int length)944 print_match(const void *buf, int length)
945 {
946 if (length == 0) return;
947 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
948 FWRITE_IGNORE(buf, 1, length, stdout);
949 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
950 }
951 
952 /* End of Unix-style or native z/OS environment functions. */
953 
954 
955 /************* Directory scanning in Windows ***********/
956 
957 /* I (Philip Hazel) have no means of testing this code. It was contributed by
958 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
959 when it did not exist. David Byron added a patch that moved the #include of
960 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
961 */
962 
963 #elif defined WIN32
964 
965 #ifndef INVALID_FILE_ATTRIBUTES
966 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
967 #endif
968 
/* State for one directory scan done with FindFirstFile()/FindNextFile(). */

typedef struct directory_type
{
HANDLE handle;          /* Search handle returned by FindFirstFile() */
BOOL first;             /* TRUE until the entry found by FindFirstFile() has been used */
WIN32_FIND_DATA data;   /* Entry most recently filled in by FindFirstFile()/FindNextFile() */
} directory_type;
975 
976 #define FILESEP '/'
977 
978 int
isdirectory(char * filename)979 isdirectory(char *filename)
980 {
981 DWORD attr = GetFileAttributes(filename);
982 if (attr == INVALID_FILE_ATTRIBUTES)
983   return 0;
984 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
985 }
986 
987 directory_type *
opendirectory(char * filename)988 opendirectory(char *filename)
989 {
990 size_t len;
991 char *pattern;
992 directory_type *dir;
993 DWORD err;
994 len = strlen(filename);
995 pattern = (char *)malloc(len + 3);
996 dir = (directory_type *)malloc(sizeof(*dir));
997 if ((pattern == NULL) || (dir == NULL))
998   {
999   fprintf(stderr, "pcre2grep: malloc failed\n");
1000   pcre2grep_exit(2);
1001   }
1002 memcpy(pattern, filename, len);
1003 if (iswild(filename))
1004   pattern[len] = 0;
1005 else
1006   memcpy(&(pattern[len]), "\\*", 3);
1007 dir->handle = FindFirstFile(pattern, &(dir->data));
1008 if (dir->handle != INVALID_HANDLE_VALUE)
1009   {
1010   free(pattern);
1011   dir->first = TRUE;
1012   return dir;
1013   }
1014 err = GetLastError();
1015 free(pattern);
1016 free(dir);
1017 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
1018 return NULL;
1019 }
1020 
1021 char *
readdirectory(directory_type * dir)1022 readdirectory(directory_type *dir)
1023 {
1024 for (;;)
1025   {
1026   if (!dir->first)
1027     {
1028     if (!FindNextFile(dir->handle, &(dir->data)))
1029       return NULL;
1030     }
1031   else
1032     {
1033     dir->first = FALSE;
1034     }
1035   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
1036     return dir->data.cFileName;
1037   }
1038 #ifndef _MSC_VER
1039 return NULL;   /* Keep compiler happy; never executed */
1040 #endif
1041 }
1042 
1043 void
closedirectory(directory_type * dir)1044 closedirectory(directory_type *dir)
1045 {
1046 FindClose(dir->handle);
1047 free(dir);
1048 }
1049 
1050 
1051 /************* Test for regular file in Windows **********/
1052 
1053 /* I don't know how to do this, or if it can be done; assume all paths are
1054 regular if they are not directories. */
1055 
int
isregfile(char *filename)
{
/* Anything that is not a directory is assumed to be a regular file. */
return isdirectory(filename) == 0;
}
1060 
1061 
1062 /************* Test for a terminal in Windows **********/
1063 
1064 static BOOL
is_stdout_tty(void)1065 is_stdout_tty(void)
1066 {
1067 return _isatty(_fileno(stdout));
1068 }
1069 
1070 static BOOL
is_file_tty(FILE * f)1071 is_file_tty(FILE *f)
1072 {
1073 return _isatty(_fileno(f));
1074 }
1075 
1076 
1077 /************* Print optionally coloured match in Windows **********/
1078 
1079 static void
print_match(const void * buf,int length)1080 print_match(const void *buf, int length)
1081 {
1082 if (length == 0) return;
1083 if (do_colour)
1084   {
1085   if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1086     else SetConsoleTextAttribute(hstdout, match_colour);
1087   }
1088 FWRITE_IGNORE(buf, 1, length, stdout);
1089 if (do_colour)
1090   {
1091   if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1092     else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1093   }
1094 }
1095 
1096 /* End of Windows functions */
1097 
1098 
1099 /************* Directory scanning when we can't do it ***********/
1100 
1101 /* The type is void, and apart from isdirectory(), the functions do nothing. */
1102 
1103 #else
1104 
1105 #define FILESEP 0
1106 typedef void directory_type;
1107 
int
isdirectory(char *filename)
{
(void)filename;   /* Nothing is ever reported as a directory */
return 0;
}
opendirectory(char * filename)1109 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
char *
readdirectory(directory_type *dir)
{
(void)dir;        /* There are never any entries; always a null pointer */
return (char*)0;
}
void
closedirectory(directory_type *dir)
{
(void)dir;        /* Nothing to close */
}
1112 
1113 
1114 /************* Test for regular file when we can't do it **********/
1115 
1116 /* Assume all files are regular. */
1117 
int
isregfile(char *filename)
{
(void)filename;   /* Every path is treated as a regular file */
return 1;
}
1119 
1120 
1121 /************* Test for a terminal when we can't do it **********/
1122 
static BOOL
is_stdout_tty(void)
{
return FALSE;   /* No terminal test is made in this build */
}
1128 
1129 static BOOL
is_file_tty(FILE * f)1130 is_file_tty(FILE *f)
1131 {
1132 return FALSE;
1133 }
1134 
1135 
1136 /************* Print optionally coloured match when we can't do it **********/
1137 
/* Write length bytes from buf to stdout, with no colouring. Nothing is written
for a zero length. */

static void
print_match(const void *buf, int length)
{
if (length != 0)
  {
  FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1144 
1145 #endif  /* End of system-specific functions */
1146 
1147 
1148 
1149 #ifndef HAVE_STRERROR
1150 /*************************************************
1151 *     Provide strerror() for non-ANSI libraries  *
1152 *************************************************/
1153 
1154 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1155 in their libraries, but can provide the same facility by this simple
1156 alternative function. */
1157 
1158 extern int   sys_nerr;
1159 extern char *sys_errlist[];
1160 
1161 char *
strerror(int n)1162 strerror(int n)
1163 {
1164 if (n < 0 || n >= sys_nerr) return "unknown error number";
1165 return sys_errlist[n];
1166 }
1167 #endif /* HAVE_STRERROR */
1168 
1169 
1170 
1171 /*************************************************
1172 *                Usage function                  *
1173 *************************************************/
1174 
1175 static int
usage(int rc)1176 usage(int rc)
1177 {
1178 option_item *op;
1179 fprintf(stderr, "Usage: pcre2grep [-");
1180 for (op = optionlist; op->one_char != 0; op++)
1181   {
1182   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1183   }
1184 fprintf(stderr, "] [long options] [pattern] [files]\n");
1185 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1186   "options.\n");
1187 return rc;
1188 }
1189 
1190 
1191 
1192 /*************************************************
1193 *                Help function                   *
1194 *************************************************/
1195 
1196 static void
help(void)1197 help(void)
1198 {
1199 option_item *op;
1200 
1201 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1202 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1203 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1204 
1205 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1206 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1207 printf("All callout scripts in patterns are supported." STDOUT_NL);
1208 #else
1209 printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1210 #endif
1211 #else
1212 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1213 #endif
1214 
1215 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1216 
1217 #ifdef SUPPORT_LIBZ
1218 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1219 #endif
1220 
1221 #ifdef SUPPORT_LIBBZ2
1222 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1223 #endif
1224 
1225 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1226 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1227 #else
1228 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1229 #endif
1230 
1231 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1232 printf("Options:" STDOUT_NL);
1233 
1234 for (op = optionlist; op->one_char != 0; op++)
1235   {
1236   int n;
1237   char s[4];
1238 
1239   if (op->one_char > 0 && (op->long_name)[0] == 0)
1240     n = 31 - printf("  -%c", op->one_char);
1241   else
1242     {
1243     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1244       else strcpy(s, "   ");
1245     n = 31 - printf("  %s --%s", s, op->long_name);
1246     }
1247 
1248   if (n < 1) n = 1;
1249   printf("%.*s%s" STDOUT_NL, n, "                           ", op->help_text);
1250   }
1251 
1252 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1253 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1254 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1255 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1256 printf("space is removed and blank lines are ignored." STDOUT_NL);
1257 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1258 
1259 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1260 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1261 }
1262 
1263 
1264 
1265 /*************************************************
1266 *            Test exclude/includes               *
1267 *************************************************/
1268 
1269 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1270 there are no includes, the path must match an include pattern.
1271 
1272 Arguments:
1273   path      the path to be matched
1274   ip        the chain of include patterns
1275   ep        the chain of exclude patterns
1276 
1277 Returns:    TRUE if the path is not excluded
1278 */
1279 
1280 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)1281 test_incexc(char *path, patstr *ip, patstr *ep)
1282 {
1283 int plen = strlen((const char *)path);
1284 
1285 for (; ep != NULL; ep = ep->next)
1286   {
1287   if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1288     return FALSE;
1289   }
1290 
1291 if (ip == NULL) return TRUE;
1292 
1293 for (; ip != NULL; ip = ip->next)
1294   {
1295   if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1296     return TRUE;
1297   }
1298 
1299 return FALSE;
1300 }
1301 
1302 
1303 
1304 /*************************************************
1305 *         Decode integer argument value          *
1306 *************************************************/
1307 
1308 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1309 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1310 just keep it simple.
1311 
1312 Arguments:
1313   option_data   the option data string
1314   op            the option item (for error messages)
1315   longop        TRUE if option given in long form
1316 
1317 Returns:        a long integer
1318 */
1319 
1320 static long int
decode_number(char * option_data,option_item * op,BOOL longop)1321 decode_number(char *option_data, option_item *op, BOOL longop)
1322 {
1323 unsigned long int n = 0;
1324 char *endptr = option_data;
1325 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1326 while (isdigit((unsigned char)(*endptr)))
1327   n = n * 10 + (int)(*endptr++ - '0');
1328 if (toupper(*endptr) == 'K')
1329   {
1330   n *= 1024;
1331   endptr++;
1332   }
1333 else if (toupper(*endptr) == 'M')
1334   {
1335   n *= 1024*1024;
1336   endptr++;
1337   }
1338 
1339 if (*endptr != 0)   /* Error */
1340   {
1341   if (longop)
1342     {
1343     char *equals = strchr(op->long_name, '=');
1344     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1345       (int)(equals - op->long_name);
1346     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1347       option_data, nlen, op->long_name);
1348     }
1349   else
1350     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1351       option_data, op->one_char);
1352   pcre2grep_exit(usage(2));
1353   }
1354 
1355 return n;
1356 }
1357 
1358 
1359 
1360 /*************************************************
1361 *       Add item to a chain of numbers           *
1362 *************************************************/
1363 
1364 /* Used to add an item onto a chain, or just return an unconnected item if the
1365 "after" argument is NULL.
1366 
1367 Arguments:
1368   n          the number to add
1369   after      if not NULL points to item to insert after
1370 
1371 Returns:     new number block
1372 */
1373 
1374 static omstr *
add_number(int n,omstr * after)1375 add_number(int n, omstr *after)
1376 {
1377 omstr *om = (omstr *)malloc(sizeof(omstr));
1378 
1379 if (om == NULL)
1380   {
1381   fprintf(stderr, "pcre2grep: malloc failed\n");
1382   pcre2grep_exit(2);
1383   }
1384 om->next = NULL;
1385 om->groupnum = n;
1386 
1387 if (after != NULL)
1388   {
1389   om->next = after->next;
1390   after->next = om;
1391   }
1392 return om;
1393 }
1394 
1395 
1396 
1397 /*************************************************
1398 *            Read one line of input              *
1399 *************************************************/
1400 
1401 /* Normally, input that is to be scanned is read using fread() (or gzread, or
1402 BZ2_read) into a large buffer, so many lines may be read at once. However,
1403 doing this for tty input means that no output appears until a lot of input has
1404 been typed. Instead, tty input is handled line by line. We cannot use fgets()
1405 for this, because it does not stop at a binary zero, and therefore there is no
1406 way of telling how many characters it has read, because there may be binary
1407 zeros embedded in the data. This function is also used for reading patterns
1408 from files (the -f option).
1409 
1410 Arguments:
1411   buffer     the buffer to read into
1412   length     the maximum number of characters to read
1413   f          the file
1414 
1415 Returns:     the number of characters read, zero at end of file
1416 */
1417 
1418 static PCRE2_SIZE
read_one_line(char * buffer,int length,FILE * f)1419 read_one_line(char *buffer, int length, FILE *f)
1420 {
1421 int c;
1422 int yield = 0;
1423 while ((c = fgetc(f)) != EOF)
1424   {
1425   buffer[yield++] = c;
1426   if (c == '\n' || yield >= length) break;
1427   }
1428 return yield;
1429 }
1430 
1431 
1432 
1433 /*************************************************
1434 *             Find end of line                   *
1435 *************************************************/
1436 
1437 /* The length of the endline sequence that is found is set via lenptr. This may
1438 be zero at the very end of the file if there is no line-ending sequence there.
1439 
1440 Arguments:
1441   p         current position in line
1442   endptr    end of available data
1443   lenptr    where to put the length of the eol sequence
1444 
1445 Returns:    pointer after the last byte of the line,
1446             including the newline byte(s)
1447 */
1448 
static char *
end_of_line(char *p, char *endptr, int *lenptr)
{
switch(endlinetype)
  {
  /* The three single-character cases scan forward for the terminator. If it is
  found, the result points just after it; otherwise the result is endptr with a
  zero length. */

  default:      /* Just in case */
  case PCRE2_NEWLINE_LF:
  while (p < endptr && *p != '\n') p++;
  if (p < endptr)
    {
    *lenptr = 1;
    return p + 1;
    }
  *lenptr = 0;
  return endptr;

  case PCRE2_NEWLINE_CR:
  while (p < endptr && *p != '\r') p++;
  if (p < endptr)
    {
    *lenptr = 1;
    return p + 1;
    }
  *lenptr = 0;
  return endptr;

  case PCRE2_NEWLINE_NUL:
  while (p < endptr && *p != '\0') p++;
  if (p < endptr)
    {
    *lenptr = 1;
    return p + 1;
    }
  *lenptr = 0;
  return endptr;

  /* Only CR immediately followed by LF ends a line. A CR that is followed by
  something else is passed over and the scan continues after it. */

  case PCRE2_NEWLINE_CRLF:
  for (;;)
    {
    while (p < endptr && *p != '\r') p++;
    if (++p >= endptr)      /* No room left for an LF after the CR */
      {
      *lenptr = 0;
      return endptr;
      }
    if (*p == '\n')
      {
      *lenptr = 2;
      return p + 1;
      }
    }
  break;

  /* CR, LF, or CRLF ends a line. In UTF mode a multi-byte character is decoded
  so that p always advances by a whole character. */

  case PCRE2_NEWLINE_ANYCRLF:
  while (p < endptr)
    {
    int extra = 0;
    int c = *((unsigned char *)p);

    if (utf && c >= 0xc0)
      {
      int gcii, gcss;
      extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
      gcss = 6*extra;
      c = (c & utf8_table3[extra]) << gcss;
      for (gcii = 1; gcii <= extra; gcii++)
        {
        gcss -= 6;
        c |= (p[gcii] & 0x3f) << gcss;
        }
      }

    p += 1 + extra;   /* Now points after the character just examined */

    switch (c)
      {
      case '\n':
      *lenptr = 1;
      return p;

      case '\r':
      if (p < endptr && *p == '\n')
        {
        *lenptr = 2;
        p++;
        }
      else *lenptr = 1;
      return p;

      default:
      break;
      }
    }   /* End of loop for ANYCRLF case */

  *lenptr = 0;  /* Must have hit the end */
  return endptr;

  /* As ANYCRLF, with VT and FF added and, unless compiled for EBCDIC, NEL, LS,
  and PS as well. */

  case PCRE2_NEWLINE_ANY:
  while (p < endptr)
    {
    int extra = 0;
    int c = *((unsigned char *)p);

    if (utf && c >= 0xc0)
      {
      int gcii, gcss;
      extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
      gcss = 6*extra;
      c = (c & utf8_table3[extra]) << gcss;
      for (gcii = 1; gcii <= extra; gcii++)
        {
        gcss -= 6;
        c |= (p[gcii] & 0x3f) << gcss;
        }
      }

    p += 1 + extra;   /* Now points after the character just examined */

    switch (c)
      {
      case '\n':    /* LF */
      case '\v':    /* VT */
      case '\f':    /* FF */
      *lenptr = 1;
      return p;

      case '\r':    /* CR */
      if (p < endptr && *p == '\n')
        {
        *lenptr = 2;
        p++;
        }
      else *lenptr = 1;
      return p;

#ifndef EBCDIC
      case 0x85:    /* Unicode NEL */
      *lenptr = utf? 2 : 1;   /* Reported as two bytes in UTF mode */
      return p;

      case 0x2028:  /* Unicode LS */
      case 0x2029:  /* Unicode PS */
      *lenptr = 3;
      return p;
#endif  /* Not EBCDIC */

      default:
      break;
      }
    }   /* End of loop for ANY case */

  *lenptr = 0;  /* Must have hit the end */
  return endptr;
  }     /* End of overall switch */
}
1604 
1605 
1606 
1607 /*************************************************
1608 *         Find start of previous line            *
1609 *************************************************/
1610 
1611 /* This is called when looking back for before lines to print.
1612 
1613 Arguments:
1614   p         start of the subsequent line
1615   startptr  start of available data
1616 
1617 Returns:    pointer to the start of the previous line
1618 */
1619 
1620 static char *
previous_line(char * p,char * startptr)1621 previous_line(char *p, char *startptr)
1622 {
1623 switch(endlinetype)
1624   {
1625   default:      /* Just in case */
1626   case PCRE2_NEWLINE_LF:
1627   p--;
1628   while (p > startptr && p[-1] != '\n') p--;
1629   return p;
1630 
1631   case PCRE2_NEWLINE_CR:
1632   p--;
1633   while (p > startptr && p[-1] != '\n') p--;
1634   return p;
1635 
1636   case PCRE2_NEWLINE_NUL:
1637   p--;
1638   while (p > startptr && p[-1] != '\0') p--;
1639   return p;
1640 
1641   case PCRE2_NEWLINE_CRLF:
1642   for (;;)
1643     {
1644     p -= 2;
1645     while (p > startptr && p[-1] != '\n') p--;
1646     if (p <= startptr + 1 || p[-2] == '\r') return p;
1647     }
1648   /* Control can never get here */
1649 
1650   case PCRE2_NEWLINE_ANY:
1651   case PCRE2_NEWLINE_ANYCRLF:
1652   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1653   if (utf) while ((*p & 0xc0) == 0x80) p--;
1654 
1655   while (p > startptr)
1656     {
1657     unsigned int c;
1658     char *pp = p - 1;
1659 
1660     if (utf)
1661       {
1662       int extra = 0;
1663       while ((*pp & 0xc0) == 0x80) pp--;
1664       c = *((unsigned char *)pp);
1665       if (c >= 0xc0)
1666         {
1667         int gcii, gcss;
1668         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1669         gcss = 6*extra;
1670         c = (c & utf8_table3[extra]) << gcss;
1671         for (gcii = 1; gcii <= extra; gcii++)
1672           {
1673           gcss -= 6;
1674           c |= (pp[gcii] & 0x3f) << gcss;
1675           }
1676         }
1677       }
1678     else c = *((unsigned char *)pp);
1679 
1680     if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1681       {
1682       case '\n':    /* LF */
1683       case '\r':    /* CR */
1684       return p;
1685 
1686       default:
1687       break;
1688       }
1689 
1690     else switch (c)
1691       {
1692       case '\n':    /* LF */
1693       case '\v':    /* VT */
1694       case '\f':    /* FF */
1695       case '\r':    /* CR */
1696 #ifndef EBCDIC
1697       case 0x85:    /* Unicode NEL */
1698       case 0x2028:  /* Unicode LS */
1699       case 0x2029:  /* Unicode PS */
1700 #endif  /* Not EBCDIC */
1701       return p;
1702 
1703       default:
1704       break;
1705       }
1706 
1707     p = pp;  /* Back one character */
1708     }        /* End of loop for ANY case */
1709 
1710   return startptr;  /* Hit start of data */
1711   }     /* End of overall switch */
1712 }
1713 
1714 
1715 
1716 /*************************************************
1717 *              Output newline at end             *
1718 *************************************************/
1719 
1720 /* This function is called if the final line of a file has been written to
1721 stdout, but it does not have a terminating newline.
1722 
1723 Arguments:  none
1724 Returns:    nothing
1725 */
1726 
1727 static void
write_final_newline(void)1728 write_final_newline(void)
1729 {
1730 switch(endlinetype)
1731   {
1732   default:      /* Just in case */
1733   case PCRE2_NEWLINE_LF:
1734   case PCRE2_NEWLINE_ANY:
1735   case PCRE2_NEWLINE_ANYCRLF:
1736   fprintf(stdout, "\n");
1737   break;
1738 
1739   case PCRE2_NEWLINE_CR:
1740   fprintf(stdout, "\r");
1741   break;
1742 
1743   case PCRE2_NEWLINE_CRLF:
1744   fprintf(stdout, "\r\n");
1745   break;
1746 
1747   case PCRE2_NEWLINE_NUL:
1748   fprintf(stdout, "%c", 0);
1749   break;
1750   }
1751 }
1752 
1753 
1754 /*************************************************
1755 *       Print the previous "after" lines         *
1756 *************************************************/
1757 
1758 /* This is called if we are about to lose said lines because of buffer filling,
1759 and at the end of the file. The data in the line is written using fwrite() so
1760 that a binary zero does not terminate it.
1761 
1762 Arguments:
1763   lastmatchnumber   the number of the last matching line, plus one
1764   lastmatchrestart  where we restarted after the last match
1765   endptr            end of available data
1766   printname         filename for printing
1767 
1768 Returns:            nothing
1769 */
1770 
1771 static void
do_after_lines(unsigned long int lastmatchnumber,char * lastmatchrestart,char * endptr,const char * printname)1772 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1773   char *endptr, const char *printname)
1774 {
1775 if (after_context > 0 && lastmatchnumber > 0)
1776   {
1777   int count = 0;
1778   int ellength = 0;
1779   while (lastmatchrestart < endptr && count < after_context)
1780     {
1781     char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1782     if (ellength == 0 && pp == main_buffer + bufsize) break;
1783     if (printname != NULL) fprintf(stdout, "%s-", printname);
1784     if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1785     FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1786     lastmatchrestart = pp;
1787     count++;
1788     }
1789 
1790   /* If we have printed any lines, arrange for a hyphen separator if anything
1791   else follows. Also, if the last line is the final line in the file and it had
1792   no newline, add one. */
1793 
1794   if (count > 0)
1795     {
1796     hyphenpending = TRUE;
1797     if (ellength == 0 && lastmatchrestart >= endptr)
1798       write_final_newline();
1799     }
1800   }
1801 }
1802 
1803 
1804 
1805 /*************************************************
1806 *   Apply patterns to subject till one matches   *
1807 *************************************************/
1808 
1809 /* This function is called to run through all patterns, looking for a match. It
1810 is used multiple times for the same subject when colouring is enabled, in order
1811 to find all possible matches.
1812 
1813 Arguments:
1814   matchptr     the start of the subject
1815   length       the length of the subject to match
  options      options for pcre2_match()
1817   startoffset  where to start matching
1818   mrc          address of where to put the result of pcre2_match()
1819 
1820 Returns:      TRUE if there was a match
1821               FALSE if there was no match
1822               invert if there was a non-fatal error
1823 */
1824 
1825 static BOOL
match_patterns(char * matchptr,PCRE2_SIZE length,unsigned int options,PCRE2_SIZE startoffset,int * mrc)1826 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1827   PCRE2_SIZE startoffset, int *mrc)
1828 {
1829 int i;
1830 PCRE2_SIZE slen = length;
1831 patstr *p = patterns;
1832 const char *msg = "this text:\n\n";
1833 
1834 if (slen > 200)
1835   {
1836   slen = 200;
1837   msg = "text that starts:\n\n";
1838   }
1839 
1840 for (i = 1; p != NULL; p = p->next, i++)
1841   {
1842   *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1843     startoffset, options, match_data, match_context);
1844   if (*mrc >= 0) return TRUE;
1845   if (*mrc == PCRE2_ERROR_NOMATCH) continue;
1846   fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1847   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1848   fprintf(stderr, "%s", msg);
1849   FWRITE_IGNORE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1850   fprintf(stderr, "\n\n");
1851   if (*mrc <= PCRE2_ERROR_UTF8_ERR1 &&
1852       *mrc >= PCRE2_ERROR_UTF8_ERR21)
1853     {
1854     unsigned char mbuffer[256];
1855     PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
1856     (void)pcre2_get_error_message(*mrc, mbuffer, sizeof(mbuffer));
1857     fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer,
1858       SIZ_CAST startchar);
1859     }
1860   if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
1861       *mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1862     resource_error = TRUE;
1863   if (error_count++ > 20)
1864     {
1865     fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1866     pcre2grep_exit(2);
1867     }
1868   return invert;    /* No more matching; don't show the line again */
1869   }
1870 
1871 return FALSE;  /* No match, no errors */
1872 }
1873 
1874 
1875 
1876 /*************************************************
1877 *          Decode dollar escape sequence         *
1878 *************************************************/
1879 
1880 /* Called from various places to decode $ escapes in output strings. The escape
1881 sequences are as follows:
1882 
1883 $<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
1884 zero is never returned; '0' is substituted.
1885 
1886 $a returns bell.
1887 $b returns backspace.
1888 $e returns escape.
1889 $f returns form feed.
1890 $n returns newline.
1891 $r returns carriage return.
1892 $t returns tab.
1893 $v returns vertical tab.
1894 $o<digits> returns the character represented by the given octal
1895   number; up to three digits are processed.
1896 $o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
1897   code points.
1898 $x<digits> returns the character represented by the given hexadecimal
1899   number; up to two digits are processed.
$x{<digits>} does the same, up to 6 digits, but gives an error for mode-invalid
1901   code points.
1902 Any other character is substituted by itself. E.g: $$ is replaced by a single
1903 dollar.
1904 
1905 Arguments:
1906   begin      the start of the whole string
1907   string     points to the $
1908   callout    TRUE if in a callout (inhibits error messages)
1909   value      where to return a value
1910   last       where to return pointer to the last used character
1911 
1912 Returns:     DDE_ERROR    after a syntax error
1913              DDE_CAPTURE  if *value is a capture number
1914              DDE_CHAR     if *value is a character code
1915 */
1916 
1917 static int
decode_dollar_escape(PCRE2_SPTR begin,PCRE2_SPTR string,BOOL callout,uint32_t * value,PCRE2_SPTR * last)1918 decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
1919   uint32_t *value, PCRE2_SPTR *last)
1920 {
1921 uint32_t c = 0;
1922 int base = 10;
1923 int dcount;
1924 int rc = DDE_CHAR;
1925 BOOL brace = FALSE;
1926 
1927 switch (*(++string))
1928   {
1929   case 0:   /* Syntax error: a character must be present after $. */
1930   if (!callout)
1931     fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1932       (int)(string - begin), "no character after $");
1933   *last = string;
1934   return DDE_ERROR;
1935 
1936   case '{':
1937   brace = TRUE;
1938   string++;
1939   if (!isdigit(*string))  /* Syntax error: a decimal number required. */
1940     {
1941     if (!callout)
1942       fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1943         (int)(string - begin), "decimal number expected");
1944     rc = DDE_ERROR;
1945     break;
1946     }
1947 
1948   /* Fall through */
1949 
1950   /* The maximum capture number is 65535, so any number greater than that will
1951   always be an unknown capture number. We just stop incrementing, in order to
1952   avoid overflow. */
1953 
1954   case '0': case '1': case '2': case '3': case '4':
1955   case '5': case '6': case '7': case '8': case '9':
1956   do
1957     {
1958     if (c <= 65535) c = c * 10 + (*string - '0');
1959     string++;
1960     }
1961   while (*string >= '0' && *string <= '9');
1962   string--;  /* Point to last digit */
1963 
1964   /* In a callout, capture number 0 is not available. No error can be given,
1965   so just return the character '0'. */
1966 
1967   if (callout && c == 0)
1968     {
1969     *value = '0';
1970     }
1971   else
1972     {
1973     *value = c;
1974     rc = DDE_CAPTURE;
1975     }
1976   break;
1977 
1978   /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
1979   for valid Unicode code points. */
1980 
1981   case 'o':
1982   base = 8;
1983   string++;
1984   if (*string == '{')
1985     {
1986     brace = TRUE;
1987     string++;
1988     dcount = 7;
1989     }
1990   else dcount = 3;
1991   for (; dcount > 0; dcount--)
1992     {
1993     if (*string < '0' || *string > '7') break;
1994     c = c * 8 + (*string++ - '0');
1995     }
1996   *value = c;
1997   string--;  /* Point to last digit */
1998   break;
1999 
2000   /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
2001   for valid Unicode code points. */
2002 
2003   case 'x':
2004   base = 16;
2005   string++;
2006   if (*string == '{')
2007     {
2008     brace = TRUE;
2009     string++;
2010     dcount = 6;
2011     }
2012   else dcount = 2;
2013   for (; dcount > 0; dcount--)
2014     {
2015     if (!isxdigit(*string)) break;
2016     if (*string >= '0' && *string <= '9')
2017       c = c *16 + *string++ - '0';
2018     else
2019       c = c * 16 + (*string++ | 0x20) - 'a' + 10;
2020     }
2021   *value = c;
2022   string--;  /* Point to last digit */
2023   break;
2024 
2025   case 'a': *value = '\a'; break;
2026   case 'b': *value = '\b'; break;
2027 #ifndef EBCDIC
2028   case 'e': *value = '\033'; break;
2029 #else
2030   case 'e': *value = '\047'; break;
2031 #endif
2032   case 'f': *value = '\f'; break;
2033   case 'n': *value = STDOUT_NL_CODE; break;
2034   case 'r': *value = '\r'; break;
2035   case 't': *value = '\t'; break;
2036   case 'v': *value = '\v'; break;
2037 
2038   default: *value = *string; break;
2039   }
2040 
2041 if (brace)
2042   {
2043   c = string[1];
2044   if (c != '}')
2045     {
2046     rc = DDE_ERROR;
2047     if (!callout)
2048       {
2049       if ((base == 8 && c >= '0' && c <= '7') ||
2050           (base == 16 && isxdigit(c)))
2051         {
2052         fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2053           "too many %s digits\n", (int)(string - begin),
2054           (base == 8)? "octal" : "hex");
2055         }
2056       else
2057         {
2058         fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2059           (int)(string - begin), "missing closing brace");
2060         }
2061       }
2062     }
2063   else string++;
2064   }
2065 
2066 /* Check maximum code point values, but take note of STDOUT_NL_CODE. */
2067 
2068 if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
2069   {
2070   uint32_t max = utf? 0x0010ffffu : 0xffu;
2071   if (*value > max)
2072     {
2073     if (!callout)
2074       fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2075         "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
2076     rc = DDE_ERROR;
2077     }
2078   }
2079 
2080 *last = string;
2081 return rc;
2082 }
2083 
2084 
2085 
2086 /*************************************************
2087 *          Check output text for errors          *
2088 *************************************************/
2089 
2090 /* Called early, to get errors before doing anything for -O text; also called
2091 from callouts to check before outputting.
2092 
2093 Arguments:
2094   string    an --output text string
2095   callout   TRUE if in a callout (stops printing errors)
2096 
2097 Returns:    TRUE if OK, FALSE on error
2098 */
2099 
2100 static BOOL
syntax_check_output_text(PCRE2_SPTR string,BOOL callout)2101 syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
2102 {
2103 uint32_t value;
2104 PCRE2_SPTR begin = string;
2105 
2106 for (; *string != 0; string++)
2107   {
2108   if (*string == '$' &&
2109     decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
2110       return FALSE;
2111   }
2112 
2113 return TRUE;
2114 }
2115 
2116 
2117 /*************************************************
2118 *              Display output text               *
2119 *************************************************/
2120 
2121 /* Display the output text, which is assumed to have already been syntax
2122 checked. Output may contain escape sequences started by the dollar sign.
2123 
2124 Arguments:
2125   string:       the output text
2126   callout:      TRUE for the builtin callout, FALSE for --output
2127   subject       the start of the subject
2128   ovector:      capture offsets
2129   capture_top:  number of captures
2130 
2131 Returns:        TRUE if something was output, other than newline
2132                 FALSE if nothing was output, or newline was last output
2133 */
2134 
2135 static BOOL
display_output_text(PCRE2_SPTR string,BOOL callout,PCRE2_SPTR subject,PCRE2_SIZE * ovector,PCRE2_SIZE capture_top)2136 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
2137   PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
2138 {
2139 uint32_t value;
2140 BOOL printed = FALSE;
2141 PCRE2_SPTR begin = string;
2142 
2143 for (; *string != 0; string++)
2144   {
2145   if (*string == '$')
2146     {
2147     switch(decode_dollar_escape(begin, string, callout, &value, &string))
2148       {
2149       case DDE_CHAR:
2150       if (value == STDOUT_NL_CODE)
2151         {
2152         fprintf(stdout, STDOUT_NL);
2153         printed = FALSE;
2154         continue;
2155         }
2156       break;  /* Will print value */
2157 
2158       case DDE_CAPTURE:
2159       if (value < capture_top)
2160         {
2161         PCRE2_SIZE capturesize;
2162         value *= 2;
2163         capturesize = ovector[value + 1] - ovector[value];
2164         if (capturesize > 0)
2165           {
2166           print_match(subject + ovector[value], capturesize);
2167           printed = TRUE;
2168           }
2169         }
2170       continue;
2171 
2172       default:  /* Should not occur */
2173       break;
2174       }
2175     }
2176 
2177   else value = *string;  /* Not a $ escape */
2178 
2179   if (utf && value <= 127) fprintf(stdout, "%c", *string); else
2180     {
2181     int i;
2182     int n = ord2utf8(value);
2183     for (i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
2184     }
2185 
2186   printed = TRUE;
2187   }
2188 
2189 return printed;
2190 }
2191 
2192 
2193 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2194 
2195 /*************************************************
2196 *        Parse and execute callout scripts       *
2197 *************************************************/
2198 
2199 /* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
2200 string block and executes the program specified by the string. The string is a
2201 list of substrings separated by pipe characters. The first substring represents
2202 the executable name, and the following substrings specify the arguments:
2203 
2204   program_name|param1|param2|...
2205 
2206 Any substring (including the program name) can contain escape sequences
2207 started by the dollar character. The escape sequences are substituted as
2208 follows:
2209 
2210   $<digits> or ${<digits>} is replaced by the captured substring of the given
2211   decimal number, which must be greater than zero. If the number is greater
2212   than the number of capturing substrings, or if the capture is unset, the
2213   replacement is empty.
2214 
2215   Any other character is substituted by itself. E.g: $$ is replaced by a single
2216   dollar or $| replaced by a pipe character.
2217 
2218 Alternatively, if string starts with pipe, the remainder is taken as an output
2219 string, same as --output. This is the only form that is supported if
2220 SUPPORT_PCRE2GREP_FORK is not defined. In this case, --om-separator is used to
2221 separate each callout, defaulting to newline.
2222 
2223 Example:
2224 
2225   echo -e "abcde\n12345" | pcre2grep \
2226     '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2227 
2228   Output:
2229 
2230     Arg1: [a] [bcd] [d] Arg2: |a| ()
2231     abcde
2232     Arg1: [1] [234] [4] Arg2: |1| ()
2233     12345
2234 
2235 Arguments:
2236   blockptr     the callout block
2237 
2238 Returns:       currently it always returns with 0
2239 */
2240 
2241 static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)2242 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2243 {
2244 PCRE2_SIZE length = calloutptr->callout_string_length;
2245 PCRE2_SPTR string = calloutptr->callout_string;
2246 PCRE2_SPTR subject = calloutptr->subject;
2247 PCRE2_SIZE *ovector = calloutptr->offset_vector;
2248 PCRE2_SIZE capture_top = calloutptr->capture_top;
2249 
2250 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
2251 PCRE2_SIZE argsvectorlen = 2;
2252 PCRE2_SIZE argslen = 1;
2253 char *args;
2254 char *argsptr;
2255 char **argsvector;
2256 char **argsvectorptr;
2257 #ifndef WIN32
2258 pid_t pid;
2259 #endif
2260 int result = 0;
2261 #endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2262 
2263 (void)unused;   /* Avoid compiler warning */
2264 
2265 /* Only callouts with strings are supported. */
2266 
2267 if (string == NULL || length == 0) return 0;
2268 
2269 /* If there's no command, output the remainder directly. */
2270 
2271 if (*string == '|')
2272   {
2273   string++;
2274   if (!syntax_check_output_text(string, TRUE)) return 0;
2275   (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2276   return 0;
2277   }
2278 
2279 #ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
2280 return 0;
2281 #else
2282 
2283 /* Checking syntax and compute the number of string fragments. Callout strings
2284 are silently ignored in the event of a syntax error. */
2285 
2286 while (length > 0)
2287   {
2288   if (*string == '|')
2289     {
2290     argsvectorlen++;
2291     if (argsvectorlen > 10000) return 0;  /* Too many args */
2292     }
2293 
2294   else if (*string == '$')
2295     {
2296     uint32_t value;
2297     PCRE2_SPTR begin = string;
2298 
2299     switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2300       {
2301       case DDE_CAPTURE:
2302       if (value < capture_top)
2303         {
2304         value *= 2;
2305         argslen += ovector[value + 1] - ovector[value];
2306         }
2307       argslen--;   /* Negate the effect of argslen++ below. */
2308       break;
2309 
2310       case DDE_CHAR:
2311       if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
2312         else if (utf && value > 127) argslen += ord2utf8(value) - 1;
2313       break;
2314 
2315       default:         /* Should not occur */
2316       case DDE_ERROR:
2317       return 0;
2318       }
2319 
2320     length -= (string - begin);
2321     }
2322 
2323   string++;
2324   length--;
2325   argslen++;
2326   }
2327 
2328 /* Get memory for the argument vector and its strings. */
2329 
2330 args = (char*)malloc(argslen);
2331 if (args == NULL) return 0;
2332 
2333 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2334 if (argsvector == NULL)
2335   {
2336   free(args);
2337   return 0;
2338   }
2339 
2340 /* Now reprocess the string and set up the arguments. */
2341 
2342 argsptr = args;
2343 argsvectorptr = argsvector;
2344 *argsvectorptr++ = argsptr;
2345 
2346 length = calloutptr->callout_string_length;
2347 string = calloutptr->callout_string;
2348 
2349 while (length > 0)
2350   {
2351   if (*string == '|')
2352     {
2353     *argsptr++ = '\0';
2354     *argsvectorptr++ = argsptr;
2355     }
2356 
2357   else if (*string == '$')
2358     {
2359     uint32_t value;
2360     PCRE2_SPTR begin = string;
2361 
2362     switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2363       {
2364       case DDE_CAPTURE:
2365       if (value < capture_top)
2366         {
2367         PCRE2_SIZE capturesize;
2368         value *= 2;
2369         capturesize = ovector[value + 1] - ovector[value];
2370         memcpy(argsptr, subject + ovector[value], capturesize);
2371         argsptr += capturesize;
2372         }
2373       break;
2374 
2375       case DDE_CHAR:
2376       if (value == STDOUT_NL_CODE)
2377         {
2378         memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
2379         argsptr += STDOUT_NL_LEN;
2380         }
2381       else if (utf && value > 127)
2382         {
2383         int n = ord2utf8(value);
2384         memcpy(argsptr, utf8_buffer, n);
2385         argsptr += n;
2386         }
2387       else
2388         {
2389         *argsptr++ = value;
2390         }
2391       break;
2392 
2393       default:         /* Even though this should not occur, the string having */
2394       case DDE_ERROR:  /* been checked above, we need to include the free() */
2395       free(args);      /* calls so that source checkers do not complain. */
2396       free(argsvector);
2397       return 0;
2398       }
2399 
2400     length -= (string - begin);
2401     }
2402 
2403   else *argsptr++ = *string;
2404 
2405   /* Advance along the string */
2406 
2407   string++;
2408   length--;
2409   }
2410 
2411 *argsptr++ = '\0';
2412 *argsvectorptr = NULL;
2413 
2414 /* Running an external command is system-dependent. Handle Windows and VMS as
2415 necessary, otherwise assume fork(). */
2416 
2417 #ifdef WIN32
2418 result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2419 
2420 #elif defined __VMS
2421   {
2422   char cmdbuf[500];
2423   short i = 0;
2424   int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
2425   $DESCRIPTOR(cmd, cmdbuf);
2426 
2427   cmdbuf[0] = 0;
2428   while (argsvector[i])
2429   {
2430     strcat(cmdbuf, argsvector[i]);
2431     strcat(cmdbuf, " ");
2432     i++;
2433   }
2434   cmd.dsc$w_length = strlen(cmdbuf) - 1;
2435   status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
2436   if (!(status & 1)) result = 0;
2437   else result = retstat & 1 ? 0 : 1;
2438   }
2439 
2440 #else  /* Neither Windows nor VMS */
2441 pid = fork();
2442 if (pid == 0)
2443   {
2444   (void)execv(argsvector[0], argsvector);
2445   /* Control gets here if there is an error, e.g. a non-existent program */
2446   exit(1);
2447   }
2448 else if (pid > 0)
2449   (void)waitpid(pid, &result, 0);
2450 #endif  /* End Windows/VMS/other handling */
2451 
2452 free(args);
2453 free(argsvector);
2454 
2455 /* Currently negative return values are not supported, only zero (match
2456 continues) or non-zero (match fails). */
2457 
2458 return result != 0;
2459 #endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2460 }
2461 #endif  /* SUPPORT_PCRE2GREP_CALLOUT */
2462 
2463 
2464 
2465 /*************************************************
2466 *     Read a portion of the file into buffer     *
2467 *************************************************/
2468 
2469 static int
fill_buffer(void * handle,int frtype,char * buffer,int length,BOOL input_line_buffered)2470 fill_buffer(void *handle, int frtype, char *buffer, int length,
2471   BOOL input_line_buffered)
2472 {
2473 (void)frtype;  /* Avoid warning when not used */
2474 
2475 #ifdef SUPPORT_LIBZ
2476 if (frtype == FR_LIBZ)
2477   return gzread((gzFile)handle, buffer, length);
2478 else
2479 #endif
2480 
2481 #ifdef SUPPORT_LIBBZ2
2482 if (frtype == FR_LIBBZ2)
2483   return BZ2_bzread((BZFILE *)handle, buffer, length);
2484 else
2485 #endif
2486 
2487 return (input_line_buffered ?
2488   read_one_line(buffer, length, (FILE *)handle) :
2489   fread(buffer, 1, length, (FILE *)handle));
2490 }
2491 
2492 
2493 
2494 /*************************************************
2495 *            Grep an individual file             *
2496 *************************************************/
2497 
2498 /* This is called from grep_or_recurse() below. It uses a buffer that is three
2499 times the value of bufthird. The matching point is never allowed to stray into
2500 the top third of the buffer, thus keeping more of the file available for
2501 context printing or for multiline scanning. For large files, the pointer will
2502 be in the middle third most of the time, so the bottom third is available for
2503 "before" context printing.
2504 
2505 Arguments:
2506   handle       the fopened FILE stream for a normal file
2507                the gzFile pointer when reading is via libz
2508                the BZFILE pointer when reading is via libbz2
2509   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2510   filename     the file name or NULL (for errors)
2511   printname    the file name if it is to be printed for each match
2512                or NULL if the file name is not to be printed
2513                it cannot be NULL if filenames[_nomatch]_only is set
2514 
2515 Returns:       0 if there was at least one match
2516                1 otherwise (no matches)
2517                2 if an overlong line is encountered
2518                3 if there is a read error on a .bz2 file
2519 */
2520 
2521 static int
pcre2grep(void * handle,int frtype,const char * filename,const char * printname)2522 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2523 {
2524 int rc = 1;
2525 int filepos = 0;
2526 unsigned long int linenumber = 1;
2527 unsigned long int lastmatchnumber = 0;
2528 unsigned long int count = 0;
2529 long int count_matched_lines = 0;
2530 char *lastmatchrestart = main_buffer;
2531 char *ptr = main_buffer;
2532 char *endptr;
2533 PCRE2_SIZE bufflength;
2534 BOOL binary = FALSE;
2535 BOOL endhyphenpending = FALSE;
2536 BOOL lines_printed = FALSE;
2537 BOOL input_line_buffered = line_buffered;
2538 FILE *in = NULL;                    /* Ensure initialized */
2539 
2540 /* Do the first read into the start of the buffer and set up the pointer to end
2541 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2542 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2543 fail. */
2544 
2545 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2546   {
2547   in = (FILE *)handle;
2548   if (is_file_tty(in)) input_line_buffered = TRUE;
2549   }
2550 else input_line_buffered = FALSE;
2551 
2552 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2553   input_line_buffered);
2554 
2555 #ifdef SUPPORT_LIBBZ2
2556 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2;   /* Gotcha: bufflength is PCRE2_SIZE */
2557 #endif
2558 
2559 endptr = main_buffer + bufflength;
2560 
2561 /* Unless binary-files=text, see if we have a binary file. This uses the same
2562 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2563 file. However, when the newline convention is binary zero, we can't do this. */
2564 
2565 if (binary_files != BIN_TEXT)
2566   {
2567   if (endlinetype != PCRE2_NEWLINE_NUL)
2568     binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2569       != NULL;
2570   if (binary && binary_files == BIN_NOMATCH) return 1;
2571   }
2572 
2573 /* Loop while the current pointer is not at the end of the file. For large
2574 files, endptr will be at the end of the buffer when we are in the middle of the
2575 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2576 way, the buffer is shifted left and re-filled. */
2577 
2578 while (ptr < endptr)
2579   {
2580   int endlinelength;
2581   int mrc = 0;
2582   unsigned int options = 0;
2583   BOOL match;
2584   BOOL line_matched = FALSE;
2585   char *t = ptr;
2586   PCRE2_SIZE length, linelength;
2587   PCRE2_SIZE startoffset = 0;
2588 
2589   /* If the -m option set a limit for the number of matched or non-matched
2590   lines, check it here. A limit of zero means that no matching is ever done.
2591   For stdin from a file, set the file position. */
2592 
2593   if (count_limit >= 0 && count_matched_lines >= count_limit)
2594     {
2595     if (frtype == FR_PLAIN && filename == stdin_name && !is_file_tty(handle))
2596       (void)fseek(handle, (long int)filepos, SEEK_SET);
2597     rc = (count_limit == 0)? 1 : 0;
2598     break;
2599     }
2600 
2601   /* At this point, ptr is at the start of a line. We need to find the length
2602   of the subject string to pass to pcre2_match(). In multiline mode, it is the
2603   length remainder of the data in the buffer. Otherwise, it is the length of
2604   the next line, excluding the terminating newline. After matching, we always
2605   advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2606   option is used for compiling, so that any match is constrained to be in the
2607   first line. */
2608 
2609   t = end_of_line(t, endptr, &endlinelength);
2610   linelength = t - ptr - endlinelength;
2611   length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2612 
2613   /* Check to see if the line we are looking at extends right to the very end
2614   of the buffer without a line terminator. This means the line is too long to
2615   handle at the current buffer size. Until the buffer reaches its maximum size,
2616   try doubling it and reading more data. */
2617 
2618   if (endlinelength == 0 && t == main_buffer + bufsize)
2619     {
2620     if (bufthird < max_bufthird)
2621       {
2622       char *new_buffer;
2623       int new_bufthird = 2*bufthird;
2624 
2625       if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2626       new_buffer = (char *)malloc(3*new_bufthird);
2627 
2628       if (new_buffer == NULL)
2629         {
2630         fprintf(stderr,
2631           "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2632           "pcre2grep: not enough memory to increase the buffer size to %d\n",
2633           linenumber,
2634           (filename == NULL)? "" : " of file ",
2635           (filename == NULL)? "" : filename,
2636           new_bufthird);
2637         return 2;
2638         }
2639 
2640       /* Copy the data and adjust pointers to the new buffer location. */
2641 
2642       memcpy(new_buffer, main_buffer, bufsize);
2643       bufthird = new_bufthird;
2644       bufsize = 3*bufthird;
2645       ptr = new_buffer + (ptr - main_buffer);
2646       lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2647       free(main_buffer);
2648       main_buffer = new_buffer;
2649 
2650       /* Read more data into the buffer and then try to find the line ending
2651       again. */
2652 
2653       bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2654         bufsize - bufflength, input_line_buffered);
2655       endptr = main_buffer + bufflength;
2656       continue;
2657       }
2658     else
2659       {
2660       fprintf(stderr,
2661         "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2662         "pcre2grep: the maximum buffer size is %d\n"
2663         "pcre2grep: use the --max-buffer-size option to change it\n",
2664         linenumber,
2665         (filename == NULL)? "" : " of file ",
2666         (filename == NULL)? "" : filename,
2667         bufthird);
2668       return 2;
2669       }
2670     }
2671 
2672   /* Extra processing for Jeffrey Friedl's debugging. */
2673 
2674 #ifdef JFRIEDL_DEBUG
2675   if (jfriedl_XT || jfriedl_XR)
2676   {
2677 #     include <sys/time.h>
2678 #     include <time.h>
2679       struct timeval start_time, end_time;
2680       struct timezone dummy;
2681       int i;
2682 
2683       if (jfriedl_XT)
2684       {
2685           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
2686           const char *orig = ptr;
2687           ptr = malloc(newlen + 1);
2688           if (!ptr) {
2689                   printf("out of memory");
2690                   pcre2grep_exit(2);
2691           }
2692           endptr = ptr;
2693           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
2694           for (i = 0; i < jfriedl_XT; i++) {
2695                   strncpy(endptr, orig,  length);
2696                   endptr += length;
2697           }
2698           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
2699           length = newlen;
2700       }
2701 
2702       if (gettimeofday(&start_time, &dummy) != 0)
2703               perror("bad gettimeofday");
2704 
2705 
2706       for (i = 0; i < jfriedl_XR; i++)
2707           match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
2708               PCRE2_NOTEMPTY, offsets, offset_size) >= 0);
2709 
2710       if (gettimeofday(&end_time, &dummy) != 0)
2711               perror("bad gettimeofday");
2712 
2713       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
2714                       -
2715                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
2716 
2717       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
2718       return 0;
2719   }
2720 #endif
2721 
2722   /* We come back here after a match when only_matching_count is non-zero, in
2723   order to find any further matches in the same line. This applies to
2724   --only-matching, --file-offsets, and --line-offsets. */
2725 
2726   ONLY_MATCHING_RESTART:
2727 
2728   /* Run through all the patterns until one matches or there is an error other
2729   than NOMATCH. This code is in a subroutine so that it can be re-used for
2730   finding subsequent matches when colouring matched lines. After finding one
2731   match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2732   this line. */
2733 
2734   match = match_patterns(ptr, length, options, startoffset, &mrc);
2735   options = PCRE2_NOTEMPTY;
2736 
2737   /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2738   only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2739   return code - to output data lines, so that binary zeroes are treated as just
2740   another data character. */
2741 
2742   if (match != invert)
2743     {
2744     BOOL hyphenprinted = FALSE;
2745 
2746     /* We've failed if we want a file that doesn't have any matches. */
2747 
2748     if (filenames == FN_NOMATCH_ONLY) return 1;
2749 
2750     /* Remember that this line matched (for counting matched lines) */
2751 
2752     line_matched = TRUE;
2753 
2754     /* If all we want is a yes/no answer, we can return immediately. */
2755 
2756     if (quiet) return 0;
2757 
2758     /* Just count if just counting is wanted. */
2759 
2760     else if (count_only || show_total_count) count++;
2761 
2762     /* When handling a binary file and binary-files==binary, the "binary"
2763     variable will be set true (it's false in all other cases). In this
2764     situation we just want to output the file name. No need to scan further. */
2765 
2766     else if (binary)
2767       {
2768       fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2769       return 0;
2770       }
2771 
2772     /* Likewise, if all we want is a file name, there is no need to scan any
2773     more lines in the file. */
2774 
2775     else if (filenames == FN_MATCH_ONLY)
2776       {
2777       fprintf(stdout, "%s" STDOUT_NL, printname);
2778       return 0;
2779       }
2780 
2781     /* The --only-matching option prints just the substring that matched,
2782     and/or one or more captured portions of it, as long as these strings are
2783     not empty. The --file-offsets and --line-offsets options output offsets for
2784     the matching substring (all three set only_matching_count non-zero). None
2785     of these mutually exclusive options prints any context. Afterwards, adjust
2786     the start and then jump back to look for further matches in the same line.
2787     If we are in invert mode, however, nothing is printed and we do not restart
2788     - this could still be useful because the return code is set. */
2789 
2790     else if (only_matching_count != 0)
2791       {
2792       if (!invert)
2793         {
2794         PCRE2_SIZE oldstartoffset;
2795 
2796         if (printname != NULL) fprintf(stdout, "%s:", printname);
2797         if (number) fprintf(stdout, "%lu:", linenumber);
2798 
2799         /* Handle --line-offsets */
2800 
2801         if (line_offsets)
2802           fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2803             (int)(offsets[1] - offsets[0]));
2804 
2805         /* Handle --file-offsets */
2806 
2807         else if (file_offsets)
2808           fprintf(stdout, "%d,%d" STDOUT_NL,
2809             (int)(filepos + ptr + offsets[0] - ptr),
2810             (int)(offsets[1] - offsets[0]));
2811 
2812         /* Handle --output (which has already been syntax checked) */
2813 
2814         else if (output_text != NULL)
2815           {
2816           if (display_output_text((PCRE2_SPTR)output_text, FALSE,
2817               (PCRE2_SPTR)ptr, offsets, mrc) || printname != NULL ||
2818               number)
2819             fprintf(stdout, STDOUT_NL);
2820           }
2821 
2822         /* Handle --only-matching, which may occur many times */
2823 
2824         else
2825           {
2826           BOOL printed = FALSE;
2827           omstr *om;
2828 
2829           for (om = only_matching; om != NULL; om = om->next)
2830             {
2831             int n = om->groupnum;
2832             if (n == 0 || n < mrc)
2833               {
2834               int plen = offsets[2*n + 1] - offsets[2*n];
2835               if (plen > 0)
2836                 {
2837                 if (printed && om_separator != NULL)
2838                   fprintf(stdout, "%s", om_separator);
2839                 print_match(ptr + offsets[n*2], plen);
2840                 printed = TRUE;
2841                 }
2842               }
2843             }
2844 
2845           if (printed || printname != NULL || number)
2846             fprintf(stdout, STDOUT_NL);
2847           }
2848 
2849         /* Prepare to repeat to find the next match in the line. */
2850 
2851         match = FALSE;
2852         if (line_buffered) fflush(stdout);
2853         rc = 0;                      /* Had some success */
2854 
2855         /* If the pattern contained a lookbehind that included \K, it is
2856         possible that the end of the match might be at or before the actual
2857         starting offset we have just used. In this case, start one character
2858         further on. */
2859 
2860         startoffset = offsets[1];    /* Restart after the match */
2861         oldstartoffset = pcre2_get_startchar(match_data);
2862         if (startoffset <= oldstartoffset)
2863           {
2864           if (startoffset >= length) goto END_ONE_MATCH;  /* Were at end */
2865           startoffset = oldstartoffset + 1;
2866           if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2867           }
2868 
2869         /* If the current match ended past the end of the line (only possible
2870         in multiline mode), we must move on to the line in which it did end
2871         before searching for more matches. */
2872 
2873         while (startoffset > linelength)
2874           {
2875           ptr += linelength + endlinelength;
2876           filepos += (int)(linelength + endlinelength);
2877           linenumber++;
2878           startoffset -= (int)(linelength + endlinelength);
2879           t = end_of_line(ptr, endptr, &endlinelength);
2880           linelength = t - ptr - endlinelength;
2881           length = (PCRE2_SIZE)(endptr - ptr);
2882           }
2883 
2884         goto ONLY_MATCHING_RESTART;
2885         }
2886       }
2887 
2888     /* This is the default case when none of the above options is set. We print
2889     the matching lines(s), possibly preceded and/or followed by other lines of
2890     context. */
2891 
2892     else
2893       {
2894       lines_printed = TRUE;
2895 
2896       /* See if there is a requirement to print some "after" lines from a
2897       previous match. We never print any overlaps. */
2898 
2899       if (after_context > 0 && lastmatchnumber > 0)
2900         {
2901         int ellength;
2902         int linecount = 0;
2903         char *p = lastmatchrestart;
2904 
2905         while (p < ptr && linecount < after_context)
2906           {
2907           p = end_of_line(p, ptr, &ellength);
2908           linecount++;
2909           }
2910 
2911         /* It is important to advance lastmatchrestart during this printing so
2912         that it interacts correctly with any "before" printing below. Print
2913         each line's data using fwrite() in case there are binary zeroes. */
2914 
2915         while (lastmatchrestart < p)
2916           {
2917           char *pp = lastmatchrestart;
2918           if (printname != NULL) fprintf(stdout, "%s-", printname);
2919           if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2920           pp = end_of_line(pp, endptr, &ellength);
2921           FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2922           lastmatchrestart = pp;
2923           }
2924         if (lastmatchrestart != ptr) hyphenpending = TRUE;
2925         }
2926 
2927       /* If there were non-contiguous lines printed above, insert hyphens. */
2928 
2929       if (hyphenpending)
2930         {
2931         fprintf(stdout, "--" STDOUT_NL);
2932         hyphenpending = FALSE;
2933         hyphenprinted = TRUE;
2934         }
2935 
2936       /* See if there is a requirement to print some "before" lines for this
2937       match. Again, don't print overlaps. */
2938 
2939       if (before_context > 0)
2940         {
2941         int linecount = 0;
2942         char *p = ptr;
2943 
2944         while (p > main_buffer &&
2945                (lastmatchnumber == 0 || p > lastmatchrestart) &&
2946                linecount < before_context)
2947           {
2948           linecount++;
2949           p = previous_line(p, main_buffer);
2950           }
2951 
2952         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2953           fprintf(stdout, "--" STDOUT_NL);
2954 
2955         while (p < ptr)
2956           {
2957           int ellength;
2958           char *pp = p;
2959           if (printname != NULL) fprintf(stdout, "%s-", printname);
2960           if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2961           pp = end_of_line(pp, endptr, &ellength);
2962           FWRITE_IGNORE(p, 1, pp - p, stdout);
2963           p = pp;
2964           }
2965         }
2966 
2967       /* Now print the matching line(s); ensure we set hyphenpending at the end
2968       of the file if any context lines are being output. */
2969 
2970       if (after_context > 0 || before_context > 0)
2971         endhyphenpending = TRUE;
2972 
2973       if (printname != NULL) fprintf(stdout, "%s:", printname);
2974       if (number) fprintf(stdout, "%lu:", linenumber);
2975 
2976       /* This extra option, for Jeffrey Friedl's debugging requirements,
2977       replaces the matched string, or a specific captured string if it exists,
2978       with X. When this happens, colouring is ignored. */
2979 
2980 #ifdef JFRIEDL_DEBUG
2981       if (S_arg >= 0 && S_arg < mrc)
2982         {
2983         int first = S_arg * 2;
2984         int last  = first + 1;
2985         FWRITE_IGNORE(ptr, 1, offsets[first], stdout);
2986         fprintf(stdout, "X");
2987         FWRITE_IGNORE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
2988         }
2989       else
2990 #endif
2991 
2992       /* In multiline mode, or if colouring, we have to split the line(s) up
2993       and search for further matches, but not of course if the line is a
2994       non-match. In multiline mode this is necessary in case there is another
2995       match that spans the end of the current line. When colouring we want to
2996       colour all matches. */
2997 
2998       if ((multiline || do_colour) && !invert)
2999         {
3000         int plength;
3001         PCRE2_SIZE endprevious;
3002 
3003         /* The use of \K may make the end offset earlier than the start. In
3004         this situation, swap them round. */
3005 
3006         if (offsets[0] > offsets[1])
3007           {
3008           PCRE2_SIZE temp = offsets[0];
3009           offsets[0] = offsets[1];
3010           offsets[1] = temp;
3011           }
3012 
3013         FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
3014         print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3015 
3016         for (;;)
3017           {
3018           PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
3019 
3020           endprevious = offsets[1];
3021           startoffset = endprevious;  /* Advance after previous match. */
3022 
3023           /* If the pattern contained a lookbehind that included \K, it is
3024           possible that the end of the match might be at or before the actual
3025           starting offset we have just used. In this case, start one character
3026           further on. */
3027 
3028           if (startoffset <= oldstartoffset)
3029             {
3030             startoffset = oldstartoffset + 1;
3031             if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
3032             }
3033 
3034           /* If the current match ended past the end of the line (only possible
3035           in multiline mode), we must move on to the line in which it did end
3036           before searching for more matches. Because the PCRE2_FIRSTLINE option
3037           is set, the start of the match will always be before the first
3038           newline sequence. */
3039 
3040           while (startoffset > linelength + endlinelength)
3041             {
3042             ptr += linelength + endlinelength;
3043             filepos += (int)(linelength + endlinelength);
3044             linenumber++;
3045             startoffset -= (int)(linelength + endlinelength);
3046             endprevious -= (int)(linelength + endlinelength);
3047             t = end_of_line(ptr, endptr, &endlinelength);
3048             linelength = t - ptr - endlinelength;
3049             length = (PCRE2_SIZE)(endptr - ptr);
3050             }
3051 
3052           /* If startoffset is at the exact end of the line it means this
3053           complete line was the final part of the match, so there is nothing
3054           more to do. */
3055 
3056           if (startoffset == linelength + endlinelength) break;
3057 
3058           /* Otherwise, run a match from within the final line, and if found,
3059           loop for any that may follow. */
3060 
3061           if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
3062 
3063           /* The use of \K may make the end offset earlier than the start. In
3064           this situation, swap them round. */
3065 
3066           if (offsets[0] > offsets[1])
3067             {
3068             PCRE2_SIZE temp = offsets[0];
3069             offsets[0] = offsets[1];
3070             offsets[1] = temp;
3071             }
3072 
3073           FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
3074           print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3075           }
3076 
3077         /* In multiline mode, we may have already printed the complete line
3078         and its line-ending characters (if they matched the pattern), so there
3079         may be no more to print. */
3080 
3081         plength = (int)((linelength + endlinelength) - endprevious);
3082         if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
3083         }
3084 
3085       /* Not colouring or multiline; no need to search for further matches. */
3086 
3087       else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
3088       }
3089 
3090     /* End of doing what has to be done for a match. If --line-buffered was
3091     given, flush the output. */
3092 
3093     if (line_buffered) fflush(stdout);
3094     rc = 0;    /* Had some success */
3095 
3096     /* Remember where the last match happened for after_context. We remember
3097     where we are about to restart, and that line's number. */
3098 
3099     lastmatchrestart = ptr + linelength + endlinelength;
3100     lastmatchnumber = linenumber + 1;
3101 
3102     /* If a line was printed and we are now at the end of the file and the last
3103     line had no newline, output one. */
3104 
3105     if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
3106       write_final_newline();
3107     }
3108 
3109   /* For a match in multiline inverted mode (which of course did not cause
3110   anything to be printed), we have to move on to the end of the match before
3111   proceeding. */
3112 
3113   if (multiline && invert && match)
3114     {
3115     int ellength;
3116     char *endmatch = ptr + offsets[1];
3117     t = ptr;
3118     while (t < endmatch)
3119       {
3120       t = end_of_line(t, endptr, &ellength);
3121       if (t <= endmatch) linenumber++; else break;
3122       }
3123     endmatch = end_of_line(endmatch, endptr, &ellength);
3124     linelength = endmatch - ptr - ellength;
3125     }
3126 
3127   /* Advance to after the newline and increment the line number. The file
3128   offset to the current line is maintained in filepos. */
3129 
3130   END_ONE_MATCH:
3131   ptr += linelength + endlinelength;
3132   filepos += (int)(linelength + endlinelength);
3133   linenumber++;
3134 
3135   /* If there was at least one match (or a non-match, as required) in the line,
3136   increment the count for the -m option. */
3137 
3138   if (line_matched) count_matched_lines++;
3139 
3140   /* If input is line buffered, and the buffer is not yet full, read another
3141   line and add it into the buffer. */
3142 
3143   if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
3144     {
3145     int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
3146     bufflength += add;
3147     endptr += add;
3148     }
3149 
3150   /* If we haven't yet reached the end of the file (the buffer is full), and
3151   the current point is in the top 1/3 of the buffer, slide the buffer down by
3152   1/3 and refill it. Before we do this, if some unprinted "after" lines are
3153   about to be lost, print them. */
3154 
3155   if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3156     {
3157     if (after_context > 0 &&
3158         lastmatchnumber > 0 &&
3159         lastmatchrestart < main_buffer + bufthird)
3160       {
3161       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3162       lastmatchnumber = 0;  /* Indicates no after lines pending */
3163       }
3164 
3165     /* Now do the shuffle */
3166 
3167     (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
3168     ptr -= bufthird;
3169 
3170     bufflength = 2*bufthird + fill_buffer(handle, frtype,
3171       main_buffer + 2*bufthird, bufthird, input_line_buffered);
3172     endptr = main_buffer + bufflength;
3173 
3174     /* Adjust any last match point */
3175 
3176     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3177     }
3178   }     /* Loop through the whole file */
3179 
3180 /* End of file; print final "after" lines if wanted; do_after_lines sets
3181 hyphenpending if it prints something. */
3182 
3183 if (only_matching_count == 0 && !(count_only|show_total_count))
3184   {
3185   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3186   hyphenpending |= endhyphenpending;
3187   }
3188 
3189 /* Print the file name if we are looking for those without matches and there
3190 were none. If we found a match, we won't have got this far. */
3191 
3192 if (filenames == FN_NOMATCH_ONLY)
3193   {
3194   fprintf(stdout, "%s" STDOUT_NL, printname);
3195   return 0;
3196   }
3197 
3198 /* Print the match count if wanted */
3199 
3200 if (count_only && !quiet)
3201   {
3202   if (count > 0 || !omit_zero_count)
3203     {
3204     if (printname != NULL && filenames != FN_NONE)
3205       fprintf(stdout, "%s:", printname);
3206     fprintf(stdout, "%lu" STDOUT_NL, count);
3207     counts_printed++;
3208     }
3209   }
3210 
3211 total_count += count;   /* Can be set without count_only */
3212 return rc;
3213 }
3214 
3215 
3216 
3217 /*************************************************
3218 *     Grep a file or recurse into a directory    *
3219 *************************************************/
3220 
3221 /* Given a path name, if it's a directory, scan all the files if we are
3222 recursing; if it's a file, grep it.
3223 
3224 Arguments:
3225   pathname          the path to investigate
3226   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
3227   only_one_at_top   TRUE if the path is the only one at toplevel
3228 
3229 Returns:  -1 the file/directory was skipped
3230            0 if there was at least one match
3231            1 if there were no matches
3232            2 there was some kind of error
3233 
3234 However, file opening failures are suppressed if "silent" is set.
3235 */
3236 
3237 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)3238 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3239 {
3240 int rc = 1;
3241 int frtype;
3242 void *handle;
3243 char *lastcomp;
3244 FILE *in = NULL;           /* Ensure initialized */
3245 
3246 #ifdef SUPPORT_LIBZ
3247 gzFile ingz = NULL;
3248 #endif
3249 
3250 #ifdef SUPPORT_LIBBZ2
3251 BZFILE *inbz2 = NULL;
3252 #endif
3253 
3254 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3255 int pathlen;
3256 #endif
3257 
3258 #if defined NATIVE_ZOS
3259 int zos_type;
3260 FILE *zos_test_file;
3261 #endif
3262 
3263 /* If the file name is "-" we scan stdin */
3264 
3265 if (strcmp(pathname, "-") == 0)
3266   {
3267   return pcre2grep(stdin, FR_PLAIN, stdin_name,
3268     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3269       stdin_name : NULL);
3270   }
3271 
3272 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3273 directories, whereas --include and --exclude apply to everything else. The test
3274 is against the final component of the path. */
3275 
3276 lastcomp = strrchr(pathname, FILESEP);
3277 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3278 
3279 /* If the file is a directory, skip if not recursing or if explicitly excluded.
3280 Otherwise, scan the directory and recurse for each path within it. The scanning
3281 code is localized so it can be made system-specific. */
3282 
3283 
3284 /* For z/OS, determine the file type. */
3285 
3286 #if defined NATIVE_ZOS
3287 zos_test_file =  fopen(pathname,"rb");
3288 
3289 if (zos_test_file == NULL)
3290    {
3291    if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3292      pathname, strerror(errno));
3293    return -1;
3294    }
3295 zos_type = identifyzosfiletype (zos_test_file);
3296 fclose (zos_test_file);
3297 
3298 /* Handle a PDS in separate code */
3299 
3300 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3301    {
3302    return travelonpdsdir (pathname, only_one_at_top);
3303    }
3304 
3305 /* Deal with regular files in the normal way below. These types are:
3306    zos_type == __ZOS_PDS_MEMBER
3307    zos_type == __ZOS_PS
3308    zos_type == __ZOS_VSAM_KSDS
3309    zos_type == __ZOS_VSAM_ESDS
3310    zos_type == __ZOS_VSAM_RRDS
3311 */
3312 
3313 /* Handle a z/OS directory using common code. */
3314 
3315 else if (zos_type == __ZOS_HFS)
3316  {
3317 #endif  /* NATIVE_ZOS */
3318 
3319 
3320 /* Handle directories: common code for all OS */
3321 
3322 if (isdirectory(pathname))
3323   {
3324   if (dee_action == dee_SKIP ||
3325       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3326     return -1;
3327 
3328   if (dee_action == dee_RECURSE)
3329     {
3330     char buffer[FNBUFSIZ];
3331     char *nextfile;
3332     directory_type *dir = opendirectory(pathname);
3333 
3334     if (dir == NULL)
3335       {
3336       if (!silent)
3337         fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3338           strerror(errno));
3339       return 2;
3340       }
3341 
3342     while ((nextfile = readdirectory(dir)) != NULL)
3343       {
3344       int frc;
3345       int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3346       if (fnlength > FNBUFSIZ)
3347         {
3348         fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3349         rc = 2;
3350         break;
3351         }
3352       sprintf(buffer, "%s%c%s", pathname, FILESEP, nextfile);
3353       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3354       if (frc > 1) rc = frc;
3355        else if (frc == 0 && rc == 1) rc = 0;
3356       }
3357 
3358     closedirectory(dir);
3359     return rc;
3360     }
3361   }
3362 
3363 #ifdef WIN32
3364 if (iswild(pathname))
3365   {
3366   char buffer[1024];
3367   char *nextfile;
3368   char *name;
3369   directory_type *dir = opendirectory(pathname);
3370 
3371   if (dir == NULL)
3372     return 0;
3373 
3374   for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3375     if (*nextfile == '/' || *nextfile == '\\')
3376       name = nextfile + 1;
3377   *name = 0;
3378 
3379   while ((nextfile = readdirectory(dir)) != NULL)
3380     {
3381     int frc;
3382     sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3383     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3384     if (frc > 1) rc = frc;
3385      else if (frc == 0 && rc == 1) rc = 0;
3386     }
3387 
3388   closedirectory(dir);
3389   return rc;
3390   }
3391 #endif
3392 
3393 #if defined NATIVE_ZOS
3394  }
3395 #endif
3396 
3397 /* If the file is not a directory, check for a regular file, and if it is not,
3398 skip it if that's been requested. Otherwise, check for an explicit inclusion or
3399 exclusion. */
3400 
3401 else if (
3402 #if defined NATIVE_ZOS
3403         (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3404 #else  /* all other OS */
3405         (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3406 #endif
3407         !test_incexc(lastcomp, include_patterns, exclude_patterns))
3408   return -1;  /* File skipped */
3409 
3410 /* Control reaches here if we have a regular file, or if we have a directory
3411 and recursion or skipping was not requested, or if we have anything else and
3412 skipping was not requested. The scan proceeds. If this is the first and only
3413 argument at top level, we don't show the file name, unless we are only showing
3414 the file name, or the filename was forced (-H). */
3415 
3416 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3417 pathlen = (int)(strlen(pathname));
3418 #endif
3419 
3420 /* Open using zlib if it is supported and the file name ends with .gz. */
3421 
3422 #ifdef SUPPORT_LIBZ
3423 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3424   {
3425   ingz = gzopen(pathname, "rb");
3426   if (ingz == NULL)
3427     {
3428     if (!silent)
3429       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3430         strerror(errno));
3431     return 2;
3432     }
3433   handle = (void *)ingz;
3434   frtype = FR_LIBZ;
3435   }
3436 else
3437 #endif
3438 
3439 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3440 
3441 #ifdef SUPPORT_LIBBZ2
3442 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3443   {
3444   inbz2 = BZ2_bzopen(pathname, "rb");
3445   handle = (void *)inbz2;
3446   frtype = FR_LIBBZ2;
3447   }
3448 else
3449 #endif
3450 
3451 /* Otherwise use plain fopen(). The label is so that we can come back here if
3452 an attempt to read a .bz2 file indicates that it really is a plain file. */
3453 
3454 #ifdef SUPPORT_LIBBZ2
3455 PLAIN_FILE:
3456 #endif
3457   {
3458   in = fopen(pathname, "rb");
3459   handle = (void *)in;
3460   frtype = FR_PLAIN;
3461   }
3462 
3463 /* All the opening methods return errno when they fail. */
3464 
3465 if (handle == NULL)
3466   {
3467   if (!silent)
3468     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3469       strerror(errno));
3470   return 2;
3471   }
3472 
3473 /* Now grep the file */
3474 
3475 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3476   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3477 
3478 /* Close in an appropriate manner. */
3479 
3480 #ifdef SUPPORT_LIBZ
3481 if (frtype == FR_LIBZ)
3482   gzclose(ingz);
3483 else
3484 #endif
3485 
3486 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3487 read failed. If the error indicates that the file isn't in fact bzipped, try
3488 again as a normal file. */
3489 
3490 #ifdef SUPPORT_LIBBZ2
3491 if (frtype == FR_LIBBZ2)
3492   {
3493   if (rc == 3)
3494     {
3495     int errnum;
3496     const char *err = BZ2_bzerror(inbz2, &errnum);
3497     if (errnum == BZ_DATA_ERROR_MAGIC)
3498       {
3499       BZ2_bzclose(inbz2);
3500       goto PLAIN_FILE;
3501       }
3502     else if (!silent)
3503       fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3504         pathname, err);
3505     rc = 2;    /* The normal "something went wrong" code */
3506     }
3507   BZ2_bzclose(inbz2);
3508   }
3509 else
3510 #endif
3511 
3512 /* Normal file close */
3513 
3514 fclose(in);
3515 
3516 /* Pass back the yield from pcre2grep(). */
3517 
3518 return rc;
3519 }
3520 
3521 
3522 
3523 /*************************************************
3524 *    Handle a single-letter, no data option      *
3525 *************************************************/
3526 
3527 static int
handle_option(int letter,int options)3528 handle_option(int letter, int options)
3529 {
3530 switch(letter)
3531   {
3532   case N_FOFFSETS: file_offsets = TRUE; break;
3533   case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3534   case N_LBUFFER: line_buffered = TRUE; break;
3535   case N_LOFFSETS: line_offsets = number = TRUE; break;
3536   case N_NOJIT: use_jit = FALSE; break;
3537   case 'a': binary_files = BIN_TEXT; break;
3538   case 'c': count_only = TRUE; break;
3539   case 'F': options |= PCRE2_LITERAL; break;
3540   case 'H': filenames = FN_FORCE; break;
3541   case 'I': binary_files = BIN_NOMATCH; break;
3542   case 'h': filenames = FN_NONE; break;
3543   case 'i': options |= PCRE2_CASELESS; break;
3544   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3545   case 'L': filenames = FN_NOMATCH_ONLY; break;
3546   case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3547   case 'n': number = TRUE; break;
3548 
3549   case 'o':
3550   only_matching_last = add_number(0, only_matching_last);
3551   if (only_matching == NULL) only_matching = only_matching_last;
3552   break;
3553 
3554   case 'q': quiet = TRUE; break;
3555   case 'r': dee_action = dee_RECURSE; break;
3556   case 's': silent = TRUE; break;
3557   case 't': show_total_count = TRUE; break;
3558   case 'u': options |= PCRE2_UTF; utf = TRUE; break;
3559   case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
3560   case 'v': invert = TRUE; break;
3561   case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3562   case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3563 
3564   case 'V':
3565     {
3566     unsigned char buffer[128];
3567     (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3568     fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3569     }
3570   pcre2grep_exit(0);
3571   break;
3572 
3573   default:
3574   fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3575   pcre2grep_exit(usage(2));
3576   }
3577 
3578 return options;
3579 }
3580 
3581 
3582 
3583 /*************************************************
3584 *          Construct printed ordinal             *
3585 *************************************************/
3586 
/* Convert a number into its printed ordinal form, for example "1st", "2nd",
"3rd", "4th", "11th", "21st". The result is built in a static buffer, so each
call overwrites the text returned by the previous one.

Argument:   the number
Returns:    pointer to the static buffer holding the ordinal text
*/

static char *
ordin(int n)
{
static char buffer[14];
const char *suffix = "th";
int digits = sprintf(buffer, "%d", n);   /* Length of the decimal text */
int lasttwo = n % 100;

/* 11, 12, and 13 take "th"; otherwise the final digit decides. */

if (lasttwo < 11 || lasttwo > 13)
  {
  int lastone = lasttwo % 10;
  if (lastone == 1) suffix = "st";
  else if (lastone == 2) suffix = "nd";
  else if (lastone == 3) suffix = "rd";
  }

strcpy(buffer + digits, suffix);
return buffer;
}
3607 
3608 
3609 
3610 /*************************************************
3611 *          Compile a single pattern              *
3612 *************************************************/
3613 
3614 /* Do nothing if the pattern has already been compiled. This is the case for
3615 include/exclude patterns read from a file.
3616 
3617 When the -F option has been used, each "pattern" may be a list of strings,
3618 separated by line breaks. They will be matched literally. We split such a
3619 string and compile the first substring, inserting an additional block into the
3620 pattern chain.
3621 
3622 Arguments:
3623   p              points to the pattern block
3624   options        the PCRE options
3625   fromfile       TRUE if the pattern was read from a file
3626   fromtext       file name or identifying text (e.g. "include")
3627   count          0 if this is the only command line pattern, or
3628                  number of the command line pattern, or
3629                  linenumber for a pattern from a file
3630 
3631 Returns:         TRUE on success, FALSE after an error
3632 */
3633 
3634 static BOOL
compile_pattern(patstr * p,int options,int fromfile,const char * fromtext,int count)3635 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3636   int count)
3637 {
3638 char *ps;
3639 int errcode;
3640 PCRE2_SIZE patlen, erroffset;
3641 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3642 
3643 if (p->compiled != NULL) return TRUE;
3644 ps = p->string;
3645 patlen = p->length;
3646 
3647 if ((options & PCRE2_LITERAL) != 0)
3648   {
3649   int ellength;
3650   char *eop = ps + patlen;
3651   char *pe = end_of_line(ps, eop, &ellength);
3652 
3653   if (ellength != 0)
3654     {
3655     patlen = pe - ps - ellength;
3656     if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3657     }
3658   }
3659 
3660 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3661   &erroffset, compile_context);
3662 
3663 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3664 ignore any JIT compiler errors, relying falling back to interpreting if
3665 anything goes wrong with JIT. */
3666 
3667 if (p->compiled != NULL)
3668   {
3669 #ifdef SUPPORT_PCRE2GREP_JIT
3670   if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3671 #endif
3672   return TRUE;
3673   }
3674 
3675 /* Handle compile errors */
3676 
3677 if (erroffset > patlen) erroffset = patlen;
3678 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3679 
3680 if (fromfile)
3681   {
3682   fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3683     "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3684   }
3685 else
3686   {
3687   if (count == 0)
3688     fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3689       fromtext, (int)erroffset, errmessbuffer);
3690   else
3691     fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3692       ordin(count), fromtext, (int)erroffset, errmessbuffer);
3693   }
3694 
3695 return FALSE;
3696 }
3697 
3698 
3699 
3700 /*************************************************
3701 *     Read and compile a file of patterns        *
3702 *************************************************/
3703 
3704 /* This is used for --filelist, --include-from, and --exclude-from.
3705 
3706 Arguments:
3707   name         the name of the file; "-" is stdin
3708   patptr       pointer to the pattern chain anchor
3709   patlastptr   pointer to the last pattern pointer
3710 
3711 Returns:       TRUE if all went well
3712 */
3713 
3714 static BOOL
read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
{
/* Reads patterns, one per line, from the file called "name" (or from stdin
when name is "-"), appends each one to the chain whose anchor is *patptr and
whose tail is *patlastptr, and compiles it straight away.

Arguments:
  name         the name of the file; "-" means stdin
  patptr       pointer to the pattern chain anchor
  patlastptr   pointer to the last pattern pointer

Returns:       TRUE if the whole file was processed, FALSE if the file could
               not be opened, a pattern block could not be added, or a
               pattern failed to compile */

char linebuf[MAXPATLEN+20];
const char *source_name = name;
FILE *in = stdin;
BOOL ok = FALSE;
int lineno = 0;
PCRE2_SIZE len;

if (strcmp(name, "-") == 0) source_name = stdin_name;
else if ((in = fopen(name, "r")) == NULL)
  {
  fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
  return FALSE;
  }

for (;;)
  {
  BOOL compiled;

  /* A zero length from read_one_line() ends the file; that is the only way
  of leaving this loop with a success status. */

  len = read_one_line(linebuf, sizeof(linebuf), in);
  if (len == 0)
    {
    ok = TRUE;
    break;
    }

  /* Count the line, drop trailing white space, and ignore the line if nothing
  is left. */

  lineno++;
  while (len > 0 && isspace((unsigned char)(linebuf[len-1]))) len--;
  if (len == 0) continue;

  /* The new chain entry refers to the local line buffer. That is safe only
  because the pattern is compiled immediately below; the string pointer is
  cleared afterwards so that nothing can use it once the buffer changes. */

  *patlastptr = add_pattern(linebuf, len, *patlastptr);
  if (*patlastptr == NULL) break;
  if (*patptr == NULL) *patptr = *patlastptr;

  /* With -F, compiling one "pattern" may append further literal patterns to
  the chain if the text contains a newline (possible when -N selects a
  different newline convention from the one read_one_line() stops at), so keep
  compiling until the end of the chain is reached. */

  while ((compiled = compile_pattern(*patlastptr, pcre2_options, TRUE,
          source_name, lineno)) != FALSE)
    {
    (*patlastptr)->string = NULL;            /* Insurance */
    if ((*patlastptr)->next == NULL) break;
    *patlastptr = (*patlastptr)->next;
    }

  if (!compiled) break;
  }

if (in != stdin) fclose(in);
return ok;
}
3781 
3782 
3783 
3784 /*************************************************
3785 *                Main program                    *
3786 *************************************************/
3787 
3788 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3789 
3790 int
main(int argc,char ** argv)3791 main(int argc, char **argv)
3792 {
3793 int i, j;
3794 int rc = 1;
3795 BOOL only_one_at_top;
3796 patstr *cp;
3797 fnstr *fn;
3798 omstr *om;
3799 const char *locale_from = "--locale";
3800 
3801 #ifdef SUPPORT_PCRE2GREP_JIT
3802 pcre2_jit_stack *jit_stack = NULL;
3803 #endif
3804 
3805 /* In Windows, stdout is set up as a text stream, which means that \n is
3806 converted to \r\n. This causes output lines that are copied from the input to
3807 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3808 that stdout is a binary stream. Note that this means all other output to stdout
3809 must use STDOUT_NL to terminate lines. */
3810 
3811 #ifdef WIN32
3812 _setmode(_fileno(stdout), _O_BINARY);
3813 #endif
3814 
3815 /* Process the options */
3816 
3817 for (i = 1; i < argc; i++)
3818   {
3819   option_item *op = NULL;
3820   char *option_data = (char *)"";    /* default to keep compiler happy */
3821   BOOL longop;
3822   BOOL longopwasequals = FALSE;
3823 
3824   if (argv[i][0] != '-') break;
3825 
3826   /* If we hit an argument that is just "-", it may be a reference to STDIN,
3827   but only if we have previously had -e or -f to define the patterns. */
3828 
3829   if (argv[i][1] == 0)
3830     {
3831     if (pattern_files != NULL || patterns != NULL) break;
3832       else pcre2grep_exit(usage(2));
3833     }
3834 
3835   /* Handle a long name option, or -- to terminate the options */
3836 
3837   if (argv[i][1] == '-')
3838     {
3839     char *arg = argv[i] + 2;
3840     char *argequals = strchr(arg, '=');
3841 
3842     if (*arg == 0)    /* -- terminates options */
3843       {
3844       i++;
3845       break;                /* out of the options-handling loop */
3846       }
3847 
3848     longop = TRUE;
3849 
3850     /* Some long options have data that follows after =, for example file=name.
3851     Some options have variations in the long name spelling: specifically, we
3852     allow "regexp" because GNU grep allows it, though I personally go along
3853     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3854     These options are entered in the table as "regex(p)". Options can be in
3855     both these categories. */
3856 
3857     for (op = optionlist; op->one_char != 0; op++)
3858       {
3859       char *opbra = strchr(op->long_name, '(');
3860       char *equals = strchr(op->long_name, '=');
3861 
3862       /* Handle options with only one spelling of the name */
3863 
3864       if (opbra == NULL)     /* Does not contain '(' */
3865         {
3866         if (equals == NULL)  /* Not thing=data case */
3867           {
3868           if (strcmp(arg, op->long_name) == 0) break;
3869           }
3870         else                 /* Special case xxx=data */
3871           {
3872           int oplen = (int)(equals - op->long_name);
3873           int arglen = (argequals == NULL)?
3874             (int)strlen(arg) : (int)(argequals - arg);
3875           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3876             {
3877             option_data = arg + arglen;
3878             if (*option_data == '=')
3879               {
3880               option_data++;
3881               longopwasequals = TRUE;
3882               }
3883             break;
3884             }
3885           }
3886         }
3887 
3888       /* Handle options with an alternate spelling of the name */
3889 
3890       else
3891         {
3892         char buff1[24];
3893         char buff2[24];
3894         int ret;
3895 
3896         int baselen = (int)(opbra - op->long_name);
3897         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3898         int arglen = (argequals == NULL || equals == NULL)?
3899           (int)strlen(arg) : (int)(argequals - arg);
3900 
3901         if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3902              ret < 0 || ret > (int)sizeof(buff1)) ||
3903             (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3904                      fulllen - baselen - 2, opbra + 1),
3905              ret < 0 || ret > (int)sizeof(buff2)))
3906           {
3907           fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3908             op->long_name);
3909           pcre2grep_exit(2);
3910           }
3911 
3912         if (strncmp(arg, buff1, arglen) == 0 ||
3913            strncmp(arg, buff2, arglen) == 0)
3914           {
3915           if (equals != NULL && argequals != NULL)
3916             {
3917             option_data = argequals;
3918             if (*option_data == '=')
3919               {
3920               option_data++;
3921               longopwasequals = TRUE;
3922               }
3923             }
3924           break;
3925           }
3926         }
3927       }
3928 
3929     if (op->one_char == 0)
3930       {
3931       fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3932       pcre2grep_exit(usage(2));
3933       }
3934     }
3935 
3936   /* Jeffrey Friedl's debugging harness uses these additional options which
3937   are not in the right form for putting in the option table because they use
3938   only one hyphen, yet are more than one character long. By putting them
3939   separately here, they will not get displayed as part of the help() output,
3940   but I don't think Jeffrey will care about that. */
3941 
3942 #ifdef JFRIEDL_DEBUG
3943   else if (strcmp(argv[i], "-pre") == 0) {
3944           jfriedl_prefix = argv[++i];
3945           continue;
3946   } else if (strcmp(argv[i], "-post") == 0) {
3947           jfriedl_postfix = argv[++i];
3948           continue;
3949   } else if (strcmp(argv[i], "-XT") == 0) {
3950           sscanf(argv[++i], "%d", &jfriedl_XT);
3951           continue;
3952   } else if (strcmp(argv[i], "-XR") == 0) {
3953           sscanf(argv[++i], "%d", &jfriedl_XR);
3954           continue;
3955   }
3956 #endif
3957 
3958 
3959   /* One-char options; many that have no data may be in a single argument; we
3960   continue till we hit the last one or one that needs data. */
3961 
3962   else
3963     {
3964     char *s = argv[i] + 1;
3965     longop = FALSE;
3966 
3967     while (*s != 0)
3968       {
3969       for (op = optionlist; op->one_char != 0; op++)
3970         {
3971         if (*s == op->one_char) break;
3972         }
3973       if (op->one_char == 0)
3974         {
3975         fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
3976           *s, argv[i]);
3977         pcre2grep_exit(usage(2));
3978         }
3979 
3980       option_data = s+1;
3981 
3982       /* Break out if this is the last character in the string; it's handled
3983       below like a single multi-char option. */
3984 
3985       if (*option_data == 0) break;
3986 
3987       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
3988       are used for ones that either have a numerical number or defaults, i.e.
3989       the data is optional. If a digit follows, there is data; if not, carry on
3990       with other single-character options in the same string. */
3991 
3992       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
3993         {
3994         if (isdigit((unsigned char)s[1])) break;
3995         }
3996       else   /* Check for an option with data */
3997         {
3998         if (op->type != OP_NODATA) break;
3999         }
4000 
4001       /* Handle a single-character option with no data, then loop for the
4002       next character in the string. */
4003 
4004       pcre2_options = handle_option(*s++, pcre2_options);
4005       }
4006     }
4007 
4008   /* At this point we should have op pointing to a matched option. If the type
4009   is NO_DATA, it means that there is no data, and the option might set
4010   something in the PCRE options. */
4011 
4012   if (op->type == OP_NODATA)
4013     {
4014     pcre2_options = handle_option(op->one_char, pcre2_options);
4015     continue;
4016     }
4017 
4018   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
4019   either has a value or defaults to something. It cannot have data in a
4020   separate item. At the moment, the only such options are "colo(u)r",
4021   "only-matching", and Jeffrey Friedl's special -S debugging option. */
4022 
4023   if (*option_data == 0 &&
4024       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
4025        op->type == OP_OP_NUMBERS))
4026     {
4027     switch (op->one_char)
4028       {
4029       case N_COLOUR:
4030       colour_option = "auto";
4031       break;
4032 
4033       case 'o':
4034       only_matching_last = add_number(0, only_matching_last);
4035       if (only_matching == NULL) only_matching = only_matching_last;
4036       break;
4037 
4038 #ifdef JFRIEDL_DEBUG
4039       case 'S':
4040       S_arg = 0;
4041       break;
4042 #endif
4043       }
4044     continue;
4045     }
4046 
4047   /* Otherwise, find the data string for the option. */
4048 
4049   if (*option_data == 0)
4050     {
4051     if (i >= argc - 1 || longopwasequals)
4052       {
4053       fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
4054       pcre2grep_exit(usage(2));
4055       }
4056     option_data = argv[++i];
4057     }
4058 
4059   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
4060   added to a chain of numbers. */
4061 
4062   if (op->type == OP_OP_NUMBERS)
4063     {
4064     unsigned long int n = decode_number(option_data, op, longop);
4065     omdatastr *omd = (omdatastr *)op->dataptr;
4066     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
4067     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
4068     }
4069 
4070   /* If the option type is OP_PATLIST, it's the -e option, or one of the
4071   include/exclude options, which can be called multiple times to create lists
4072   of patterns. */
4073 
4074   else if (op->type == OP_PATLIST)
4075     {
4076     patdatastr *pd = (patdatastr *)op->dataptr;
4077     *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
4078       *(pd->lastptr));
4079     if (*(pd->lastptr) == NULL) goto EXIT2;
4080     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
4081     }
4082 
4083   /* If the option type is OP_FILELIST, it's one of the options that names a
4084   file. */
4085 
4086   else if (op->type == OP_FILELIST)
4087     {
4088     fndatastr *fd = (fndatastr *)op->dataptr;
4089     fn = (fnstr *)malloc(sizeof(fnstr));
4090     if (fn == NULL)
4091       {
4092       fprintf(stderr, "pcre2grep: malloc failed\n");
4093       goto EXIT2;
4094       }
4095     fn->next = NULL;
4096     fn->name = option_data;
4097     if (*(fd->anchor) == NULL)
4098       *(fd->anchor) = fn;
4099     else
4100       (*(fd->lastptr))->next = fn;
4101     *(fd->lastptr) = fn;
4102     }
4103 
4104   /* Handle OP_BINARY_FILES */
4105 
4106   else if (op->type == OP_BINFILES)
4107     {
4108     if (strcmp(option_data, "binary") == 0)
4109       binary_files = BIN_BINARY;
4110     else if (strcmp(option_data, "without-match") == 0)
4111       binary_files = BIN_NOMATCH;
4112     else if (strcmp(option_data, "text") == 0)
4113       binary_files = BIN_TEXT;
4114     else
4115       {
4116       fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
4117         option_data);
4118       pcre2grep_exit(usage(2));
4119       }
4120     }
4121 
4122   /* Otherwise, deal with a single string or numeric data value. */
4123 
4124   else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
4125            op->type != OP_OP_NUMBER && op->type != OP_SIZE)
4126     {
4127     *((char **)op->dataptr) = option_data;
4128     }
4129   else
4130     {
4131     unsigned long int n = decode_number(option_data, op, longop);
4132     if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
4133       else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
4134       else *((int *)op->dataptr) = n;
4135     }
4136   }
4137 
4138 /* Options have been decoded. If -C was used, its value is used as a default
4139 for -A and -B. */
4140 
4141 if (both_context > 0)
4142   {
4143   if (after_context == 0) after_context = both_context;
4144   if (before_context == 0) before_context = both_context;
4145   }
4146 
4147 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4148 permitted. They display, each in their own way, only the data that has matched.
4149 */
4150 
4151 only_matching_count = (only_matching != NULL) + (output_text != NULL) +
4152   file_offsets + line_offsets;
4153 
4154 if (only_matching_count > 1)
4155   {
4156   fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4157     "--file-offsets and/or --line-offsets\n");
4158   pcre2grep_exit(usage(2));
4159   }
4160 
4161 
4162 /* Check that there is a big enough ovector for all -o settings. */
4163 
4164 for (om = only_matching; om != NULL; om = om->next)
4165   {
4166   int n = om->groupnum;
4167   if (n > (int)capture_max)
4168     {
4169     fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
4170     fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
4171     goto EXIT2;
4172     }
4173   }
4174 
4175 /* Check the text supplied to --output for errors. */
4176 
4177 if (output_text != NULL &&
4178     !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
4179   goto EXIT2;
4180 
4181 /* Set up default compile and match contexts and a match data block. */
4182 
4183 offset_size = capture_max + 1;
4184 compile_context = pcre2_compile_context_create(NULL);
4185 match_context = pcre2_match_context_create(NULL);
4186 match_data = pcre2_match_data_create(offset_size, NULL);
4187 offsets = pcre2_get_ovector_pointer(match_data);
4188 
4189 /* If string (script) callouts are supported, set up the callout processing
4190 function. */
4191 
4192 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4193 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
4194 #endif
4195 
4196 /* Put limits into the match data block. */
4197 
4198 if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
4199 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
4200 if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
4201 
4202 /* If a locale has not been provided as an option, see if the LC_CTYPE or
4203 LC_ALL environment variable is set, and if so, use it. */
4204 
4205 if (locale == NULL)
4206   {
4207   locale = getenv("LC_ALL");
4208   locale_from = "LC_ALL";
4209   }
4210 
4211 if (locale == NULL)
4212   {
4213   locale = getenv("LC_CTYPE");
4214   locale_from = "LC_CTYPE";
4215   }
4216 
4217 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
4218 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4219 
4220 if (locale != NULL)
4221   {
4222   if (setlocale(LC_CTYPE, locale) == NULL)
4223     {
4224     fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4225       locale, locale_from);
4226     goto EXIT2;
4227     }
4228   character_tables = pcre2_maketables(NULL);
4229   pcre2_set_character_tables(compile_context, character_tables);
4230   }
4231 
4232 /* Sort out colouring */
4233 
4234 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4235   {
4236   if (strcmp(colour_option, "always") == 0)
4237 #ifdef WIN32
4238     do_ansi = !is_stdout_tty(),
4239 #endif
4240     do_colour = TRUE;
4241   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4242   else
4243     {
4244     fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4245       colour_option);
4246     goto EXIT2;
4247     }
4248   if (do_colour)
4249     {
4250     char *cs = getenv("PCRE2GREP_COLOUR");
4251     if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4252     if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4253     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4254     if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4255     if (cs == NULL) cs = getenv("GREP_COLOR");
4256     if (cs != NULL)
4257       {
4258       if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4259       }
4260 #ifdef WIN32
4261     init_colour_output();
4262 #endif
4263     }
4264   }
4265 
4266 /* Sort out a newline setting. */
4267 
4268 if (newline_arg != NULL)
4269   {
4270   for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4271        endlinetype++)
4272     {
4273     if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4274     }
4275   if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4276     pcre2_set_newline(compile_context, endlinetype);
4277   else
4278     {
4279     fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4280       newline_arg);
4281     goto EXIT2;
4282     }
4283   }
4284 
4285 /* Find default newline convention */
4286 
4287 else
4288   {
4289   (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4290   }
4291 
4292 /* Interpret the text values for -d and -D */
4293 
4294 if (dee_option != NULL)
4295   {
4296   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4297   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4298   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4299   else
4300     {
4301     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4302     goto EXIT2;
4303     }
4304   }
4305 
4306 if (DEE_option != NULL)
4307   {
4308   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4309   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4310   else
4311     {
4312     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4313     goto EXIT2;
4314     }
4315   }
4316 
4317 /* Set the extra options */
4318 
4319 (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4320 
4321 /* Check the values for Jeffrey Friedl's debugging options. */
4322 
4323 #ifdef JFRIEDL_DEBUG
4324 if (S_arg > 9)
4325   {
4326   fprintf(stderr, "pcre2grep: bad value for -S option\n");
4327   return 2;
4328   }
4329 if (jfriedl_XT != 0 || jfriedl_XR != 0)
4330   {
4331   if (jfriedl_XT == 0) jfriedl_XT = 1;
4332   if (jfriedl_XR == 0) jfriedl_XR = 1;
4333   }
4334 #endif
4335 
4336 /* If use_jit is set, check whether JIT is available. If not, do not try
4337 to use JIT. */
4338 
4339 if (use_jit)
4340   {
4341   uint32_t answer;
4342   (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4343   if (!answer) use_jit = FALSE;
4344   }
4345 
4346 /* Get memory for the main buffer. */
4347 
4348 if (bufthird <= 0)
4349   {
4350   fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4351   goto EXIT2;
4352   }
4353 
4354 bufsize = 3*bufthird;
4355 main_buffer = (char *)malloc(bufsize);
4356 
4357 if (main_buffer == NULL)
4358   {
4359   fprintf(stderr, "pcre2grep: malloc failed\n");
4360   goto EXIT2;
4361   }
4362 
4363 /* If no patterns were provided by -e, and there are no files provided by -f,
4364 the first argument is the one and only pattern, and it must exist. */
4365 
4366 if (patterns == NULL && pattern_files == NULL)
4367   {
4368   if (i >= argc) return usage(2);
4369   patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4370     NULL);
4371   i++;
4372   if (patterns == NULL) goto EXIT2;
4373   }
4374 
4375 /* Compile the patterns that were provided on the command line, either by
4376 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4377 after all the command-line options are read so that we know which PCRE options
4378 to use. When -F is used, compile_pattern() may add another block into the
4379 chain, so we must not access the next pointer till after the compile. */
4380 
4381 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4382   {
4383   if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4384        (j == 1 && patterns->next == NULL)? 0 : j))
4385     goto EXIT2;
4386   }
4387 
4388 /* Read and compile the regular expressions that are provided in files. */
4389 
4390 for (fn = pattern_files; fn != NULL; fn = fn->next)
4391   {
4392   if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4393   }
4394 
4395 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4396 
4397 #ifdef SUPPORT_PCRE2GREP_JIT
4398 if (use_jit)
4399   {
4400   jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4401   if (jit_stack != NULL                        )
4402     pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4403   }
4404 #endif
4405 
4406 /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4407 adjust the options. */
4408 
4409 pcre2_options &= ~PCRE2_LITERAL;
4410 (void)pcre2_set_compile_extra_options(compile_context, 0);
4411 
4412 /* If there are include or exclude patterns read from the command line, compile
4413 them. */
4414 
4415 for (j = 0; j < 4; j++)
4416   {
4417   int k;
4418   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4419     {
4420     if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4421          (k == 1 && cp->next == NULL)? 0 : k))
4422       goto EXIT2;
4423     }
4424   }
4425 
4426 /* Read and compile include/exclude patterns from files. */
4427 
4428 for (fn = include_from; fn != NULL; fn = fn->next)
4429   {
4430   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4431     goto EXIT2;
4432   }
4433 
4434 for (fn = exclude_from; fn != NULL; fn = fn->next)
4435   {
4436   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4437     goto EXIT2;
4438   }
4439 
4440 /* If there are no files that contain lists of files to search, and there are
4441 no file arguments, search stdin, and then exit. */
4442 
4443 if (file_lists == NULL && i >= argc)
4444   {
4445   rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4446     (filenames > FN_DEFAULT)? stdin_name : NULL);
4447   goto EXIT;
4448   }
4449 
4450 /* If any files that contains a list of files to search have been specified,
4451 read them line by line and search the given files. */
4452 
4453 for (fn = file_lists; fn != NULL; fn = fn->next)
4454   {
4455   char buffer[FNBUFSIZ];
4456   FILE *fl;
4457   if (strcmp(fn->name, "-") == 0) fl = stdin; else
4458     {
4459     fl = fopen(fn->name, "rb");
4460     if (fl == NULL)
4461       {
4462       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4463         strerror(errno));
4464       goto EXIT2;
4465       }
4466     }
4467   while (fgets(buffer, sizeof(buffer), fl) != NULL)
4468     {
4469     int frc;
4470     char *end = buffer + (int)strlen(buffer);
4471     while (end > buffer && isspace(end[-1])) end--;
4472     *end = 0;
4473     if (*buffer != 0)
4474       {
4475       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4476       if (frc > 1) rc = frc;
4477         else if (frc == 0 && rc == 1) rc = 0;
4478       }
4479     }
4480   if (fl != stdin) fclose(fl);
4481   }
4482 
4483 /* After handling file-list, work through remaining arguments. Pass in the fact
4484 that there is only one argument at top level - this suppresses the file name if
4485 the argument is not a directory and filenames are not otherwise forced. */
4486 
4487 only_one_at_top = i == argc - 1 && file_lists == NULL;
4488 
4489 for (; i < argc; i++)
4490   {
4491   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4492     only_one_at_top);
4493   if (frc > 1) rc = frc;
4494     else if (frc == 0 && rc == 1) rc = 0;
4495   }
4496 
4497 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4498 /* If separating builtin echo callouts by implicit newline, add one more for
4499 the final item. */
4500 
4501 if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
4502   fprintf(stdout, STDOUT_NL);
4503 #endif
4504 
4505 /* Show the total number of matches if requested, but not if only one file's
4506 count was printed. */
4507 
4508 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4509   {
4510   if (counts_printed != 0 && filenames >= FN_DEFAULT)
4511     fprintf(stdout, "TOTAL:");
4512   fprintf(stdout, "%lu" STDOUT_NL, total_count);
4513   }
4514 
4515 EXIT:
4516 #ifdef SUPPORT_PCRE2GREP_JIT
4517 pcre2_jit_free_unused_memory(NULL);
4518 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4519 #endif
4520 
4521 free(main_buffer);
4522 if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables);
4523 
4524 pcre2_compile_context_free(compile_context);
4525 pcre2_match_context_free(match_context);
4526 pcre2_match_data_free(match_data);
4527 
4528 free_pattern_chain(patterns);
4529 free_pattern_chain(include_patterns);
4530 free_pattern_chain(include_dir_patterns);
4531 free_pattern_chain(exclude_patterns);
4532 free_pattern_chain(exclude_dir_patterns);
4533 
4534 free_file_chain(exclude_from);
4535 free_file_chain(include_from);
4536 free_file_chain(pattern_files);
4537 free_file_chain(file_lists);
4538 
4539 while (only_matching != NULL)
4540   {
4541   omstr *this = only_matching;
4542   only_matching = this->next;
4543   free(this);
4544   }
4545 
4546 pcre2grep_exit(rc);
4547 
4548 EXIT2:
4549 rc = 2;
4550 goto EXIT;
4551 }
4552 
4553 /* End of pcre2grep */
4554