1 /*************************************************
2 *               pcre2grep program                *
3 *************************************************/
4 
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9 
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15 
16            Copyright (c) 1997-2018 University of Cambridge
17 
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21 
22     * Redistributions of source code must retain the above copyright notice,
23       this list of conditions and the following disclaimer.
24 
25     * Redistributions in binary form must reproduce the above copyright
26       notice, this list of conditions and the following disclaimer in the
27       documentation and/or other materials provided with the distribution.
28 
29     * Neither the name of the University of Cambridge nor the names of its
30       contributors may be used to endorse or promote products derived from
31       this software without specific prior written permission.
32 
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46 
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50 
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57 
58 #include <sys/types.h>
59 #include <sys/stat.h>
60 
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62   && !defined WIN32 && !defined(__CYGWIN__)
63 #define WIN32
64 #endif
65 
66 /* Some cmake's define it still */
67 #if defined(__CYGWIN__) && defined(WIN32)
68 #undef WIN32
69 #endif
70 
71 #ifdef WIN32
72 #include <io.h>                /* For _setmode() */
73 #include <fcntl.h>             /* For _O_BINARY */
74 #endif
75 
76 #ifdef SUPPORT_PCRE2GREP_CALLOUT
77 #ifdef WIN32
78 #include <process.h>
79 #else
80 #include <sys/wait.h>
81 #endif
82 #endif
83 
84 #ifdef HAVE_UNISTD_H
85 #include <unistd.h>
86 #endif
87 
88 #ifdef SUPPORT_LIBZ
89 #include <zlib.h>
90 #endif
91 
92 #ifdef SUPPORT_LIBBZ2
93 #include <bzlib.h>
94 #endif
95 
96 #define PCRE2_CODE_UNIT_WIDTH 8
97 #include "pcre2.h"
98 
99 /* Older versions of MSVC lack snprintf(). This define allows for
100 warning/error-free compilation and testing with MSVC compilers back to at least
101 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
102 
103 #if defined(_MSC_VER) && (_MSC_VER < 1900)
104 #define snprintf _snprintf
105 #endif
106 
107 #define FALSE 0
108 #define TRUE 1
109 
110 typedef int BOOL;
111 
112 #define OFFSET_SIZE 33
113 
114 #if BUFSIZ > 8192
115 #define MAXPATLEN BUFSIZ
116 #else
117 #define MAXPATLEN 8192
118 #endif
119 
120 #define FNBUFSIZ 2048
121 #define ERRBUFSIZ 256
122 
123 /* Values for the "filenames" variable, which specifies options for file name
124 output. The order is important; it is assumed that a file name is wanted for
125 all values greater than FN_DEFAULT. */
126 
127 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
128 
129 /* File reading styles */
130 
131 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
132 
133 /* Actions for the -d and -D options */
134 
135 enum { dee_READ, dee_SKIP, dee_RECURSE };
136 enum { DEE_READ, DEE_SKIP };
137 
138 /* Actions for special processing options (flag bits) */
139 
140 #define PO_WORD_MATCH     0x0001
141 #define PO_LINE_MATCH     0x0002
142 #define PO_FIXED_STRINGS  0x0004
143 
144 /* Binary file options */
145 
146 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
147 
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge: the result of fwrite() is tested by
an "if" with an empty body. The test is wrapped in do { } while (0) so that the
macro expands to exactly one statement; it must be followed by a semicolon, and
can then safely be used as the body of an unbraced if/else. Oddly, the warning
does not also seem to apply to fprintf(). */

#define FWRITE_IGNORE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
155 
156 /* Under Windows, we have to set stdout to be binary, so that it does not
157 convert \r\n at the ends of output lines to \r\r\n. However, that means that
158 any messages written to stdout must have \r\n as their line terminator. This is
159 handled by using STDOUT_NL as the newline string. We also use a normal double
160 quote for the example, as single quotes aren't usually available. */
161 
162 #ifdef WIN32
163 #define STDOUT_NL  "\r\n"
164 #define QUOT       "\""
165 #else
166 #define STDOUT_NL  "\n"
167 #define QUOT       "'"
168 #endif
169 
170 
171 
172 /*************************************************
173 *               Global variables                 *
174 *************************************************/
175 
176 /* Jeffrey Friedl has some debugging requirements that are not part of the
177 regular code. */
178 
179 #ifdef JFRIEDL_DEBUG
180 static int S_arg = -1;
181 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
182 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
183 static const char *jfriedl_prefix = "";
184 static const char *jfriedl_postfix = "";
185 #endif
186 
187 static const char *colour_string = "1;31";
188 static const char *colour_option = NULL;
189 static const char *dee_option = NULL;
190 static const char *DEE_option = NULL;
191 static const char *locale = NULL;
192 static const char *newline_arg = NULL;
193 static const char *om_separator = NULL;
194 static const char *stdin_name = "(standard input)";
195 static const char *output_text = NULL;
196 
197 static char *main_buffer = NULL;
198 
199 static int after_context = 0;
200 static int before_context = 0;
201 static int binary_files = BIN_BINARY;
202 static int both_context = 0;
203 static int bufthird = PCRE2GREP_BUFSIZE;
204 static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
205 static int bufsize = 3*PCRE2GREP_BUFSIZE;
206 static int endlinetype;
207 
208 static unsigned long int total_count = 0;
209 static unsigned long int counts_printed = 0;
210 
211 #ifdef WIN32
212 static int dee_action = dee_SKIP;
213 #else
214 static int dee_action = dee_READ;
215 #endif
216 
217 static int DEE_action = DEE_READ;
218 static int error_count = 0;
219 static int filenames = FN_DEFAULT;
220 
221 #ifdef SUPPORT_PCRE2GREP_JIT
222 static BOOL use_jit = TRUE;
223 #else
224 static BOOL use_jit = FALSE;
225 #endif
226 
227 static const uint8_t *character_tables = NULL;
228 
229 static uint32_t pcre2_options = 0;
230 static uint32_t extra_options = 0;
231 static PCRE2_SIZE heap_limit = PCRE2_UNSET;
232 static uint32_t match_limit = 0;
233 static uint32_t depth_limit = 0;
234 
235 static pcre2_compile_context *compile_context;
236 static pcre2_match_context *match_context;
237 static pcre2_match_data *match_data;
238 static PCRE2_SIZE *offsets;
239 
240 static BOOL count_only = FALSE;
241 static BOOL do_colour = FALSE;
242 #ifdef WIN32
243 static BOOL do_ansi = FALSE;
244 #endif
245 static BOOL file_offsets = FALSE;
246 static BOOL hyphenpending = FALSE;
247 static BOOL invert = FALSE;
248 static BOOL line_buffered = FALSE;
249 static BOOL line_offsets = FALSE;
250 static BOOL multiline = FALSE;
251 static BOOL number = FALSE;
252 static BOOL omit_zero_count = FALSE;
253 static BOOL resource_error = FALSE;
254 static BOOL quiet = FALSE;
255 static BOOL show_total_count = FALSE;
256 static BOOL silent = FALSE;
257 static BOOL utf = FALSE;
258 
259 /* Structure for list of --only-matching capturing numbers. */
260 
261 typedef struct omstr {
262   struct omstr *next;
263   int groupnum;
264 } omstr;
265 
266 static omstr *only_matching = NULL;
267 static omstr *only_matching_last = NULL;
268 static int only_matching_count;
269 
270 /* Structure for holding the two variables that describe a number chain. */
271 
272 typedef struct omdatastr {
273   omstr **anchor;
274   omstr **lastptr;
275 } omdatastr;
276 
277 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
278 
279 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
280 
281 typedef struct fnstr {
282   struct fnstr *next;
283   char *name;
284 } fnstr;
285 
286 static fnstr *exclude_from = NULL;
287 static fnstr *exclude_from_last = NULL;
288 static fnstr *include_from = NULL;
289 static fnstr *include_from_last = NULL;
290 
291 static fnstr *file_lists = NULL;
292 static fnstr *file_lists_last = NULL;
293 static fnstr *pattern_files = NULL;
294 static fnstr *pattern_files_last = NULL;
295 
296 /* Structure for holding the two variables that describe a file name chain. */
297 
298 typedef struct fndatastr {
299   fnstr **anchor;
300   fnstr **lastptr;
301 } fndatastr;
302 
303 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
304 static fndatastr include_from_data = { &include_from, &include_from_last };
305 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
306 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
307 
308 /* Structure for pattern and its compiled form; used for matching patterns and
309 also for include/exclude patterns. */
310 
311 typedef struct patstr {
312   struct patstr *next;
313   char *string;
314   PCRE2_SIZE length;
315   pcre2_code *compiled;
316 } patstr;
317 
318 static patstr *patterns = NULL;
319 static patstr *patterns_last = NULL;
320 static patstr *include_patterns = NULL;
321 static patstr *include_patterns_last = NULL;
322 static patstr *exclude_patterns = NULL;
323 static patstr *exclude_patterns_last = NULL;
324 static patstr *include_dir_patterns = NULL;
325 static patstr *include_dir_patterns_last = NULL;
326 static patstr *exclude_dir_patterns = NULL;
327 static patstr *exclude_dir_patterns_last = NULL;
328 
329 /* Structure holding the two variables that describe a pattern chain. A pointer
330 to such structures is used for each appropriate option. */
331 
332 typedef struct patdatastr {
333   patstr **anchor;
334   patstr **lastptr;
335 } patdatastr;
336 
337 static patdatastr match_patdata = { &patterns, &patterns_last };
338 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
339 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
340 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
341 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
342 
343 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
344                                  &include_dir_patterns, &exclude_dir_patterns };
345 
346 static const char *incexname[4] = { "--include", "--exclude",
347                                     "--include-dir", "--exclude-dir" };
348 
349 /* Structure for options and list of them */
350 
351 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
352        OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
353 
354 typedef struct option_item {
355   int type;
356   int one_char;
357   void *dataptr;
358   const char *long_name;
359   const char *help_text;
360 } option_item;
361 
362 /* Options without a single-letter equivalent get a negative value. This can be
363 used to identify them. */
364 
365 #define N_COLOUR       (-1)
366 #define N_EXCLUDE      (-2)
367 #define N_EXCLUDE_DIR  (-3)
368 #define N_HELP         (-4)
369 #define N_INCLUDE      (-5)
370 #define N_INCLUDE_DIR  (-6)
371 #define N_LABEL        (-7)
372 #define N_LOCALE       (-8)
373 #define N_NULL         (-9)
374 #define N_LOFFSETS     (-10)
375 #define N_FOFFSETS     (-11)
376 #define N_LBUFFER      (-12)
377 #define N_H_LIMIT      (-13)
378 #define N_M_LIMIT      (-14)
379 #define N_M_LIMIT_DEP  (-15)
380 #define N_BUFSIZE      (-16)
381 #define N_NOJIT        (-17)
382 #define N_FILE_LIST    (-18)
383 #define N_BINARY_FILES (-19)
384 #define N_EXCLUDE_FROM (-20)
385 #define N_INCLUDE_FROM (-21)
386 #define N_OM_SEPARATOR (-22)
387 #define N_MAX_BUFSIZE  (-23)
388 
389 static option_item optionlist[] = {
390   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
391   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
392   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
393   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
394   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
395   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
396   { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer starting size" },
397   { OP_NUMBER,     N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number",  "set processing buffer maximum size" },
398   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
399   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
400   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
401   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
402   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
403   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
404   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
405   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
406   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
407   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
408   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
409   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
410   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
411   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
412   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
413   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
414   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
415   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
416   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
417   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
418   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
419   { OP_SIZE,       N_H_LIMIT, &heap_limit,      "heap-limit=number",  "set PCRE2 heap limit option (kibibytes)" },
420   { OP_U32NUMBER,  N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE2 match limit option" },
421   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
422   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
423   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
424   { OP_STRING,     'N',      &newline_arg,      "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
425   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
426 #ifdef SUPPORT_PCRE2GREP_JIT
427   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
428 #else
429   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcre2grep does not support JIT" },
430 #endif
431   { OP_STRING,     'O',      &output_text,       "output=text",   "show only this text (possibly expanded)" },
432   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
433   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
434   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
435   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
436   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
437   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
438   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
439   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
440   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
441   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
442 #ifdef JFRIEDL_DEBUG
443   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
444 #endif
445   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
446   { OP_NODATA,    't',      NULL,              "total-count",   "print total count of matching lines" },
447   { OP_NODATA,    'u',      NULL,              "utf",           "use UTF mode" },
448   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
449   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
450   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
451   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
452   { OP_NODATA,    0,        NULL,               NULL,            NULL }
453 };
454 
455 /* Table of names for newline types. Must be kept in step with the definitions
456 of PCRE2_NEWLINE_xx in pcre2.h. */
457 
458 static const char *newlines[] = {
459   "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
460 
461 /* UTF-8 tables - used only when the newline setting is "any". */
462 
463 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
464 
465 const char utf8_table4[] = {
466   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
467   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
468   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
469   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
470 
471 
472 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
473 /*************************************************
474 *    Emulated memmove() for systems without it   *
475 *************************************************/
476 
/* A stand-in for memmove(). If bcopy() is available the whole job is handed to
it; otherwise the bytes are moved one at a time, because some non-Unix
environments have neither memmove() nor bcopy().

Arguments:
  d          where to copy to
  s          where to copy from
  n          number of bytes to copy

Returns:     d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t left = n;

if (to > from)
  {
  /* The destination lies above the source, so work downwards from the end;
  that way no source byte is overwritten before it has been read. */
  to += n;
  from += n;
  while (left-- > 0) *(--to) = *(--from);
  }
else
  {
  while (left-- > 0) *to++ = *from++;
  }
return d;
#endif   /* not HAVE_BCOPY */
}
505 #undef memmove
506 #define memmove(d,s,n) emulated_memmove(d,s,n)
507 #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
508 
509 
510 /*************************************************
511 *         Case-independent string compare        *
512 *************************************************/
513 
/* Compares two zero-terminated strings, treating upper and lower case letters
as equal. Each character is converted to unsigned char before being given to
tolower(), because passing a negative value (a byte with the top bit set on
platforms where char is signed) to a <ctype.h> function is undefined behaviour.

Arguments:
  str1       first string
  str2       second string

Returns:     0 if the strings are equal, ignoring case
             -1 if, at the first difference, str1 has the smaller character
             +1 if, at the first difference, str1 has the larger character
*/

static int
strcmpic(const char *str1, const char *str2)
{
unsigned int c1, c2;

/* The loop stops only when both strings have ended. If just one has ended, its
terminating zero differs from the other string's character, so the comparison
below returns before either pointer can run past its terminator. */

while (*str1 != '\0' || *str2 != '\0')
  {
  c1 = (unsigned int)tolower((unsigned char)*str1++);
  c2 = (unsigned int)tolower((unsigned char)*str2++);
  if (c1 != c2) return ((c1 > c2) << 1) - 1;   /* +1 or -1 */
  }
return 0;
}
526 
527 
528 /*************************************************
529 *         Parse GREP_COLORS                      *
530 *************************************************/
531 
/* Pick the value of ms or mt out of a GREP_COLORS string. "ms=" is looked for
first; "mt=" is used only when there is no "ms=". The value runs up to the next
colon or the end of the string, and is cut short if it does not fit in the
internal buffer.

Argument:  the string, possibly NULL
Returns:   the value of ms or mt, or NULL if neither present; the value is
           held in a static buffer that is overwritten by the next call
*/

static char *
parse_grep_colors(const char *gc)
{
static char seq[16];
const char *value;
size_t used = 0;

if (gc == NULL) return NULL;

value = strstr(gc, "ms=");
if (value == NULL) value = strstr(gc, "mt=");
if (value == NULL) return NULL;

/* Step over the three-character key, then copy the value. */

for (value += 3;
     *value != ':' && *value != 0 && used < sizeof(seq) - 1;
     value++)
  seq[used++] = *value;

seq[used] = 0;
return seq;
}
555 
556 
557 /*************************************************
558 *         Exit from the program                  *
559 *************************************************/
560 
561 /* If there has been a resource error, give a suitable message.
562 
563 Argument:  the return code
564 Returns:   does not return
565 */
566 
567 static void
pcre2grep_exit(int rc)568 pcre2grep_exit(int rc)
569 {
570 /* VMS does exit codes differently: both exit(1) and exit(0) return with a
571 status of 1, which is not helpful. To help with this problem, define a symbol
572 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
573 therein. */
574 
575 #ifdef __VMS
576 #include descrip
577 #include lib$routines
578   char val_buf[4];
579   $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
580   $DESCRIPTOR(sym_val, val_buf);
581   sprintf(val_buf, "%d", rc);
582   sym_val.dsc$w_length = strlen(val_buf);
583   lib$set_symbol(&sym_nam, &sym_val);
584 #endif
585 
586 if (resource_error)
587   {
588   fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
589     "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
590     PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
591   fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
592   }
593 exit(rc);
594 }
595 
596 
597 /*************************************************
598 *          Add item to chain of patterns         *
599 *************************************************/
600 
601 /* Used to add an item onto a chain, or just return an unconnected item if the
602 "after" argument is NULL.
603 
604 Arguments:
605   s          pattern string to add
606   patlen     length of pattern
607   after      if not NULL points to item to insert after
608 
609 Returns:     new pattern block or NULL on error
610 */
611 
612 static patstr *
add_pattern(char * s,PCRE2_SIZE patlen,patstr * after)613 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
614 {
615 patstr *p = (patstr *)malloc(sizeof(patstr));
616 if (p == NULL)
617   {
618   fprintf(stderr, "pcre2grep: malloc failed\n");
619   pcre2grep_exit(2);
620   }
621 if (patlen > MAXPATLEN)
622   {
623   fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
624     MAXPATLEN);
625   free(p);
626   return NULL;
627   }
628 p->next = NULL;
629 p->string = s;
630 p->length = patlen;
631 p->compiled = NULL;
632 
633 if (after != NULL)
634   {
635   p->next = after->next;
636   after->next = p;
637   }
638 return p;
639 }
640 
641 
642 /*************************************************
643 *           Free chain of patterns               *
644 *************************************************/
645 
646 /* Used for several chains of patterns.
647 
648 Argument: pointer to start of chain
649 Returns:  nothing
650 */
651 
652 static void
free_pattern_chain(patstr * pc)653 free_pattern_chain(patstr *pc)
654 {
655 while (pc != NULL)
656   {
657   patstr *p = pc;
658   pc = p->next;
659   if (p->compiled != NULL) pcre2_code_free(p->compiled);
660   free(p);
661   }
662 }
663 
664 
665 /*************************************************
666 *           Free chain of file names             *
667 *************************************************/
668 
669 /*
670 Argument: pointer to start of chain
671 Returns:  nothing
672 */
673 
674 static void
free_file_chain(fnstr * fn)675 free_file_chain(fnstr *fn)
676 {
677 while (fn != NULL)
678   {
679   fnstr *f = fn;
680   fn = f->next;
681   free(f);
682   }
683 }
684 
685 
686 /*************************************************
687 *            OS-specific functions               *
688 *************************************************/
689 
690 /* These definitions are needed in all Windows environments, even those where
691 Unix-style directory scanning can be used (see below). */
692 
693 #ifdef WIN32
694 
695 #ifndef STRICT
696 # define STRICT
697 #endif
698 #ifndef WIN32_LEAN_AND_MEAN
699 # define WIN32_LEAN_AND_MEAN
700 #endif
701 
702 #include <windows.h>
703 
704 #define iswild(name) (strpbrk(name, "*?") != NULL)
705 
/* Convert ANSI BGR format to RGB used by Windows: bits 0 and 2 of the value
change places and bit 1 stays where it is. The argument is parenthesized at
each use so that any expression can be passed safely. */
#define BGR_RGB(x) ((((x) & 1) ? 4 : 0) | ((x) & 2) | (((x) & 4) ? 1 : 0))
708 
709 static HANDLE hstdout;
710 static CONSOLE_SCREEN_BUFFER_INFO csbi;
711 static WORD match_colour;
712 
713 static WORD
decode_ANSI_colour(const char * cs)714 decode_ANSI_colour(const char *cs)
715 {
716 WORD result = csbi.wAttributes;
717 while (*cs)
718   {
719   if (isdigit(*cs))
720     {
721     int code = atoi(cs);
722     if (code == 1) result |= 0x08;
723     else if (code == 4) result |= 0x8000;
724     else if (code == 5) result |= 0x80;
725     else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
726     else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
727     else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
728     else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
729     /* aixterm high intensity colour codes */
730     else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
731     else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
732 
733     while (isdigit(*cs)) cs++;
734     }
735   if (*cs) cs++;
736   }
737 return result;
738 }
739 
740 
741 static void
init_colour_output()742 init_colour_output()
743 {
744 if (do_colour)
745   {
746   hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
747   /* This fails when redirected to con; try again if so. */
748   if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
749     {
750     HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
751       FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
752     GetConsoleScreenBufferInfo(hcon, &csbi);
753     CloseHandle(hcon);
754     }
755   match_colour = decode_ANSI_colour(colour_string);
756   /* No valid colour found - turn off colouring */
757   if (!match_colour) do_colour = FALSE;
758   }
759 }
760 
761 #endif  /* WIN32 */
762 
763 
764 /* The following sets of functions are defined so that they can be made system
765 specific. At present there are versions for Unix-style environments, Windows,
766 native z/OS, and "no support". */
767 
768 
769 /************* Directory scanning Unix-style and z/OS ***********/
770 
771 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
772 #include <sys/types.h>
773 #include <sys/stat.h>
774 #include <dirent.h>
775 
776 #if defined NATIVE_ZOS
777 /************* Directory and PDS/E scanning for z/OS ***********/
778 /************* z/OS looks mostly like Unix with USS ************/
779 /* However, z/OS needs the #include statements in this header */
780 #include "pcrzosfs.h"
781 /* That header is not included in the main PCRE distribution because
782    other apparatus is needed to compile pcre2grep for z/OS. The header
783    can be found in the special z/OS distribution, which is available
784    from www.zaconsultants.net or from www.cbttape.org. */
785 #endif
786 
787 typedef DIR directory_type;
788 #define FILESEP '/'
789 
/* Test whether a path names a directory.

Argument:  the path to test
Returns:   non-zero if stat() succeeds and reports a directory; zero
           otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
struct stat info;

/* A failing stat() yields zero, in the expectation that opening the path as a
file will then fail as well. */

return (stat(filename, &info) < 0)? 0 : S_ISDIR(info.st_mode);
}
798 
799 static directory_type *
opendirectory(char * filename)800 opendirectory(char *filename)
801 {
802 return opendir(filename);
803 }
804 
805 static char *
readdirectory(directory_type * dir)806 readdirectory(directory_type *dir)
807 {
808 for (;;)
809   {
810   struct dirent *dent = readdir(dir);
811   if (dent == NULL) return NULL;
812   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
813     return dent->d_name;
814   }
815 /* Control never reaches here */
816 }
817 
818 static void
closedirectory(directory_type * dir)819 closedirectory(directory_type *dir)
820 {
821 closedir(dir);
822 }
823 
824 
825 /************* Test for regular file, Unix-style **********/
826 
/* Test whether a path names a regular file.

Argument:  the path to test
Returns:   non-zero if stat() reports a regular file, and also (as 1) when
           stat() fails; zero if stat() succeeds and reports anything else
*/

static int
isregfile(char *filename)
{
struct stat info;

/* A failing stat() yields 1, in the expectation that opening the path as a
file will fail. */

return (stat(filename, &info) < 0)? 1 : S_ISREG(info.st_mode);
}
835 
836 
837 #if defined NATIVE_ZOS
838 /************* Test for a terminal in z/OS **********/
839 /* isatty() does not work in a TSO environment, so always give FALSE.*/
840 
841 static BOOL
is_stdout_tty(void)842 is_stdout_tty(void)
843 {
844 return FALSE;
845 }
846 
847 static BOOL
is_file_tty(FILE * f)848 is_file_tty(FILE *f)
849 {
850 return FALSE;
851 }
852 
853 
854 /************* Test for a terminal, Unix-style **********/
855 
856 #else
857 static BOOL
is_stdout_tty(void)858 is_stdout_tty(void)
859 {
860 return isatty(fileno(stdout));
861 }
862 
863 static BOOL
is_file_tty(FILE * f)864 is_file_tty(FILE *f)
865 {
866 return isatty(fileno(f));
867 }
868 #endif
869 
870 
871 /************* Print optionally coloured match Unix-style and z/OS **********/
872 
873 static void
print_match(const void * buf,int length)874 print_match(const void *buf, int length)
875 {
876 if (length == 0) return;
877 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
878 FWRITE_IGNORE(buf, 1, length, stdout);
879 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
880 }
881 
882 /* End of Unix-style or native z/OS environment functions. */
883 
884 
885 /************* Directory scanning in Windows ***********/
886 
887 /* I (Philip Hazel) have no means of testing this code. It was contributed by
888 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
889 when it did not exist. David Byron added a patch that moved the #include of
890 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
891 */
892 
893 #elif defined WIN32
894 
895 #ifndef INVALID_FILE_ATTRIBUTES
896 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
897 #endif
898 
899 typedef struct directory_type
900 {
901 HANDLE handle;
902 BOOL first;
903 WIN32_FIND_DATA data;
904 } directory_type;
905 
906 #define FILESEP '/'
907 
908 int
isdirectory(char * filename)909 isdirectory(char *filename)
910 {
911 DWORD attr = GetFileAttributes(filename);
912 if (attr == INVALID_FILE_ATTRIBUTES)
913   return 0;
914 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
915 }
916 
917 directory_type *
opendirectory(char * filename)918 opendirectory(char *filename)
919 {
920 size_t len;
921 char *pattern;
922 directory_type *dir;
923 DWORD err;
924 len = strlen(filename);
925 pattern = (char *)malloc(len + 3);
926 dir = (directory_type *)malloc(sizeof(*dir));
927 if ((pattern == NULL) || (dir == NULL))
928   {
929   fprintf(stderr, "pcre2grep: malloc failed\n");
930   pcre2grep_exit(2);
931   }
932 memcpy(pattern, filename, len);
933 if (iswild(filename))
934   pattern[len] = 0;
935 else
936   memcpy(&(pattern[len]), "\\*", 3);
937 dir->handle = FindFirstFile(pattern, &(dir->data));
938 if (dir->handle != INVALID_HANDLE_VALUE)
939   {
940   free(pattern);
941   dir->first = TRUE;
942   return dir;
943   }
944 err = GetLastError();
945 free(pattern);
946 free(dir);
947 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
948 return NULL;
949 }
950 
951 char *
readdirectory(directory_type * dir)952 readdirectory(directory_type *dir)
953 {
954 for (;;)
955   {
956   if (!dir->first)
957     {
958     if (!FindNextFile(dir->handle, &(dir->data)))
959       return NULL;
960     }
961   else
962     {
963     dir->first = FALSE;
964     }
965   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
966     return dir->data.cFileName;
967   }
968 #ifndef _MSC_VER
969 return NULL;   /* Keep compiler happy; never executed */
970 #endif
971 }
972 
973 void
closedirectory(directory_type * dir)974 closedirectory(directory_type *dir)
975 {
976 FindClose(dir->handle);
977 free(dir);
978 }
979 
980 
981 /************* Test for regular file in Windows **********/
982 
983 /* I don't know how to do this, or if it can be done; assume all paths are
984 regular if they are not directories. */
985 
/* Windows version: anything that isdirectory() does not accept is assumed to
be a regular file.

Argument:   filename   the path to test
Returns:    1 if the path is not a directory, 0 if it is
*/

int
isregfile(char *filename)
{
return isdirectory(filename)? 0 : 1;
}
990 
991 
992 /************* Test for a terminal in Windows **********/
993 
994 static BOOL
is_stdout_tty(void)995 is_stdout_tty(void)
996 {
997 return _isatty(_fileno(stdout));
998 }
999 
1000 static BOOL
is_file_tty(FILE * f)1001 is_file_tty(FILE *f)
1002 {
1003 return _isatty(_fileno(f));
1004 }
1005 
1006 
1007 /************* Print optionally coloured match in Windows **********/
1008 
1009 static void
print_match(const void * buf,int length)1010 print_match(const void *buf, int length)
1011 {
1012 if (length == 0) return;
1013 if (do_colour)
1014   {
1015   if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1016     else SetConsoleTextAttribute(hstdout, match_colour);
1017   }
1018 FWRITE_IGNORE(buf, 1, length, stdout);
1019 if (do_colour)
1020   {
1021   if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1022     else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1023   }
1024 }
1025 
1026 /* End of Windows functions */
1027 
1028 
1029 /************* Directory scanning when we can't do it ***********/
1030 
1031 /* The type is void, and apart from isdirectory(), the functions do nothing. */
1032 
1033 #else
1034 
1035 #define FILESEP 0
1036 typedef void directory_type;
1037 
/* Fallback: directories cannot be detected, so nothing is one. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}
opendirectory(char * filename)1039 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
/* Fallback: there are never any entries; a null pointer is always returned. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}
/* Fallback: there is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
1042 
1043 
1044 /************* Test for regular file when we can't do it **********/
1045 
1046 /* Assume all files are regular. */
1047 
/* Fallback: every path is taken to be a regular file. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
1049 
1050 
1051 /************* Test for a terminal when we can't do it **********/
1052 
1053 static BOOL
is_stdout_tty(void)1054 is_stdout_tty(void)
1055 {
1056 return FALSE;
1057 }
1058 
1059 static BOOL
is_file_tty(FILE * f)1060 is_file_tty(FILE *f)
1061 {
1062 return FALSE;
1063 }
1064 
1065 
1066 /************* Print optionally coloured match when we can't do it **********/
1067 
/* Fallback: write matched text to stdout with no colouring. Nothing is
written for a zero length.

Arguments:
  buf       the text to write
  length    the number of bytes to write

Returns:    nothing
*/

static void
print_match(const void *buf, int length)
{
if (length != 0)
  {
  FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1074 
1075 #endif  /* End of system-specific functions */
1076 
1077 
1078 
1079 #ifndef HAVE_STRERROR
1080 /*************************************************
1081 *     Provide strerror() for non-ANSI libraries  *
1082 *************************************************/
1083 
1084 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1085 in their libraries, but can provide the same facility by this simple
1086 alternative function. */
1087 
1088 extern int   sys_nerr;
1089 extern char *sys_errlist[];
1090 
1091 char *
strerror(int n)1092 strerror(int n)
1093 {
1094 if (n < 0 || n >= sys_nerr) return "unknown error number";
1095 return sys_errlist[n];
1096 }
1097 #endif /* HAVE_STRERROR */
1098 
1099 
1100 
1101 /*************************************************
1102 *                Usage function                  *
1103 *************************************************/
1104 
1105 static int
usage(int rc)1106 usage(int rc)
1107 {
1108 option_item *op;
1109 fprintf(stderr, "Usage: pcre2grep [-");
1110 for (op = optionlist; op->one_char != 0; op++)
1111   {
1112   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1113   }
1114 fprintf(stderr, "] [long options] [pattern] [files]\n");
1115 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1116   "options.\n");
1117 return rc;
1118 }
1119 
1120 
1121 
1122 /*************************************************
1123 *                Help function                   *
1124 *************************************************/
1125 
1126 static void
help(void)1127 help(void)
1128 {
1129 option_item *op;
1130 
1131 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1132 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1133 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1134 
1135 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1136 printf("Callout scripts in patterns are supported." STDOUT_NL);
1137 #else
1138 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1139 #endif
1140 
1141 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1142 
1143 #ifdef SUPPORT_LIBZ
1144 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1145 #endif
1146 
1147 #ifdef SUPPORT_LIBBZ2
1148 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1149 #endif
1150 
1151 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1152 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1153 #else
1154 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1155 #endif
1156 
1157 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1158 printf("Options:" STDOUT_NL);
1159 
1160 for (op = optionlist; op->one_char != 0; op++)
1161   {
1162   int n;
1163   char s[4];
1164 
1165   if (op->one_char > 0 && (op->long_name)[0] == 0)
1166     n = 31 - printf("  -%c", op->one_char);
1167   else
1168     {
1169     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1170       else strcpy(s, "   ");
1171     n = 31 - printf("  %s --%s", s, op->long_name);
1172     }
1173 
1174   if (n < 1) n = 1;
1175   printf("%.*s%s" STDOUT_NL, n, "                           ", op->help_text);
1176   }
1177 
1178 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1179 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1180 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1181 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1182 printf("space is removed and blank lines are ignored." STDOUT_NL);
1183 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1184 
1185 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1186 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1187 }
1188 
1189 
1190 
1191 /*************************************************
1192 *            Test exclude/includes               *
1193 *************************************************/
1194 
1195 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1196 there are no includes, the path must match an include pattern.
1197 
1198 Arguments:
1199   path      the path to be matched
1200   ip        the chain of include patterns
1201   ep        the chain of exclude patterns
1202 
1203 Returns:    TRUE if the path is not excluded
1204 */
1205 
1206 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)1207 test_incexc(char *path, patstr *ip, patstr *ep)
1208 {
1209 int plen = strlen((const char *)path);
1210 
1211 for (; ep != NULL; ep = ep->next)
1212   {
1213   if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1214     return FALSE;
1215   }
1216 
1217 if (ip == NULL) return TRUE;
1218 
1219 for (; ip != NULL; ip = ip->next)
1220   {
1221   if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1222     return TRUE;
1223   }
1224 
1225 return FALSE;
1226 }
1227 
1228 
1229 
1230 /*************************************************
1231 *         Decode integer argument value          *
1232 *************************************************/
1233 
1234 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1235 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1236 just keep it simple.
1237 
1238 Arguments:
1239   option_data   the option data string
1240   op            the option item (for error messages)
1241   longop        TRUE if option given in long form
1242 
1243 Returns:        a long integer
1244 */
1245 
1246 static long int
decode_number(char * option_data,option_item * op,BOOL longop)1247 decode_number(char *option_data, option_item *op, BOOL longop)
1248 {
1249 unsigned long int n = 0;
1250 char *endptr = option_data;
1251 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1252 while (isdigit((unsigned char)(*endptr)))
1253   n = n * 10 + (int)(*endptr++ - '0');
1254 if (toupper(*endptr) == 'K')
1255   {
1256   n *= 1024;
1257   endptr++;
1258   }
1259 else if (toupper(*endptr) == 'M')
1260   {
1261   n *= 1024*1024;
1262   endptr++;
1263   }
1264 
1265 if (*endptr != 0)   /* Error */
1266   {
1267   if (longop)
1268     {
1269     char *equals = strchr(op->long_name, '=');
1270     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1271       (int)(equals - op->long_name);
1272     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1273       option_data, nlen, op->long_name);
1274     }
1275   else
1276     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1277       option_data, op->one_char);
1278   pcre2grep_exit(usage(2));
1279   }
1280 
1281 return n;
1282 }
1283 
1284 
1285 
1286 /*************************************************
1287 *       Add item to a chain of numbers           *
1288 *************************************************/
1289 
1290 /* Used to add an item onto a chain, or just return an unconnected item if the
1291 "after" argument is NULL.
1292 
1293 Arguments:
1294   n          the number to add
1295   after      if not NULL points to item to insert after
1296 
1297 Returns:     new number block
1298 */
1299 
1300 static omstr *
add_number(int n,omstr * after)1301 add_number(int n, omstr *after)
1302 {
1303 omstr *om = (omstr *)malloc(sizeof(omstr));
1304 
1305 if (om == NULL)
1306   {
1307   fprintf(stderr, "pcre2grep: malloc failed\n");
1308   pcre2grep_exit(2);
1309   }
1310 om->next = NULL;
1311 om->groupnum = n;
1312 
1313 if (after != NULL)
1314   {
1315   om->next = after->next;
1316   after->next = om;
1317   }
1318 return om;
1319 }
1320 
1321 
1322 
1323 /*************************************************
1324 *            Read one line of input              *
1325 *************************************************/
1326 
1327 /* Normally, input that is to be scanned is read using fread() (or gzread, or
1328 BZ2_read) into a large buffer, so many lines may be read at once. However,
1329 doing this for tty input means that no output appears until a lot of input has
1330 been typed. Instead, tty input is handled line by line. We cannot use fgets()
1331 for this, because it does not stop at a binary zero, and therefore there is no
1332 way of telling how many characters it has read, because there may be binary
1333 zeros embedded in the data. This function is also used for reading patterns
1334 from files (the -f option).
1335 
1336 Arguments:
1337   buffer     the buffer to read into
1338   length     the maximum number of characters to read
1339   f          the file
1340 
1341 Returns:     the number of characters read, zero at end of file
1342 */
1343 
1344 static PCRE2_SIZE
read_one_line(char * buffer,int length,FILE * f)1345 read_one_line(char *buffer, int length, FILE *f)
1346 {
1347 int c;
1348 int yield = 0;
1349 while ((c = fgetc(f)) != EOF)
1350   {
1351   buffer[yield++] = c;
1352   if (c == '\n' || yield >= length) break;
1353   }
1354 return yield;
1355 }
1356 
1357 
1358 
1359 /*************************************************
1360 *             Find end of line                   *
1361 *************************************************/
1362 
1363 /* The length of the endline sequence that is found is set via lenptr. This may
1364 be zero at the very end of the file if there is no line-ending sequence there.
1365 
1366 Arguments:
1367   p         current position in line
1368   endptr    end of available data
1369   lenptr    where to put the length of the eol sequence
1370 
1371 Returns:    pointer after the last byte of the line,
1372             including the newline byte(s)
1373 */
1374 
1375 static char *
end_of_line(char * p,char * endptr,int * lenptr)1376 end_of_line(char *p, char *endptr, int *lenptr)
1377 {
1378 switch(endlinetype)
1379   {
1380   default:      /* Just in case */
1381   case PCRE2_NEWLINE_LF:
1382   while (p < endptr && *p != '\n') p++;
1383   if (p < endptr)
1384     {
1385     *lenptr = 1;
1386     return p + 1;
1387     }
1388   *lenptr = 0;
1389   return endptr;
1390 
1391   case PCRE2_NEWLINE_CR:
1392   while (p < endptr && *p != '\r') p++;
1393   if (p < endptr)
1394     {
1395     *lenptr = 1;
1396     return p + 1;
1397     }
1398   *lenptr = 0;
1399   return endptr;
1400 
1401   case PCRE2_NEWLINE_NUL:
1402   while (p < endptr && *p != '\0') p++;
1403   if (p < endptr)
1404     {
1405     *lenptr = 1;
1406     return p + 1;
1407     }
1408   *lenptr = 0;
1409   return endptr;
1410 
1411   case PCRE2_NEWLINE_CRLF:
1412   for (;;)
1413     {
1414     while (p < endptr && *p != '\r') p++;
1415     if (++p >= endptr)
1416       {
1417       *lenptr = 0;
1418       return endptr;
1419       }
1420     if (*p == '\n')
1421       {
1422       *lenptr = 2;
1423       return p + 1;
1424       }
1425     }
1426   break;
1427 
1428   case PCRE2_NEWLINE_ANYCRLF:
1429   while (p < endptr)
1430     {
1431     int extra = 0;
1432     int c = *((unsigned char *)p);
1433 
1434     if (utf && c >= 0xc0)
1435       {
1436       int gcii, gcss;
1437       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1438       gcss = 6*extra;
1439       c = (c & utf8_table3[extra]) << gcss;
1440       for (gcii = 1; gcii <= extra; gcii++)
1441         {
1442         gcss -= 6;
1443         c |= (p[gcii] & 0x3f) << gcss;
1444         }
1445       }
1446 
1447     p += 1 + extra;
1448 
1449     switch (c)
1450       {
1451       case '\n':
1452       *lenptr = 1;
1453       return p;
1454 
1455       case '\r':
1456       if (p < endptr && *p == '\n')
1457         {
1458         *lenptr = 2;
1459         p++;
1460         }
1461       else *lenptr = 1;
1462       return p;
1463 
1464       default:
1465       break;
1466       }
1467     }   /* End of loop for ANYCRLF case */
1468 
1469   *lenptr = 0;  /* Must have hit the end */
1470   return endptr;
1471 
1472   case PCRE2_NEWLINE_ANY:
1473   while (p < endptr)
1474     {
1475     int extra = 0;
1476     int c = *((unsigned char *)p);
1477 
1478     if (utf && c >= 0xc0)
1479       {
1480       int gcii, gcss;
1481       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1482       gcss = 6*extra;
1483       c = (c & utf8_table3[extra]) << gcss;
1484       for (gcii = 1; gcii <= extra; gcii++)
1485         {
1486         gcss -= 6;
1487         c |= (p[gcii] & 0x3f) << gcss;
1488         }
1489       }
1490 
1491     p += 1 + extra;
1492 
1493     switch (c)
1494       {
1495       case '\n':    /* LF */
1496       case '\v':    /* VT */
1497       case '\f':    /* FF */
1498       *lenptr = 1;
1499       return p;
1500 
1501       case '\r':    /* CR */
1502       if (p < endptr && *p == '\n')
1503         {
1504         *lenptr = 2;
1505         p++;
1506         }
1507       else *lenptr = 1;
1508       return p;
1509 
1510 #ifndef EBCDIC
1511       case 0x85:    /* Unicode NEL */
1512       *lenptr = utf? 2 : 1;
1513       return p;
1514 
1515       case 0x2028:  /* Unicode LS */
1516       case 0x2029:  /* Unicode PS */
1517       *lenptr = 3;
1518       return p;
1519 #endif  /* Not EBCDIC */
1520 
1521       default:
1522       break;
1523       }
1524     }   /* End of loop for ANY case */
1525 
1526   *lenptr = 0;  /* Must have hit the end */
1527   return endptr;
1528   }     /* End of overall switch */
1529 }
1530 
1531 
1532 
1533 /*************************************************
1534 *         Find start of previous line            *
1535 *************************************************/
1536 
1537 /* This is called when looking back for before lines to print.
1538 
1539 Arguments:
1540   p         start of the subsequent line
1541   startptr  start of available data
1542 
1543 Returns:    pointer to the start of the previous line
1544 */
1545 
1546 static char *
previous_line(char * p,char * startptr)1547 previous_line(char *p, char *startptr)
1548 {
1549 switch(endlinetype)
1550   {
1551   default:      /* Just in case */
1552   case PCRE2_NEWLINE_LF:
1553   p--;
1554   while (p > startptr && p[-1] != '\n') p--;
1555   return p;
1556 
1557   case PCRE2_NEWLINE_CR:
1558   p--;
1559   while (p > startptr && p[-1] != '\n') p--;
1560   return p;
1561 
1562   case PCRE2_NEWLINE_NUL:
1563   p--;
1564   while (p > startptr && p[-1] != '\0') p--;
1565   return p;
1566 
1567   case PCRE2_NEWLINE_CRLF:
1568   for (;;)
1569     {
1570     p -= 2;
1571     while (p > startptr && p[-1] != '\n') p--;
1572     if (p <= startptr + 1 || p[-2] == '\r') return p;
1573     }
1574   /* Control can never get here */
1575 
1576   case PCRE2_NEWLINE_ANY:
1577   case PCRE2_NEWLINE_ANYCRLF:
1578   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1579   if (utf) while ((*p & 0xc0) == 0x80) p--;
1580 
1581   while (p > startptr)
1582     {
1583     unsigned int c;
1584     char *pp = p - 1;
1585 
1586     if (utf)
1587       {
1588       int extra = 0;
1589       while ((*pp & 0xc0) == 0x80) pp--;
1590       c = *((unsigned char *)pp);
1591       if (c >= 0xc0)
1592         {
1593         int gcii, gcss;
1594         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1595         gcss = 6*extra;
1596         c = (c & utf8_table3[extra]) << gcss;
1597         for (gcii = 1; gcii <= extra; gcii++)
1598           {
1599           gcss -= 6;
1600           c |= (pp[gcii] & 0x3f) << gcss;
1601           }
1602         }
1603       }
1604     else c = *((unsigned char *)pp);
1605 
1606     if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1607       {
1608       case '\n':    /* LF */
1609       case '\r':    /* CR */
1610       return p;
1611 
1612       default:
1613       break;
1614       }
1615 
1616     else switch (c)
1617       {
1618       case '\n':    /* LF */
1619       case '\v':    /* VT */
1620       case '\f':    /* FF */
1621       case '\r':    /* CR */
1622 #ifndef EBCDIC
1623       case 0x85:    /* Unicode NEL */
1624       case 0x2028:  /* Unicode LS */
1625       case 0x2029:  /* Unicode PS */
1626 #endif  /* Not EBCDIC */
1627       return p;
1628 
1629       default:
1630       break;
1631       }
1632 
1633     p = pp;  /* Back one character */
1634     }        /* End of loop for ANY case */
1635 
1636   return startptr;  /* Hit start of data */
1637   }     /* End of overall switch */
1638 }
1639 
1640 
1641 
1642 /*************************************************
1643 *       Print the previous "after" lines         *
1644 *************************************************/
1645 
1646 /* This is called if we are about to lose said lines because of buffer filling,
1647 and at the end of the file. The data in the line is written using fwrite() so
1648 that a binary zero does not terminate it.
1649 
1650 Arguments:
1651   lastmatchnumber   the number of the last matching line, plus one
1652   lastmatchrestart  where we restarted after the last match
1653   endptr            end of available data
1654   printname         filename for printing
1655 
1656 Returns:            nothing
1657 */
1658 
1659 static void
do_after_lines(unsigned long int lastmatchnumber,char * lastmatchrestart,char * endptr,const char * printname)1660 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1661   char *endptr, const char *printname)
1662 {
1663 if (after_context > 0 && lastmatchnumber > 0)
1664   {
1665   int count = 0;
1666   while (lastmatchrestart < endptr && count < after_context)
1667     {
1668     int ellength;
1669     char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1670     if (ellength == 0 && pp == main_buffer + bufsize) break;
1671     if (printname != NULL) fprintf(stdout, "%s-", printname);
1672     if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1673     FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1674     lastmatchrestart = pp;
1675     count++;
1676     }
1677   if (count > 0) hyphenpending = TRUE;
1678   }
1679 }
1680 
1681 
1682 
1683 /*************************************************
1684 *   Apply patterns to subject till one matches   *
1685 *************************************************/
1686 
1687 /* This function is called to run through all patterns, looking for a match. It
1688 is used multiple times for the same subject when colouring is enabled, in order
1689 to find all possible matches.
1690 
1691 Arguments:
1692   matchptr     the start of the subject
1693   length       the length of the subject to match
1694   options      options for pcre2_match()
1695   startoffset  where to start matching
1696   mrc          address of where to put the result of pcre2_match()
1697 
1698 Returns:      TRUE if there was a match
1699               FALSE if there was no match
1700               invert if there was a non-fatal error
1701 */
1702 
1703 static BOOL
match_patterns(char * matchptr,PCRE2_SIZE length,unsigned int options,PCRE2_SIZE startoffset,int * mrc)1704 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1705   PCRE2_SIZE startoffset, int *mrc)
1706 {
1707 int i;
1708 PCRE2_SIZE slen = length;
1709 patstr *p = patterns;
1710 const char *msg = "this text:\n\n";
1711 
1712 if (slen > 200)
1713   {
1714   slen = 200;
1715   msg = "text that starts:\n\n";
1716   }
1717 for (i = 1; p != NULL; p = p->next, i++)
1718   {
1719   *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1720     startoffset, options, match_data, match_context);
1721   if (*mrc >= 0) return TRUE;
1722   if (*mrc == PCRE2_ERROR_NOMATCH) continue;
1723   fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1724   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1725   fprintf(stderr, "%s", msg);
1726   FWRITE_IGNORE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1727   fprintf(stderr, "\n\n");
1728   if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
1729       *mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1730     resource_error = TRUE;
1731   if (error_count++ > 20)
1732     {
1733     fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1734     pcre2grep_exit(2);
1735     }
1736   return invert;    /* No more matching; don't show the line again */
1737   }
1738 
1739 return FALSE;  /* No match, no errors */
1740 }
1741 
1742 
1743 /*************************************************
1744 *          Check output text for errors          *
1745 *************************************************/
1746 
1747 static BOOL
syntax_check_output_text(PCRE2_SPTR string,BOOL callout)1748 syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
1749 {
1750 PCRE2_SPTR begin = string;
1751 for (; *string != 0; string++)
1752   {
1753   if (*string == '$')
1754     {
1755     PCRE2_SIZE capture_id = 0;
1756     BOOL brace = FALSE;
1757 
1758     string++;
1759 
1760     /* Syntax error: a character must be present after $. */
1761     if (*string == 0)
1762       {
1763       if (!callout)
1764         fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1765           (int)(string - begin), "no character after $");
1766       return FALSE;
1767       }
1768 
1769     if (*string == '{')
1770       {
1771       /* Must be a decimal number in braces, e.g: {5} or {38} */
1772       string++;
1773 
1774       brace = TRUE;
1775       }
1776 
1777     if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
1778       {
1779       do
1780         {
1781         /* Maximum capture id is 65535. */
1782         if (capture_id <= 65535)
1783           capture_id = capture_id * 10 + (*string - '0');
1784 
1785         string++;
1786         }
1787       while (*string >= '0' && *string <= '9');
1788 
1789       if (brace)
1790         {
1791         /* Syntax error: closing brace is missing. */
1792         if (*string != '}')
1793           {
1794           if (!callout)
1795             fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1796               (int)(string - begin), "missing closing brace");
1797           return FALSE;
1798           }
1799         }
1800       else
1801         {
1802         /* To negate the effect of the for. */
1803         string--;
1804         }
1805       }
1806     else if (brace)
1807       {
1808       /* Syntax error: a decimal number required. */
1809       if (!callout)
1810         fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1811           (int)(string - begin), "decimal number expected");
1812       return FALSE;
1813       }
1814     else if (*string == 'o')
1815       {
1816       string++;
1817 
1818       if (*string < '0' || *string > '7')
1819         {
1820         /* Syntax error: an octal number required. */
1821         if (!callout)
1822           fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1823             (int)(string - begin), "octal number expected");
1824         return FALSE;
1825         }
1826       }
1827     else if (*string == 'x')
1828       {
1829       string++;
1830 
1831       if (!isxdigit((unsigned char)*string))
1832         {
1833         /* Syntax error: a hexdecimal number required. */
1834         if (!callout)
1835           fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1836             (int)(string - begin), "hexadecimal number expected");
1837         return FALSE;
1838         }
1839       }
1840     }
1841   }
1842 
1843   return TRUE;
1844 }
1845 
1846 
1847 /*************************************************
1848 *              Display output text               *
1849 *************************************************/
1850 
1851 /* Display the output text, which is assumed to have already been syntax
1852 checked. Output may contain escape sequences started by the dollar sign. The
1853 escape sequences are substituted as follows:
1854 
1855   $<digits> or ${<digits>} is replaced by the captured substring of the given
1856   decimal number; zero will substitute the whole match. If the number is
1857   greater than the number of capturing substrings, or if the capture is unset,
1858   the replacement is empty.
1859 
1860   $a is replaced by bell.
1861   $b is replaced by backspace.
1862   $e is replaced by escape.
1863   $f is replaced by form feed.
1864   $n is replaced by newline.
1865   $r is replaced by carriage return.
1866   $t is replaced by tab.
1867   $v is replaced by vertical tab.
1868 
1869   $o<digits> is replaced by the character represented by the given octal
1870   number; up to three digits are processed.
1871 
1872   $x<digits> is replaced by the character represented by the given hexadecimal
1873   number; up to two digits are processed.
1874 
1875   Any other character is substituted by itself. E.g: $$ is replaced by a single
1876   dollar.
1877 
1878 Arguments:
1879   string:       the output text
1880   callout:      TRUE for the builtin callout, FALSE for --output
1881   subject       the start of the subject
1882   ovector:      capture offsets
1883   capture_top:  number of captures
1884 
1885 Returns:        TRUE if something was output, other than newline
1886                 FALSE if nothing was output, or newline was last output
1887 */
1888 
1889 static BOOL
display_output_text(PCRE2_SPTR string,BOOL callout,PCRE2_SPTR subject,PCRE2_SIZE * ovector,PCRE2_SIZE capture_top)1890 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
1891   PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
1892 {
1893 BOOL printed = FALSE;
1894 
1895 for (; *string != 0; string++)
1896   {
1897   int ch = EOF;
1898   if (*string == '$')
1899     {
1900     PCRE2_SIZE capture_id = 0;
1901     BOOL brace = FALSE;
1902 
1903     string++;
1904 
1905     if (*string == '{')
1906       {
1907       /* Must be a decimal number in braces, e.g: {5} or {38} */
1908       string++;
1909 
1910       brace = TRUE;
1911       }
1912 
1913     if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
1914       {
1915       do
1916         {
1917         /* Maximum capture id is 65535. */
1918         if (capture_id <= 65535)
1919           capture_id = capture_id * 10 + (*string - '0');
1920 
1921         string++;
1922         }
1923       while (*string >= '0' && *string <= '9');
1924 
1925       if (!brace)
1926         {
1927         /* To negate the effect of the for. */
1928         string--;
1929         }
1930 
1931       if (capture_id < capture_top)
1932         {
1933         PCRE2_SIZE capturesize;
1934         capture_id *= 2;
1935 
1936         capturesize = ovector[capture_id + 1] - ovector[capture_id];
1937         if (capturesize > 0)
1938           {
1939           print_match(subject + ovector[capture_id], capturesize);
1940           printed = TRUE;
1941           }
1942         }
1943       }
1944     else if (*string == 'a') ch = '\a';
1945     else if (*string == 'b') ch = '\b';
1946 #ifndef EBCDIC
1947     else if (*string == 'e') ch = '\033';
1948 #else
1949     else if (*string == 'e') ch = '\047';
1950 #endif
1951     else if (*string == 'f') ch = '\f';
1952     else if (*string == 'r') ch = '\r';
1953     else if (*string == 't') ch = '\t';
1954     else if (*string == 'v') ch = '\v';
1955     else if (*string == 'n')
1956       {
1957       fprintf(stdout, STDOUT_NL);
1958       printed = FALSE;
1959       }
1960     else if (*string == 'o')
1961       {
1962       string++;
1963 
1964       ch = *string - '0';
1965       if (string[1] >= '0' && string[1] <= '7')
1966         {
1967         string++;
1968         ch = ch * 8 + (*string - '0');
1969         }
1970       if (string[1] >= '0' && string[1] <= '7')
1971         {
1972         string++;
1973         ch = ch * 8 + (*string - '0');
1974         }
1975       }
1976     else if (*string == 'x')
1977       {
1978       string++;
1979 
1980       if (*string >= '0' && *string <= '9')
1981         ch = *string - '0';
1982       else
1983         ch = (*string | 0x20) - 'a' + 10;
1984       if (isxdigit((unsigned char)string[1]))
1985         {
1986         string++;
1987         ch *= 16;
1988         if (*string >= '0' && *string <= '9')
1989           ch += *string - '0';
1990         else
1991           ch += (*string | 0x20) - 'a' + 10;
1992         }
1993       }
1994     else
1995       {
1996       ch = *string;
1997       }
1998     }
1999   else
2000     {
2001     ch = *string;
2002     }
2003   if (ch != EOF)
2004     {
2005     fprintf(stdout, "%c", ch);
2006     printed = TRUE;
2007     }
2008   }
2009 
2010 return printed;
2011 }
2012 
2013 
2014 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2015 
2016 /*************************************************
2017 *        Parse and execute callout scripts       *
2018 *************************************************/
2019 
2020 /* This function parses a callout string block and executes the
2021 program specified by the string. The string is a list of substrings
2022 separated by pipe characters. The first substring represents the
2023 executable name, and the following substrings specify the arguments:
2024 
2025   program_name|param1|param2|...
2026 
2027 Any substring (including the program name) can contain escape sequences
2028 started by the dollar character. The escape sequences are substituted as
2029 follows:
2030 
2031   $<digits> or ${<digits>} is replaced by the captured substring of the given
2032   decimal number, which must be greater than zero. If the number is greater
2033   than the number of capturing substrings, or if the capture is unset, the
2034   replacement is empty.
2035 
2036   Any other character is substituted by itself. E.g: $$ is replaced by a single
2037   dollar or $| replaced by a pipe character.
2038 
2039 Alternatively, if string starts with pipe, the remainder is taken as an output
2040 string, same as --output. In this case, --om-separator is used to separate each
2041 callout, defaulting to newline.
2042 
2043 Example:
2044 
2045   echo -e "abcde\n12345" | pcre2grep \
2046     '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2047 
2048   Output:
2049 
2050     Arg1: [a] [bcd] [d] Arg2: |a| ()
2051     abcde
2052     Arg1: [1] [234] [4] Arg2: |1| ()
2053     12345
2054 
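Arguments:
  calloutptr   the callout block
  unused       the callout data pointer (not used)

Returns:       0 if the callout is ignored, produces only output text, or the
               executed program yields a zero status (match continues);
               1 if the executed program yields a non-zero status (match fails)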
2059 */
2060 
2061 static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)2062 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2063 {
2064 PCRE2_SIZE length = calloutptr->callout_string_length;
2065 PCRE2_SPTR string = calloutptr->callout_string;
2066 PCRE2_SPTR subject = calloutptr->subject;
2067 PCRE2_SIZE *ovector = calloutptr->offset_vector;
2068 PCRE2_SIZE capture_top = calloutptr->capture_top;
2069 PCRE2_SIZE argsvectorlen = 2;
2070 PCRE2_SIZE argslen = 1;
2071 char *args;
2072 char *argsptr;
2073 char **argsvector;
2074 char **argsvectorptr;
2075 #ifndef WIN32
2076 pid_t pid;
2077 #endif
2078 int result = 0;
2079 
2080 (void)unused;   /* Avoid compiler warning */
2081 
2082 /* Only callout with strings are supported. */
2083 if (string == NULL || length == 0) return 0;
2084 
2085 /* If there's no command, output the remainder directly. */
2086 
2087 if (*string == '|')
2088   {
2089   string++;
2090   if (!syntax_check_output_text(string, TRUE)) return 0;
2091   (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2092   return 0;
2093   }
2094 
2095 /* Checking syntax and compute the number of string fragments. Callout strings
2096 are ignored in case of a syntax error. */
2097 
2098 while (length > 0)
2099   {
2100   if (*string == '|')
2101     {
2102     argsvectorlen++;
2103 
2104     /* Maximum 10000 arguments allowed. */
2105     if (argsvectorlen > 10000) return 0;
2106     }
2107   else if (*string == '$')
2108     {
2109     PCRE2_SIZE capture_id = 0;
2110 
2111     string++;
2112     length--;
2113 
2114     /* Syntax error: a character must be present after $. */
2115     if (length == 0) return 0;
2116 
2117     if (*string >= '1' && *string <= '9')
2118       {
2119       do
2120         {
2121         /* Maximum capture id is 65535. */
2122         if (capture_id <= 65535)
2123           capture_id = capture_id * 10 + (*string - '0');
2124 
2125         string++;
2126         length--;
2127         }
2128       while (length > 0 && *string >= '0' && *string <= '9');
2129 
2130       /* To negate the effect of string++ below. */
2131       string--;
2132       length++;
2133       }
2134     else if (*string == '{')
2135       {
2136       /* Must be a decimal number in braces, e.g: {5} or {38} */
2137       string++;
2138       length--;
2139 
2140       /* Syntax error: a decimal number required. */
2141       if (length == 0) return 0;
2142       if (*string < '1' || *string > '9') return 0;
2143 
2144       do
2145         {
2146         /* Maximum capture id is 65535. */
2147         if (capture_id <= 65535)
2148           capture_id = capture_id * 10 + (*string - '0');
2149 
2150         string++;
2151         length--;
2152 
2153         /* Syntax error: no more characters */
2154         if (length == 0) return 0;
2155         }
2156       while (*string >= '0' && *string <= '9');
2157 
2158       /* Syntax error: closing brace is missing. */
2159       if (*string != '}') return 0;
2160       }
2161 
2162     if (capture_id > 0)
2163       {
2164       if (capture_id < capture_top)
2165         {
2166         capture_id *= 2;
2167         argslen += ovector[capture_id + 1] - ovector[capture_id];
2168         }
2169 
2170       /* To negate the effect of argslen++ below. */
2171       argslen--;
2172       }
2173     }
2174 
2175   string++;
2176   length--;
2177   argslen++;
2178   }
2179 
2180 args = (char*)malloc(argslen);
2181 if (args == NULL) return 0;
2182 
2183 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2184 if (argsvector == NULL)
2185   {
2186   free(args);
2187   return 0;
2188   }
2189 
2190 argsptr = args;
2191 argsvectorptr = argsvector;
2192 
2193 *argsvectorptr++ = argsptr;
2194 
2195 length = calloutptr->callout_string_length;
2196 string = calloutptr->callout_string;
2197 
2198 while (length > 0)
2199   {
2200   if (*string == '|')
2201     {
2202     *argsptr++ = '\0';
2203     *argsvectorptr++ = argsptr;
2204     }
2205   else if (*string == '$')
2206     {
2207     string++;
2208     length--;
2209 
2210     if ((*string >= '1' && *string <= '9') || *string == '{')
2211       {
2212       PCRE2_SIZE capture_id = 0;
2213 
2214       if (*string != '{')
2215         {
2216         do
2217           {
2218           /* Maximum capture id is 65535. */
2219           if (capture_id <= 65535)
2220             capture_id = capture_id * 10 + (*string - '0');
2221 
2222           string++;
2223           length--;
2224           }
2225         while (length > 0 && *string >= '0' && *string <= '9');
2226 
2227         /* To negate the effect of string++ below. */
2228         string--;
2229         length++;
2230         }
2231       else
2232         {
2233         string++;
2234         length--;
2235 
2236         do
2237           {
2238           /* Maximum capture id is 65535. */
2239           if (capture_id <= 65535)
2240             capture_id = capture_id * 10 + (*string - '0');
2241 
2242           string++;
2243           length--;
2244           }
2245         while (*string != '}');
2246         }
2247 
2248         if (capture_id < capture_top)
2249           {
2250           PCRE2_SIZE capturesize;
2251           capture_id *= 2;
2252 
2253           capturesize = ovector[capture_id + 1] - ovector[capture_id];
2254           memcpy(argsptr, subject + ovector[capture_id], capturesize);
2255           argsptr += capturesize;
2256           }
2257       }
2258     else
2259       {
2260       *argsptr++ = *string;
2261       }
2262     }
2263   else
2264     {
2265     *argsptr++ = *string;
2266     }
2267 
2268   string++;
2269   length--;
2270   }
2271 
2272 *argsptr++ = '\0';
2273 *argsvectorptr = NULL;
2274 
2275 #ifdef WIN32
2276 result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2277 #else
2278 pid = fork();
2279 
2280 if (pid == 0)
2281   {
2282   (void)execv(argsvector[0], argsvector);
2283   /* Control gets here if there is an error, e.g. a non-existent program */
2284   exit(1);
2285   }
2286 else if (pid > 0)
2287   (void)waitpid(pid, &result, 0);
2288 #endif
2289 
2290 free(args);
2291 free(argsvector);
2292 
2293 /* Currently negative return values are not supported, only zero (match
2294 continues) or non-zero (match fails). */
2295 
2296 return result != 0;
2297 }
2298 
2299 #endif
2300 
2301 
2302 
2303 /*************************************************
2304 *     Read a portion of the file into buffer     *
2305 *************************************************/
2306 
2307 static int
fill_buffer(void * handle,int frtype,char * buffer,int length,BOOL input_line_buffered)2308 fill_buffer(void *handle, int frtype, char *buffer, int length,
2309   BOOL input_line_buffered)
2310 {
2311 (void)frtype;  /* Avoid warning when not used */
2312 
2313 #ifdef SUPPORT_LIBZ
2314 if (frtype == FR_LIBZ)
2315   return gzread((gzFile)handle, buffer, length);
2316 else
2317 #endif
2318 
2319 #ifdef SUPPORT_LIBBZ2
2320 if (frtype == FR_LIBBZ2)
2321   return BZ2_bzread((BZFILE *)handle, buffer, length);
2322 else
2323 #endif
2324 
2325 return (input_line_buffered ?
2326   read_one_line(buffer, length, (FILE *)handle) :
2327   fread(buffer, 1, length, (FILE *)handle));
2328 }
2329 
2330 
2331 
2332 /*************************************************
2333 *            Grep an individual file             *
2334 *************************************************/
2335 
2336 /* This is called from grep_or_recurse() below. It uses a buffer that is three
2337 times the value of bufthird. The matching point is never allowed to stray into
2338 the top third of the buffer, thus keeping more of the file available for
2339 context printing or for multiline scanning. For large files, the pointer will
2340 be in the middle third most of the time, so the bottom third is available for
2341 "before" context printing.
2342 
2343 Arguments:
2344   handle       the fopened FILE stream for a normal file
2345                the gzFile pointer when reading is via libz
2346                the BZFILE pointer when reading is via libbz2
2347   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2348   filename     the file name or NULL (for errors)
2349   printname    the file name if it is to be printed for each match
2350                or NULL if the file name is not to be printed
2351                it cannot be NULL if filenames[_nomatch]_only is set
2352 
2353 Returns:       0 if there was at least one match
2354                1 otherwise (no matches)
2355                2 if an overlong line is encountered
2356                3 if there is a read error on a .bz2 file
2357 */
2358 
2359 static int
pcre2grep(void * handle,int frtype,const char * filename,const char * printname)2360 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2361 {
2362 int rc = 1;
2363 int filepos = 0;
2364 unsigned long int linenumber = 1;
2365 unsigned long int lastmatchnumber = 0;
2366 unsigned long int count = 0;
2367 char *lastmatchrestart = main_buffer;
2368 char *ptr = main_buffer;
2369 char *endptr;
2370 PCRE2_SIZE bufflength;
2371 BOOL binary = FALSE;
2372 BOOL endhyphenpending = FALSE;
2373 BOOL input_line_buffered = line_buffered;
2374 FILE *in = NULL;                    /* Ensure initialized */
2375 
2376 /* Do the first read into the start of the buffer and set up the pointer to end
2377 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2378 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2379 fail. */
2380 
2381 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2382   {
2383   in = (FILE *)handle;
2384   if (is_file_tty(in)) input_line_buffered = TRUE;
2385   }
2386 else input_line_buffered = FALSE;
2387 
2388 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2389   input_line_buffered);
2390 
2391 #ifdef SUPPORT_LIBBZ2
2392 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2;   /* Gotcha: bufflength is PCRE2_SIZE; */
2393 #endif
2394 
2395 endptr = main_buffer + bufflength;
2396 
2397 /* Unless binary-files=text, see if we have a binary file. This uses the same
2398 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2399 file. However, when the newline convention is binary zero, we can't do this. */
2400 
2401 if (binary_files != BIN_TEXT)
2402   {
2403   if (endlinetype != PCRE2_NEWLINE_NUL)
2404     binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2405       != NULL;
2406   if (binary && binary_files == BIN_NOMATCH) return 1;
2407   }
2408 
2409 /* Loop while the current pointer is not at the end of the file. For large
2410 files, endptr will be at the end of the buffer when we are in the middle of the
2411 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2412 way, the buffer is shifted left and re-filled. */
2413 
2414 while (ptr < endptr)
2415   {
2416   int endlinelength;
2417   int mrc = 0;
2418   unsigned int options = 0;
2419   BOOL match;
2420   char *t = ptr;
2421   PCRE2_SIZE length, linelength;
2422   PCRE2_SIZE startoffset = 0;
2423 
2424   /* At this point, ptr is at the start of a line. We need to find the length
2425   of the subject string to pass to pcre2_match(). In multiline mode, it is the
2426   length remainder of the data in the buffer. Otherwise, it is the length of
2427   the next line, excluding the terminating newline. After matching, we always
2428   advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2429   option is used for compiling, so that any match is constrained to be in the
2430   first line. */
2431 
2432   t = end_of_line(t, endptr, &endlinelength);
2433   linelength = t - ptr - endlinelength;
2434   length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2435 
2436   /* Check to see if the line we are looking at extends right to the very end
2437   of the buffer without a line terminator. This means the line is too long to
2438   handle at the current buffer size. Until the buffer reaches its maximum size,
2439   try doubling it and reading more data. */
2440 
2441   if (endlinelength == 0 && t == main_buffer + bufsize)
2442     {
2443     if (bufthird < max_bufthird)
2444       {
2445       char *new_buffer;
2446       int new_bufthird = 2*bufthird;
2447 
2448       if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2449       new_buffer = (char *)malloc(3*new_bufthird);
2450 
2451       if (new_buffer == NULL)
2452         {
2453         fprintf(stderr,
2454           "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2455           "pcre2grep: not enough memory to increase the buffer size to %d\n",
2456           linenumber,
2457           (filename == NULL)? "" : " of file ",
2458           (filename == NULL)? "" : filename,
2459           new_bufthird);
2460         return 2;
2461         }
2462 
2463       /* Copy the data and adjust pointers to the new buffer location. */
2464 
2465       memcpy(new_buffer, main_buffer, bufsize);
2466       bufthird = new_bufthird;
2467       bufsize = 3*bufthird;
2468       ptr = new_buffer + (ptr - main_buffer);
2469       lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2470       free(main_buffer);
2471       main_buffer = new_buffer;
2472 
2473       /* Read more data into the buffer and then try to find the line ending
2474       again. */
2475 
2476       bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2477         bufsize - bufflength, input_line_buffered);
2478       endptr = main_buffer + bufflength;
2479       continue;
2480       }
2481     else
2482       {
2483       fprintf(stderr,
2484         "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2485         "pcre2grep: the maximum buffer size is %d\n"
2486         "pcre2grep: use the --max-buffer-size option to change it\n",
2487         linenumber,
2488         (filename == NULL)? "" : " of file ",
2489         (filename == NULL)? "" : filename,
2490         bufthird);
2491       return 2;
2492       }
2493     }
2494 
2495   /* Extra processing for Jeffrey Friedl's debugging. */
2496 
2497 #ifdef JFRIEDL_DEBUG
2498   if (jfriedl_XT || jfriedl_XR)
2499   {
2500 #     include <sys/time.h>
2501 #     include <time.h>
2502       struct timeval start_time, end_time;
2503       struct timezone dummy;
2504       int i;
2505 
2506       if (jfriedl_XT)
2507       {
2508           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
2509           const char *orig = ptr;
2510           ptr = malloc(newlen + 1);
2511           if (!ptr) {
2512                   printf("out of memory");
2513                   pcre2grep_exit(2);
2514           }
2515           endptr = ptr;
2516           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
2517           for (i = 0; i < jfriedl_XT; i++) {
2518                   strncpy(endptr, orig,  length);
2519                   endptr += length;
2520           }
2521           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
2522           length = newlen;
2523       }
2524 
2525       if (gettimeofday(&start_time, &dummy) != 0)
2526               perror("bad gettimeofday");
2527 
2528 
2529       for (i = 0; i < jfriedl_XR; i++)
2530           match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
2531               PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
2532 
2533       if (gettimeofday(&end_time, &dummy) != 0)
2534               perror("bad gettimeofday");
2535 
2536       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
2537                       -
2538                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
2539 
2540       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
2541       return 0;
2542   }
2543 #endif
2544 
2545   /* We come back here after a match when only_matching_count is non-zero, in
2546   order to find any further matches in the same line. This applies to
2547   --only-matching, --file-offsets, and --line-offsets. */
2548 
2549   ONLY_MATCHING_RESTART:
2550 
2551   /* Run through all the patterns until one matches or there is an error other
2552   than NOMATCH. This code is in a subroutine so that it can be re-used for
2553   finding subsequent matches when colouring matched lines. After finding one
2554   match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2555   this line. */
2556 
2557   match = match_patterns(ptr, length, options, startoffset, &mrc);
2558   options = PCRE2_NOTEMPTY;
2559 
2560   /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2561   only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2562   return code - to output data lines, so that binary zeroes are treated as just
2563   another data character. */
2564 
2565   if (match != invert)
2566     {
2567     BOOL hyphenprinted = FALSE;
2568 
2569     /* We've failed if we want a file that doesn't have any matches. */
2570 
2571     if (filenames == FN_NOMATCH_ONLY) return 1;
2572 
2573     /* If all we want is a yes/no answer, we can return immediately. */
2574 
2575     if (quiet) return 0;
2576 
2577     /* Just count if just counting is wanted. */
2578 
2579     else if (count_only || show_total_count) count++;
2580 
2581     /* When handling a binary file and binary-files==binary, the "binary"
2582     variable will be set true (it's false in all other cases). In this
2583     situation we just want to output the file name. No need to scan further. */
2584 
2585     else if (binary)
2586       {
2587       fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2588       return 0;
2589       }
2590 
2591     /* Likewise, if all we want is a file name, there is no need to scan any
2592     more lines in the file. */
2593 
2594     else if (filenames == FN_MATCH_ONLY)
2595       {
2596       fprintf(stdout, "%s" STDOUT_NL, printname);
2597       return 0;
2598       }
2599 
2600     /* The --only-matching option prints just the substring that matched,
2601     and/or one or more captured portions of it, as long as these strings are
2602     not empty. The --file-offsets and --line-offsets options output offsets for
2603     the matching substring (all three set only_matching_count non-zero). None
2604     of these mutually exclusive options prints any context. Afterwards, adjust
2605     the start and then jump back to look for further matches in the same line.
2606     If we are in invert mode, however, nothing is printed and we do not restart
2607     - this could still be useful because the return code is set. */
2608 
2609     else if (only_matching_count != 0)
2610       {
2611       if (!invert)
2612         {
2613         PCRE2_SIZE oldstartoffset;
2614 
2615         if (printname != NULL) fprintf(stdout, "%s:", printname);
2616         if (number) fprintf(stdout, "%lu:", linenumber);
2617 
2618         /* Handle --line-offsets */
2619 
2620         if (line_offsets)
2621           fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2622             (int)(offsets[1] - offsets[0]));
2623 
2624         /* Handle --file-offsets */
2625 
2626         else if (file_offsets)
2627           fprintf(stdout, "%d,%d" STDOUT_NL,
2628             (int)(filepos + ptr + offsets[0] - ptr),
2629             (int)(offsets[1] - offsets[0]));
2630 
2631         /* Handle --output (which has already been syntax checked) */
2632 
2633         else if (output_text != NULL)
2634           {
2635           if (display_output_text((PCRE2_SPTR)output_text, FALSE,
2636               (PCRE2_SPTR)ptr, offsets, mrc) || printname != NULL ||
2637               number)
2638             fprintf(stdout, STDOUT_NL);
2639           }
2640 
2641         /* Handle --only-matching, which may occur many times */
2642 
2643         else
2644           {
2645           BOOL printed = FALSE;
2646           omstr *om;
2647 
2648           for (om = only_matching; om != NULL; om = om->next)
2649             {
2650             int n = om->groupnum;
2651             if (n < mrc)
2652               {
2653               int plen = offsets[2*n + 1] - offsets[2*n];
2654               if (plen > 0)
2655                 {
2656                 if (printed && om_separator != NULL)
2657                   fprintf(stdout, "%s", om_separator);
2658                 print_match(ptr + offsets[n*2], plen);
2659                 printed = TRUE;
2660                 }
2661               }
2662             }
2663 
2664           if (printed || printname != NULL || number)
2665             fprintf(stdout, STDOUT_NL);
2666           }
2667 
2668         /* Prepare to repeat to find the next match in the line. */
2669 
2670         match = FALSE;
2671         if (line_buffered) fflush(stdout);
2672         rc = 0;                      /* Had some success */
2673 
2674         /* If the pattern contained a lookbehind that included \K, it is
2675         possible that the end of the match might be at or before the actual
2676         starting offset we have just used. In this case, start one character
2677         further on. */
2678 
2679         startoffset = offsets[1];    /* Restart after the match */
2680         oldstartoffset = pcre2_get_startchar(match_data);
2681         if (startoffset <= oldstartoffset)
2682           {
2683           if (startoffset >= length) goto END_ONE_MATCH;  /* Were at end */
2684           startoffset = oldstartoffset + 1;
2685           if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2686           }
2687 
2688         /* If the current match ended past the end of the line (only possible
2689         in multiline mode), we must move on to the line in which it did end
2690         before searching for more matches. */
2691 
2692         while (startoffset > linelength)
2693           {
2694           ptr += linelength + endlinelength;
2695           filepos += (int)(linelength + endlinelength);
2696           linenumber++;
2697           startoffset -= (int)(linelength + endlinelength);
2698           t = end_of_line(ptr, endptr, &endlinelength);
2699           linelength = t - ptr - endlinelength;
2700           length = (PCRE2_SIZE)(endptr - ptr);
2701           }
2702 
2703         goto ONLY_MATCHING_RESTART;
2704         }
2705       }
2706 
2707     /* This is the default case when none of the above options is set. We print
2708     the matching lines(s), possibly preceded and/or followed by other lines of
2709     context. */
2710 
2711     else
2712       {
2713       /* See if there is a requirement to print some "after" lines from a
2714       previous match. We never print any overlaps. */
2715 
2716       if (after_context > 0 && lastmatchnumber > 0)
2717         {
2718         int ellength;
2719         int linecount = 0;
2720         char *p = lastmatchrestart;
2721 
2722         while (p < ptr && linecount < after_context)
2723           {
2724           p = end_of_line(p, ptr, &ellength);
2725           linecount++;
2726           }
2727 
2728         /* It is important to advance lastmatchrestart during this printing so
2729         that it interacts correctly with any "before" printing below. Print
2730         each line's data using fwrite() in case there are binary zeroes. */
2731 
2732         while (lastmatchrestart < p)
2733           {
2734           char *pp = lastmatchrestart;
2735           if (printname != NULL) fprintf(stdout, "%s-", printname);
2736           if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2737           pp = end_of_line(pp, endptr, &ellength);
2738           FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2739           lastmatchrestart = pp;
2740           }
2741         if (lastmatchrestart != ptr) hyphenpending = TRUE;
2742         }
2743 
2744       /* If there were non-contiguous lines printed above, insert hyphens. */
2745 
2746       if (hyphenpending)
2747         {
2748         fprintf(stdout, "--" STDOUT_NL);
2749         hyphenpending = FALSE;
2750         hyphenprinted = TRUE;
2751         }
2752 
2753       /* See if there is a requirement to print some "before" lines for this
2754       match. Again, don't print overlaps. */
2755 
2756       if (before_context > 0)
2757         {
2758         int linecount = 0;
2759         char *p = ptr;
2760 
2761         while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
2762                linecount < before_context)
2763           {
2764           linecount++;
2765           p = previous_line(p, main_buffer);
2766           }
2767 
2768         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2769           fprintf(stdout, "--" STDOUT_NL);
2770 
2771         while (p < ptr)
2772           {
2773           int ellength;
2774           char *pp = p;
2775           if (printname != NULL) fprintf(stdout, "%s-", printname);
2776           if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2777           pp = end_of_line(pp, endptr, &ellength);
2778           FWRITE_IGNORE(p, 1, pp - p, stdout);
2779           p = pp;
2780           }
2781         }
2782 
2783       /* Now print the matching line(s); ensure we set hyphenpending at the end
2784       of the file if any context lines are being output. */
2785 
2786       if (after_context > 0 || before_context > 0)
2787         endhyphenpending = TRUE;
2788 
2789       if (printname != NULL) fprintf(stdout, "%s:", printname);
2790       if (number) fprintf(stdout, "%lu:", linenumber);
2791 
2792       /* This extra option, for Jeffrey Friedl's debugging requirements,
2793       replaces the matched string, or a specific captured string if it exists,
2794       with X. When this happens, colouring is ignored. */
2795 
2796 #ifdef JFRIEDL_DEBUG
2797       if (S_arg >= 0 && S_arg < mrc)
2798         {
2799         int first = S_arg * 2;
2800         int last  = first + 1;
2801         FWRITE_IGNORE(ptr, 1, offsets[first], stdout);
2802         fprintf(stdout, "X");
2803         FWRITE_IGNORE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
2804         }
2805       else
2806 #endif
2807 
2808       /* In multiline mode, or if colouring, we have to split the line(s) up
2809       and search for further matches, but not of course if the line is a
2810       non-match. In multiline mode this is necessary in case there is another
2811       match that spans the end of the current line. When colouring we want to
2812       colour all matches. */
2813 
2814       if ((multiline || do_colour) && !invert)
2815         {
2816         int plength;
2817         PCRE2_SIZE endprevious;
2818 
2819         /* The use of \K may make the end offset earlier than the start. In
2820         this situation, swap them round. */
2821 
2822         if (offsets[0] > offsets[1])
2823           {
2824           PCRE2_SIZE temp = offsets[0];
2825           offsets[0] = offsets[1];
2826           offsets[1] = temp;
2827           }
2828 
2829         FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
2830         print_match(ptr + offsets[0], offsets[1] - offsets[0]);
2831 
2832         for (;;)
2833           {
2834           PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
2835 
2836           endprevious = offsets[1];
2837           startoffset = endprevious;  /* Advance after previous match. */
2838 
2839           /* If the pattern contained a lookbehind that included \K, it is
2840           possible that the end of the match might be at or before the actual
2841           starting offset we have just used. In this case, start one character
2842           further on. */
2843 
2844           if (startoffset <= oldstartoffset)
2845             {
2846             startoffset = oldstartoffset + 1;
2847             if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2848             }
2849 
2850           /* If the current match ended past the end of the line (only possible
2851           in multiline mode), we must move on to the line in which it did end
2852           before searching for more matches. Because the PCRE2_FIRSTLINE option
2853           is set, the start of the match will always be before the first
2854           newline sequence. */
2855 
2856           while (startoffset > linelength + endlinelength)
2857             {
2858             ptr += linelength + endlinelength;
2859             filepos += (int)(linelength + endlinelength);
2860             linenumber++;
2861             startoffset -= (int)(linelength + endlinelength);
2862             endprevious -= (int)(linelength + endlinelength);
2863             t = end_of_line(ptr, endptr, &endlinelength);
2864             linelength = t - ptr - endlinelength;
2865             length = (PCRE2_SIZE)(endptr - ptr);
2866             }
2867 
2868           /* If startoffset is at the exact end of the line it means this
2869           complete line was the final part of the match, so there is nothing
2870           more to do. */
2871 
2872           if (startoffset == linelength + endlinelength) break;
2873 
2874           /* Otherwise, run a match from within the final line, and if found,
2875           loop for any that may follow. */
2876 
2877           if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
2878 
2879           /* The use of \K may make the end offset earlier than the start. In
2880           this situation, swap them round. */
2881 
2882           if (offsets[0] > offsets[1])
2883             {
2884             PCRE2_SIZE temp = offsets[0];
2885             offsets[0] = offsets[1];
2886             offsets[1] = temp;
2887             }
2888 
2889           FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
2890           print_match(ptr + offsets[0], offsets[1] - offsets[0]);
2891           }
2892 
2893         /* In multiline mode, we may have already printed the complete line
2894         and its line-ending characters (if they matched the pattern), so there
2895         may be no more to print. */
2896 
2897         plength = (int)((linelength + endlinelength) - endprevious);
2898         if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
2899         }
2900 
2901       /* Not colouring or multiline; no need to search for further matches. */
2902 
2903       else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
2904       }
2905 
2906     /* End of doing what has to be done for a match. If --line-buffered was
2907     given, flush the output. */
2908 
2909     if (line_buffered) fflush(stdout);
2910     rc = 0;    /* Had some success */
2911 
2912     /* Remember where the last match happened for after_context. We remember
2913     where we are about to restart, and that line's number. */
2914 
2915     lastmatchrestart = ptr + linelength + endlinelength;
2916     lastmatchnumber = linenumber + 1;
2917     }
2918 
2919   /* For a match in multiline inverted mode (which of course did not cause
2920   anything to be printed), we have to move on to the end of the match before
2921   proceeding. */
2922 
2923   if (multiline && invert && match)
2924     {
2925     int ellength;
2926     char *endmatch = ptr + offsets[1];
2927     t = ptr;
2928     while (t < endmatch)
2929       {
2930       t = end_of_line(t, endptr, &ellength);
2931       if (t <= endmatch) linenumber++; else break;
2932       }
2933     endmatch = end_of_line(endmatch, endptr, &ellength);
2934     linelength = endmatch - ptr - ellength;
2935     }
2936 
2937   /* Advance to after the newline and increment the line number. The file
2938   offset to the current line is maintained in filepos. */
2939 
2940   END_ONE_MATCH:
2941   ptr += linelength + endlinelength;
2942   filepos += (int)(linelength + endlinelength);
2943   linenumber++;
2944 
2945   /* If input is line buffered, and the buffer is not yet full, read another
2946   line and add it into the buffer. */
2947 
2948   if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
2949     {
2950     int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
2951     bufflength += add;
2952     endptr += add;
2953     }
2954 
2955   /* If we haven't yet reached the end of the file (the buffer is full), and
2956   the current point is in the top 1/3 of the buffer, slide the buffer down by
2957   1/3 and refill it. Before we do this, if some unprinted "after" lines are
2958   about to be lost, print them. */
2959 
2960   if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
2961     {
2962     if (after_context > 0 &&
2963         lastmatchnumber > 0 &&
2964         lastmatchrestart < main_buffer + bufthird)
2965       {
2966       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2967       lastmatchnumber = 0;  /* Indicates no after lines pending */
2968       }
2969 
2970     /* Now do the shuffle */
2971 
2972     (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2973     ptr -= bufthird;
2974 
2975     bufflength = 2*bufthird + fill_buffer(handle, frtype,
2976       main_buffer + 2*bufthird, bufthird, input_line_buffered);
2977     endptr = main_buffer + bufflength;
2978 
2979     /* Adjust any last match point */
2980 
2981     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2982     }
2983   }     /* Loop through the whole file */
2984 
2985 /* End of file; print final "after" lines if wanted; do_after_lines sets
2986 hyphenpending if it prints something. */
2987 
2988 if (only_matching_count == 0 && !(count_only|show_total_count))
2989   {
2990   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2991   hyphenpending |= endhyphenpending;
2992   }
2993 
2994 /* Print the file name if we are looking for those without matches and there
2995 were none. If we found a match, we won't have got this far. */
2996 
2997 if (filenames == FN_NOMATCH_ONLY)
2998   {
2999   fprintf(stdout, "%s" STDOUT_NL, printname);
3000   return 0;
3001   }
3002 
3003 /* Print the match count if wanted */
3004 
3005 if (count_only && !quiet)
3006   {
3007   if (count > 0 || !omit_zero_count)
3008     {
3009     if (printname != NULL && filenames != FN_NONE)
3010       fprintf(stdout, "%s:", printname);
3011     fprintf(stdout, "%lu" STDOUT_NL, count);
3012     counts_printed++;
3013     }
3014   }
3015 
3016 total_count += count;   /* Can be set without count_only */
3017 return rc;
3018 }
3019 
3020 
3021 
3022 /*************************************************
3023 *     Grep a file or recurse into a directory    *
3024 *************************************************/
3025 
3026 /* Given a path name, if it's a directory, scan all the files if we are
3027 recursing; if it's a file, grep it.
3028 
3029 Arguments:
3030   pathname          the path to investigate
3031   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
3032   only_one_at_top   TRUE if the path is the only one at toplevel
3033 
3034 Returns:  -1 the file/directory was skipped
3035            0 if there was at least one match
3036            1 if there were no matches
3037            2 there was some kind of error
3038 
3039 However, file opening failures are suppressed if "silent" is set.
3040 */
3041 
3042 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)3043 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3044 {
3045 int rc = 1;
3046 int frtype;
3047 void *handle;
3048 char *lastcomp;
3049 FILE *in = NULL;           /* Ensure initialized */
3050 
3051 #ifdef SUPPORT_LIBZ
3052 gzFile ingz = NULL;
3053 #endif
3054 
3055 #ifdef SUPPORT_LIBBZ2
3056 BZFILE *inbz2 = NULL;
3057 #endif
3058 
3059 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3060 int pathlen;
3061 #endif
3062 
3063 #if defined NATIVE_ZOS
3064 int zos_type;
3065 FILE *zos_test_file;
3066 #endif
3067 
3068 /* If the file name is "-" we scan stdin */
3069 
3070 if (strcmp(pathname, "-") == 0)
3071   {
3072   return pcre2grep(stdin, FR_PLAIN, stdin_name,
3073     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3074       stdin_name : NULL);
3075   }
3076 
3077 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3078 directories, whereas --include and --exclude apply to everything else. The test
3079 is against the final component of the path. */
3080 
3081 lastcomp = strrchr(pathname, FILESEP);
3082 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3083 
3084 /* If the file is a directory, skip if not recursing or if explicitly excluded.
3085 Otherwise, scan the directory and recurse for each path within it. The scanning
3086 code is localized so it can be made system-specific. */
3087 
3088 
3089 /* For z/OS, determine the file type. */
3090 
3091 #if defined NATIVE_ZOS
3092 zos_test_file =  fopen(pathname,"rb");
3093 
3094 if (zos_test_file == NULL)
3095    {
3096    if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3097      pathname, strerror(errno));
3098    return -1;
3099    }
3100 zos_type = identifyzosfiletype (zos_test_file);
3101 fclose (zos_test_file);
3102 
3103 /* Handle a PDS in separate code */
3104 
3105 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3106    {
3107    return travelonpdsdir (pathname, only_one_at_top);
3108    }
3109 
3110 /* Deal with regular files in the normal way below. These types are:
3111    zos_type == __ZOS_PDS_MEMBER
3112    zos_type == __ZOS_PS
3113    zos_type == __ZOS_VSAM_KSDS
3114    zos_type == __ZOS_VSAM_ESDS
3115    zos_type == __ZOS_VSAM_RRDS
3116 */
3117 
3118 /* Handle a z/OS directory using common code. */
3119 
3120 else if (zos_type == __ZOS_HFS)
3121  {
3122 #endif  /* NATIVE_ZOS */
3123 
3124 
3125 /* Handle directories: common code for all OS */
3126 
3127 if (isdirectory(pathname))
3128   {
3129   if (dee_action == dee_SKIP ||
3130       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3131     return -1;
3132 
3133   if (dee_action == dee_RECURSE)
3134     {
3135     char buffer[FNBUFSIZ];
3136     char *nextfile;
3137     directory_type *dir = opendirectory(pathname);
3138 
3139     if (dir == NULL)
3140       {
3141       if (!silent)
3142         fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3143           strerror(errno));
3144       return 2;
3145       }
3146 
3147     while ((nextfile = readdirectory(dir)) != NULL)
3148       {
3149       int frc;
3150       int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3151       if (fnlength > FNBUFSIZ)
3152         {
3153         fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3154         rc = 2;
3155         break;
3156         }
3157       sprintf(buffer, "%s%c%s", pathname, FILESEP, nextfile);
3158       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3159       if (frc > 1) rc = frc;
3160        else if (frc == 0 && rc == 1) rc = 0;
3161       }
3162 
3163     closedirectory(dir);
3164     return rc;
3165     }
3166   }
3167 
3168 #ifdef WIN32
3169 if (iswild(pathname))
3170   {
3171   char buffer[1024];
3172   char *nextfile;
3173   char *name;
3174   directory_type *dir = opendirectory(pathname);
3175 
3176   if (dir == NULL)
3177     return 0;
3178 
3179   for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3180     if (*nextfile == '/' || *nextfile == '\\')
3181       name = nextfile + 1;
3182   *name = 0;
3183 
3184   while ((nextfile = readdirectory(dir)) != NULL)
3185     {
3186     int frc;
3187     sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3188     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3189     if (frc > 1) rc = frc;
3190      else if (frc == 0 && rc == 1) rc = 0;
3191     }
3192 
3193   closedirectory(dir);
3194   return rc;
3195   }
3196 #endif
3197 
3198 #if defined NATIVE_ZOS
3199  }
3200 #endif
3201 
3202 /* If the file is not a directory, check for a regular file, and if it is not,
3203 skip it if that's been requested. Otherwise, check for an explicit inclusion or
3204 exclusion. */
3205 
3206 else if (
3207 #if defined NATIVE_ZOS
3208         (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3209 #else  /* all other OS */
3210         (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3211 #endif
3212         !test_incexc(lastcomp, include_patterns, exclude_patterns))
3213   return -1;  /* File skipped */
3214 
3215 /* Control reaches here if we have a regular file, or if we have a directory
3216 and recursion or skipping was not requested, or if we have anything else and
3217 skipping was not requested. The scan proceeds. If this is the first and only
3218 argument at top level, we don't show the file name, unless we are only showing
3219 the file name, or the filename was forced (-H). */
3220 
3221 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3222 pathlen = (int)(strlen(pathname));
3223 #endif
3224 
3225 /* Open using zlib if it is supported and the file name ends with .gz. */
3226 
3227 #ifdef SUPPORT_LIBZ
3228 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3229   {
3230   ingz = gzopen(pathname, "rb");
3231   if (ingz == NULL)
3232     {
3233     if (!silent)
3234       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3235         strerror(errno));
3236     return 2;
3237     }
3238   handle = (void *)ingz;
3239   frtype = FR_LIBZ;
3240   }
3241 else
3242 #endif
3243 
3244 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3245 
3246 #ifdef SUPPORT_LIBBZ2
3247 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3248   {
3249   inbz2 = BZ2_bzopen(pathname, "rb");
3250   handle = (void *)inbz2;
3251   frtype = FR_LIBBZ2;
3252   }
3253 else
3254 #endif
3255 
3256 /* Otherwise use plain fopen(). The label is so that we can come back here if
3257 an attempt to read a .bz2 file indicates that it really is a plain file. */
3258 
3259 #ifdef SUPPORT_LIBBZ2
3260 PLAIN_FILE:
3261 #endif
3262   {
3263   in = fopen(pathname, "rb");
3264   handle = (void *)in;
3265   frtype = FR_PLAIN;
3266   }
3267 
3268 /* All the opening methods return errno when they fail. */
3269 
3270 if (handle == NULL)
3271   {
3272   if (!silent)
3273     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3274       strerror(errno));
3275   return 2;
3276   }
3277 
3278 /* Now grep the file */
3279 
3280 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3281   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3282 
3283 /* Close in an appropriate manner. */
3284 
3285 #ifdef SUPPORT_LIBZ
3286 if (frtype == FR_LIBZ)
3287   gzclose(ingz);
3288 else
3289 #endif
3290 
3291 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3292 read failed. If the error indicates that the file isn't in fact bzipped, try
3293 again as a normal file. */
3294 
3295 #ifdef SUPPORT_LIBBZ2
3296 if (frtype == FR_LIBBZ2)
3297   {
3298   if (rc == 3)
3299     {
3300     int errnum;
3301     const char *err = BZ2_bzerror(inbz2, &errnum);
3302     if (errnum == BZ_DATA_ERROR_MAGIC)
3303       {
3304       BZ2_bzclose(inbz2);
3305       goto PLAIN_FILE;
3306       }
3307     else if (!silent)
3308       fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3309         pathname, err);
3310     rc = 2;    /* The normal "something went wrong" code */
3311     }
3312   BZ2_bzclose(inbz2);
3313   }
3314 else
3315 #endif
3316 
3317 /* Normal file close */
3318 
3319 fclose(in);
3320 
3321 /* Pass back the yield from pcre2grep(). */
3322 
3323 return rc;
3324 }
3325 
3326 
3327 
3328 /*************************************************
3329 *    Handle a single-letter, no data option      *
3330 *************************************************/
3331 
3332 static int
handle_option(int letter,int options)3333 handle_option(int letter, int options)
3334 {
3335 switch(letter)
3336   {
3337   case N_FOFFSETS: file_offsets = TRUE; break;
3338   case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3339   case N_LBUFFER: line_buffered = TRUE; break;
3340   case N_LOFFSETS: line_offsets = number = TRUE; break;
3341   case N_NOJIT: use_jit = FALSE; break;
3342   case 'a': binary_files = BIN_TEXT; break;
3343   case 'c': count_only = TRUE; break;
3344   case 'F': options |= PCRE2_LITERAL; break;
3345   case 'H': filenames = FN_FORCE; break;
3346   case 'I': binary_files = BIN_NOMATCH; break;
3347   case 'h': filenames = FN_NONE; break;
3348   case 'i': options |= PCRE2_CASELESS; break;
3349   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3350   case 'L': filenames = FN_NOMATCH_ONLY; break;
3351   case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3352   case 'n': number = TRUE; break;
3353 
3354   case 'o':
3355   only_matching_last = add_number(0, only_matching_last);
3356   if (only_matching == NULL) only_matching = only_matching_last;
3357   break;
3358 
3359   case 'q': quiet = TRUE; break;
3360   case 'r': dee_action = dee_RECURSE; break;
3361   case 's': silent = TRUE; break;
3362   case 't': show_total_count = TRUE; break;
3363   case 'u': options |= PCRE2_UTF; utf = TRUE; break;
3364   case 'v': invert = TRUE; break;
3365   case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3366   case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3367 
3368   case 'V':
3369     {
3370     unsigned char buffer[128];
3371     (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3372     fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3373     }
3374   pcre2grep_exit(0);
3375   break;
3376 
3377   default:
3378   fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3379   pcre2grep_exit(usage(2));
3380   }
3381 
3382 return options;
3383 }
3384 
3385 
3386 
3387 /*************************************************
3388 *          Construct printed ordinal             *
3389 *************************************************/
3390 
3391 /* This turns a number into "1st", "3rd", etc. */
3392 
static char *
ordin(int n)
{
/* The result lives in a static buffer, so it is overwritten by the next call.
sprintf() returns the number of characters written, which is where the suffix
has to go. */

static char buffer[14];
const char *suffix = "th";
int length = sprintf(buffer, "%d", n);
int tail = n % 100;

/* Values ending in 11, 12 or 13 keep "th"; otherwise the final digit picks
the suffix, with "th" for everything other than 1, 2 and 3. */

if (tail < 11 || tail > 13)
  {
  int unit = tail % 10;
  if (unit == 1) suffix = "st";
  else if (unit == 2) suffix = "nd";
  else if (unit == 3) suffix = "rd";
  }

strcpy(buffer + length, suffix);
return buffer;
}
3411 
3412 
3413 
3414 /*************************************************
3415 *          Compile a single pattern              *
3416 *************************************************/
3417 
3418 /* Do nothing if the pattern has already been compiled. This is the case for
3419 include/exclude patterns read from a file.
3420 
3421 When the -F option has been used, each "pattern" may be a list of strings,
3422 separated by line breaks. They will be matched literally. We split such a
3423 string and compile the first substring, inserting an additional block into the
3424 pattern chain.
3425 
3426 Arguments:
3427   p              points to the pattern block
3428   options        the PCRE options
3429   fromfile       TRUE if the pattern was read from a file
3430   fromtext       file name or identifying text (e.g. "include")
3431   count          0 if this is the only command line pattern, or
3432                  number of the command line pattern, or
3433                  linenumber for a pattern from a file
3434 
3435 Returns:         TRUE on success, FALSE after an error
3436 */
3437 
3438 static BOOL
compile_pattern(patstr * p,int options,int fromfile,const char * fromtext,int count)3439 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3440   int count)
3441 {
3442 char *ps;
3443 int errcode;
3444 PCRE2_SIZE patlen, erroffset;
3445 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3446 
3447 if (p->compiled != NULL) return TRUE;
3448 ps = p->string;
3449 patlen = p->length;
3450 
3451 if ((options & PCRE2_LITERAL) != 0)
3452   {
3453   int ellength;
3454   char *eop = ps + patlen;
3455   char *pe = end_of_line(ps, eop, &ellength);
3456 
3457   if (ellength != 0)
3458     {
3459     patlen = pe - ps - ellength;
3460     if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3461     }
3462   }
3463 
3464 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3465   &erroffset, compile_context);
3466 
3467 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3468 ignore any JIT compiler errors, relying falling back to interpreting if
3469 anything goes wrong with JIT. */
3470 
3471 if (p->compiled != NULL)
3472   {
3473 #ifdef SUPPORT_PCRE2GREP_JIT
3474   if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3475 #endif
3476   return TRUE;
3477   }
3478 
3479 /* Handle compile errors */
3480 
3481 if (erroffset > patlen) erroffset = patlen;
3482 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3483 
3484 if (fromfile)
3485   {
3486   fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3487     "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3488   }
3489 else
3490   {
3491   if (count == 0)
3492     fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3493       fromtext, (int)erroffset, errmessbuffer);
3494   else
3495     fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3496       ordin(count), fromtext, (int)erroffset, errmessbuffer);
3497   }
3498 
3499 return FALSE;
3500 }
3501 
3502 
3503 
3504 /*************************************************
3505 *     Read and compile a file of patterns        *
3506 *************************************************/
3507 
3508 /* This is used for --filelist, --include-from, and --exclude-from.
3509 
3510 Arguments:
3511   name         the name of the file; "-" is stdin
3512   patptr       pointer to the pattern chain anchor
3513   patlastptr   pointer to the last pattern pointer
3514 
3515 Returns:       TRUE if all went well
3516 */
3517 
3518 static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr)3519 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3520 {
3521 int linenumber = 0;
3522 PCRE2_SIZE patlen;
3523 FILE *f;
3524 const char *filename;
3525 char buffer[MAXPATLEN+20];
3526 
3527 if (strcmp(name, "-") == 0)
3528   {
3529   f = stdin;
3530   filename = stdin_name;
3531   }
3532 else
3533   {
3534   f = fopen(name, "r");
3535   if (f == NULL)
3536     {
3537     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3538     return FALSE;
3539     }
3540   filename = name;
3541   }
3542 
3543 while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3544   {
3545   while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3546   linenumber++;
3547   if (patlen == 0) continue;   /* Skip blank lines */
3548 
3549   /* Note: this call to add_pattern() puts a pointer to the local variable
3550   "buffer" into the pattern chain. However, that pointer is used only when
3551   compiling the pattern, which happens immediately below, so we flatten it
3552   afterwards, as a precaution against any later code trying to use it. */
3553 
3554   *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3555   if (*patlastptr == NULL)
3556     {
3557     if (f != stdin) fclose(f);
3558     return FALSE;
3559     }
3560   if (*patptr == NULL) *patptr = *patlastptr;
3561 
3562   /* This loop is needed because compiling a "pattern" when -F is set may add
3563   on additional literal patterns if the original contains a newline. In the
3564   common case, it never will, because read_one_line() stops at a newline.
3565   However, the -N option can be used to give pcre2grep a different newline
3566   setting. */
3567 
3568   for(;;)
3569     {
3570     if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3571         linenumber))
3572       {
3573       if (f != stdin) fclose(f);
3574       return FALSE;
3575       }
3576     (*patlastptr)->string = NULL;            /* Insurance */
3577     if ((*patlastptr)->next == NULL) break;
3578     *patlastptr = (*patlastptr)->next;
3579     }
3580   }
3581 
3582 if (f != stdin) fclose(f);
3583 return TRUE;
3584 }
3585 
3586 
3587 
3588 /*************************************************
3589 *                Main program                    *
3590 *************************************************/
3591 
3592 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3593 
3594 int
main(int argc,char ** argv)3595 main(int argc, char **argv)
3596 {
3597 int i, j;
3598 int rc = 1;
3599 BOOL only_one_at_top;
3600 patstr *cp;
3601 fnstr *fn;
3602 const char *locale_from = "--locale";
3603 
3604 #ifdef SUPPORT_PCRE2GREP_JIT
3605 pcre2_jit_stack *jit_stack = NULL;
3606 #endif
3607 
3608 /* In Windows, stdout is set up as a text stream, which means that \n is
3609 converted to \r\n. This causes output lines that are copied from the input to
3610 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3611 that stdout is a binary stream. Note that this means all other output to stdout
3612 must use STDOUT_NL to terminate lines. */
3613 
3614 #ifdef WIN32
3615 _setmode(_fileno(stdout), _O_BINARY);
3616 #endif
3617 
3618 /* Set up a default compile and match contexts and a match data block. */
3619 
3620 compile_context = pcre2_compile_context_create(NULL);
3621 match_context = pcre2_match_context_create(NULL);
3622 match_data = pcre2_match_data_create(OFFSET_SIZE, NULL);
3623 offsets = pcre2_get_ovector_pointer(match_data);
3624 
3625 /* If string (script) callouts are supported, set up the callout processing
3626 function. */
3627 
3628 #ifdef SUPPORT_PCRE2GREP_CALLOUT
3629 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
3630 #endif
3631 
3632 /* Process the options */
3633 
3634 for (i = 1; i < argc; i++)
3635   {
3636   option_item *op = NULL;
3637   char *option_data = (char *)"";    /* default to keep compiler happy */
3638   BOOL longop;
3639   BOOL longopwasequals = FALSE;
3640 
3641   if (argv[i][0] != '-') break;
3642 
3643   /* If we hit an argument that is just "-", it may be a reference to STDIN,
3644   but only if we have previously had -e or -f to define the patterns. */
3645 
3646   if (argv[i][1] == 0)
3647     {
3648     if (pattern_files != NULL || patterns != NULL) break;
3649       else pcre2grep_exit(usage(2));
3650     }
3651 
3652   /* Handle a long name option, or -- to terminate the options */
3653 
3654   if (argv[i][1] == '-')
3655     {
3656     char *arg = argv[i] + 2;
3657     char *argequals = strchr(arg, '=');
3658 
3659     if (*arg == 0)    /* -- terminates options */
3660       {
3661       i++;
3662       break;                /* out of the options-handling loop */
3663       }
3664 
3665     longop = TRUE;
3666 
3667     /* Some long options have data that follows after =, for example file=name.
3668     Some options have variations in the long name spelling: specifically, we
3669     allow "regexp" because GNU grep allows it, though I personally go along
3670     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3671     These options are entered in the table as "regex(p)". Options can be in
3672     both these categories. */
3673 
3674     for (op = optionlist; op->one_char != 0; op++)
3675       {
3676       char *opbra = strchr(op->long_name, '(');
3677       char *equals = strchr(op->long_name, '=');
3678 
3679       /* Handle options with only one spelling of the name */
3680 
3681       if (opbra == NULL)     /* Does not contain '(' */
3682         {
3683         if (equals == NULL)  /* Not thing=data case */
3684           {
3685           if (strcmp(arg, op->long_name) == 0) break;
3686           }
3687         else                 /* Special case xxx=data */
3688           {
3689           int oplen = (int)(equals - op->long_name);
3690           int arglen = (argequals == NULL)?
3691             (int)strlen(arg) : (int)(argequals - arg);
3692           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3693             {
3694             option_data = arg + arglen;
3695             if (*option_data == '=')
3696               {
3697               option_data++;
3698               longopwasequals = TRUE;
3699               }
3700             break;
3701             }
3702           }
3703         }
3704 
3705       /* Handle options with an alternate spelling of the name */
3706 
3707       else
3708         {
3709         char buff1[24];
3710         char buff2[24];
3711         int ret;
3712 
3713         int baselen = (int)(opbra - op->long_name);
3714         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3715         int arglen = (argequals == NULL || equals == NULL)?
3716           (int)strlen(arg) : (int)(argequals - arg);
3717 
3718         if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3719              ret < 0 || ret > (int)sizeof(buff1)) ||
3720             (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3721                      fulllen - baselen - 2, opbra + 1),
3722              ret < 0 || ret > (int)sizeof(buff2)))
3723           {
3724           fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3725             op->long_name);
3726           pcre2grep_exit(2);
3727           }
3728 
3729         if (strncmp(arg, buff1, arglen) == 0 ||
3730            strncmp(arg, buff2, arglen) == 0)
3731           {
3732           if (equals != NULL && argequals != NULL)
3733             {
3734             option_data = argequals;
3735             if (*option_data == '=')
3736               {
3737               option_data++;
3738               longopwasequals = TRUE;
3739               }
3740             }
3741           break;
3742           }
3743         }
3744       }
3745 
3746     if (op->one_char == 0)
3747       {
3748       fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3749       pcre2grep_exit(usage(2));
3750       }
3751     }
3752 
3753   /* Jeffrey Friedl's debugging harness uses these additional options which
3754   are not in the right form for putting in the option table because they use
3755   only one hyphen, yet are more than one character long. By putting them
3756   separately here, they will not get displayed as part of the help() output,
3757   but I don't think Jeffrey will care about that. */
3758 
3759 #ifdef JFRIEDL_DEBUG
3760   else if (strcmp(argv[i], "-pre") == 0) {
3761           jfriedl_prefix = argv[++i];
3762           continue;
3763   } else if (strcmp(argv[i], "-post") == 0) {
3764           jfriedl_postfix = argv[++i];
3765           continue;
3766   } else if (strcmp(argv[i], "-XT") == 0) {
3767           sscanf(argv[++i], "%d", &jfriedl_XT);
3768           continue;
3769   } else if (strcmp(argv[i], "-XR") == 0) {
3770           sscanf(argv[++i], "%d", &jfriedl_XR);
3771           continue;
3772   }
3773 #endif
3774 
3775 
3776   /* One-char options; many that have no data may be in a single argument; we
3777   continue till we hit the last one or one that needs data. */
3778 
3779   else
3780     {
3781     char *s = argv[i] + 1;
3782     longop = FALSE;
3783 
3784     while (*s != 0)
3785       {
3786       for (op = optionlist; op->one_char != 0; op++)
3787         {
3788         if (*s == op->one_char) break;
3789         }
3790       if (op->one_char == 0)
3791         {
3792         fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
3793           *s, argv[i]);
3794         pcre2grep_exit(usage(2));
3795         }
3796 
3797       option_data = s+1;
3798 
3799       /* Break out if this is the last character in the string; it's handled
3800       below like a single multi-char option. */
3801 
3802       if (*option_data == 0) break;
3803 
3804       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
3805       are used for ones that either have a numeric value or a default, i.e.
3806       the data is optional. If a digit follows, there is data; if not, carry on
3807       with other single-character options in the same string. */
3808 
3809       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
3810         {
3811         if (isdigit((unsigned char)s[1])) break;
3812         }
3813       else   /* Check for an option with data */
3814         {
3815         if (op->type != OP_NODATA) break;
3816         }
3817 
3818       /* Handle a single-character option with no data, then loop for the
3819       next character in the string. */
3820 
3821       pcre2_options = handle_option(*s++, pcre2_options);
3822       }
3823     }
3824 
3825   /* At this point we should have op pointing to a matched option. If the type
3826   is OP_NODATA, it means that there is no data, and the option might set
3827   something in the PCRE2 options. */
3828 
3829   if (op->type == OP_NODATA)
3830     {
3831     pcre2_options = handle_option(op->one_char, pcre2_options);
3832     continue;
3833     }
3834 
3835   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
3836   either has a value or defaults to something. It cannot have data in a
3837   separate item. At the moment, the only such options are "colo(u)r",
3838   "only-matching", and Jeffrey Friedl's special -S debugging option. */
3839 
3840   if (*option_data == 0 &&
3841       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
3842        op->type == OP_OP_NUMBERS))
3843     {
3844     switch (op->one_char)
3845       {
3846       case N_COLOUR:
3847       colour_option = "auto";
3848       break;
3849 
3850       case 'o':
3851       only_matching_last = add_number(0, only_matching_last);
3852       if (only_matching == NULL) only_matching = only_matching_last;
3853       break;
3854 
3855 #ifdef JFRIEDL_DEBUG
3856       case 'S':
3857       S_arg = 0;
3858       break;
3859 #endif
3860       }
3861     continue;
3862     }
3863 
3864   /* Otherwise, find the data string for the option. */
3865 
3866   if (*option_data == 0)
3867     {
3868     if (i >= argc - 1 || longopwasequals)
3869       {
3870       fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
3871       pcre2grep_exit(usage(2));
3872       }
3873     option_data = argv[++i];
3874     }
3875 
3876   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
3877   added to a chain of numbers. */
3878 
3879   if (op->type == OP_OP_NUMBERS)
3880     {
3881     unsigned long int n = decode_number(option_data, op, longop);
3882     omdatastr *omd = (omdatastr *)op->dataptr;
3883     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
3884     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
3885     }
3886 
3887   /* If the option type is OP_PATLIST, it's the -e option, or one of the
3888   include/exclude options, which can be called multiple times to create lists
3889   of patterns. */
3890 
3891   else if (op->type == OP_PATLIST)
3892     {
3893     patdatastr *pd = (patdatastr *)op->dataptr;
3894     *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
3895       *(pd->lastptr));
3896     if (*(pd->lastptr) == NULL) goto EXIT2;
3897     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
3898     }
3899 
3900   /* If the option type is OP_FILELIST, it's one of the options that names a
3901   file. */
3902 
3903   else if (op->type == OP_FILELIST)
3904     {
3905     fndatastr *fd = (fndatastr *)op->dataptr;
3906     fn = (fnstr *)malloc(sizeof(fnstr));
3907     if (fn == NULL)
3908       {
3909       fprintf(stderr, "pcre2grep: malloc failed\n");
3910       goto EXIT2;
3911       }
3912     fn->next = NULL;
3913     fn->name = option_data;
3914     if (*(fd->anchor) == NULL)
3915       *(fd->anchor) = fn;
3916     else
3917       (*(fd->lastptr))->next = fn;
3918     *(fd->lastptr) = fn;
3919     }
3920 
3921   /* Handle OP_BINFILES */
3922 
3923   else if (op->type == OP_BINFILES)
3924     {
3925     if (strcmp(option_data, "binary") == 0)
3926       binary_files = BIN_BINARY;
3927     else if (strcmp(option_data, "without-match") == 0)
3928       binary_files = BIN_NOMATCH;
3929     else if (strcmp(option_data, "text") == 0)
3930       binary_files = BIN_TEXT;
3931     else
3932       {
3933       fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
3934         option_data);
3935       pcre2grep_exit(usage(2));
3936       }
3937     }
3938 
3939   /* Otherwise, deal with a single string or numeric data value. */
3940 
3941   else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
3942            op->type != OP_OP_NUMBER && op->type != OP_SIZE)
3943     {
3944     *((char **)op->dataptr) = option_data;
3945     }
3946   else
3947     {
3948     unsigned long int n = decode_number(option_data, op, longop);
3949     if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
3950       else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
3951       else *((int *)op->dataptr) = n;
3952     }
3953   }
3954 
3955 /* Options have been decoded. If -C was used, its value is used as a default
3956 for -A and -B. */
3957 
3958 if (both_context > 0)
3959   {
3960   if (after_context == 0) after_context = both_context;
3961   if (before_context == 0) before_context = both_context;
3962   }
3963 
3964 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
3965 permitted. They display, each in their own way, only the data that has matched.
3966 */
3967 
3968 only_matching_count = (only_matching != NULL) + (output_text != NULL) +
3969   file_offsets + line_offsets;
3970 
3971 if (only_matching_count > 1)
3972   {
3973   fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
3974     "--file-offsets and/or --line-offsets\n");
3975   pcre2grep_exit(usage(2));
3976   }
3977 
3978 /* Check the text supplied to --output for errors. */
3979 
3980 if (output_text != NULL &&
3981     !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
3982   goto EXIT2;
3983 
3984 /* Put limits into the match context. */
3985 
3986 if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
3987 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
3988 if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
3989 
3990 /* If a locale has not been provided as an option, see if the LC_CTYPE or
3991 LC_ALL environment variable is set, and if so, use it. */
3992 
3993 if (locale == NULL)
3994   {
3995   locale = getenv("LC_ALL");
3996   locale_from = "LC_ALL";
3997   }
3998 
3999 if (locale == NULL)
4000   {
4001   locale = getenv("LC_CTYPE");
4002   locale_from = "LC_CTYPE";
4003   }
4004 
4005 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
4006 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4007 
4008 if (locale != NULL)
4009   {
4010   if (setlocale(LC_CTYPE, locale) == NULL)
4011     {
4012     fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4013       locale, locale_from);
4014     goto EXIT2;
4015     }
4016   character_tables = pcre2_maketables(NULL);
4017   pcre2_set_character_tables(compile_context, character_tables);
4018   }
4019 
4020 /* Sort out colouring */
4021 
4022 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4023   {
4024   if (strcmp(colour_option, "always") == 0)
4025 #ifdef WIN32
4026     do_ansi = !is_stdout_tty(),
4027 #endif
4028     do_colour = TRUE;
4029   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4030   else
4031     {
4032     fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4033       colour_option);
4034     goto EXIT2;
4035     }
4036   if (do_colour)
4037     {
4038     char *cs = getenv("PCRE2GREP_COLOUR");
4039     if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4040     if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4041     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4042     if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4043     if (cs == NULL) cs = getenv("GREP_COLOR");
4044     if (cs != NULL)
4045       {
4046       if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4047       }
4048 #ifdef WIN32
4049     init_colour_output();
4050 #endif
4051     }
4052   }
4053 
4054 /* Sort out a newline setting. */
4055 
4056 if (newline_arg != NULL)
4057   {
4058   for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4059        endlinetype++)
4060     {
4061     if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4062     }
4063   if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4064     pcre2_set_newline(compile_context, endlinetype);
4065   else
4066     {
4067     fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4068       newline_arg);
4069     goto EXIT2;
4070     }
4071   }
4072 
4073 /* Find default newline convention */
4074 
4075 else
4076   {
4077   (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4078   }
4079 
4080 /* Interpret the text values for -d and -D */
4081 
4082 if (dee_option != NULL)
4083   {
4084   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4085   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4086   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4087   else
4088     {
4089     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4090     goto EXIT2;
4091     }
4092   }
4093 
4094 if (DEE_option != NULL)
4095   {
4096   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4097   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4098   else
4099     {
4100     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4101     goto EXIT2;
4102     }
4103   }
4104 
4105 /* Set the extra options */
4106 
4107 (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4108 
4109 /* Check the values for Jeffrey Friedl's debugging options. */
4110 
4111 #ifdef JFRIEDL_DEBUG
4112 if (S_arg > 9)
4113   {
4114   fprintf(stderr, "pcre2grep: bad value for -S option\n");
4115   return 2;
4116   }
4117 if (jfriedl_XT != 0 || jfriedl_XR != 0)
4118   {
4119   if (jfriedl_XT == 0) jfriedl_XT = 1;
4120   if (jfriedl_XR == 0) jfriedl_XR = 1;
4121   }
4122 #endif
4123 
4124 /* If use_jit is set, check whether JIT is available. If not, do not try
4125 to use JIT. */
4126 
4127 if (use_jit)
4128   {
4129   uint32_t answer;
4130   (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4131   if (!answer) use_jit = FALSE;
4132   }
4133 
4134 /* Get memory for the main buffer. */
4135 
4136 if (bufthird <= 0)
4137   {
4138   fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4139   goto EXIT2;
4140   }
4141 
4142 bufsize = 3*bufthird;
4143 main_buffer = (char *)malloc(bufsize);
4144 
4145 if (main_buffer == NULL)
4146   {
4147   fprintf(stderr, "pcre2grep: malloc failed\n");
4148   goto EXIT2;
4149   }
4150 
4151 /* If no patterns were provided by -e, and there are no files provided by -f,
4152 the first argument is the one and only pattern, and it must exist. */
4153 
4154 if (patterns == NULL && pattern_files == NULL)
4155   {
4156   if (i >= argc) return usage(2);
4157   patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4158     NULL);
4159   i++;
4160   if (patterns == NULL) goto EXIT2;
4161   }
4162 
4163 /* Compile the patterns that were provided on the command line, either by
4164 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4165 after all the command-line options are read so that we know which PCRE options
4166 to use. When -F is used, compile_pattern() may add another block into the
4167 chain, so we must not access the next pointer till after the compile. */
4168 
4169 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4170   {
4171   if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4172        (j == 1 && patterns->next == NULL)? 0 : j))
4173     goto EXIT2;
4174   }
4175 
4176 /* Read and compile the regular expressions that are provided in files. */
4177 
4178 for (fn = pattern_files; fn != NULL; fn = fn->next)
4179   {
4180   if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4181   }
4182 
4183 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4184 
4185 #ifdef SUPPORT_PCRE2GREP_JIT
4186 if (use_jit)
4187   {
4188   jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4189   if (jit_stack != NULL                        )
4190     pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4191   }
4192 #endif
4193 
4194 /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4195 adjust the options. */
4196 
4197 pcre2_options &= ~PCRE2_LITERAL;
4198 (void)pcre2_set_compile_extra_options(compile_context, 0);
4199 
4200 /* If there are include or exclude patterns read from the command line, compile
4201 them. */
4202 
4203 for (j = 0; j < 4; j++)
4204   {
4205   int k;
4206   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4207     {
4208     if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4209          (k == 1 && cp->next == NULL)? 0 : k))
4210       goto EXIT2;
4211     }
4212   }
4213 
4214 /* Read and compile include/exclude patterns from files. */
4215 
4216 for (fn = include_from; fn != NULL; fn = fn->next)
4217   {
4218   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4219     goto EXIT2;
4220   }
4221 
4222 for (fn = exclude_from; fn != NULL; fn = fn->next)
4223   {
4224   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4225     goto EXIT2;
4226   }
4227 
4228 /* If there are no files that contain lists of files to search, and there are
4229 no file arguments, search stdin, and then exit. */
4230 
4231 if (file_lists == NULL && i >= argc)
4232   {
4233   rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4234     (filenames > FN_DEFAULT)? stdin_name : NULL);
4235   goto EXIT;
4236   }
4237 
4238 /* If any files that contain lists of files to search have been specified,
4239 read them line by line and search the files they name. */
4240 
4241 for (fn = file_lists; fn != NULL; fn = fn->next)
4242   {
4243   char buffer[FNBUFSIZ];
4244   FILE *fl;
4245   if (strcmp(fn->name, "-") == 0) fl = stdin; else
4246     {
4247     fl = fopen(fn->name, "rb");
4248     if (fl == NULL)
4249       {
4250       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4251         strerror(errno));
4252       goto EXIT2;
4253       }
4254     }
4255   while (fgets(buffer, sizeof(buffer), fl) != NULL)
4256     {
4257     int frc;
4258     char *end = buffer + (int)strlen(buffer);
4259     while (end > buffer && isspace(end[-1])) end--;
4260     *end = 0;
4261     if (*buffer != 0)
4262       {
4263       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4264       if (frc > 1) rc = frc;
4265         else if (frc == 0 && rc == 1) rc = 0;
4266       }
4267     }
4268   if (fl != stdin) fclose(fl);
4269   }
4270 
4271 /* After handling file-list, work through remaining arguments. Pass in the fact
4272 that there is only one argument at top level - this suppresses the file name if
4273 the argument is not a directory and filenames are not otherwise forced. */
4274 
4275 only_one_at_top = i == argc - 1 && file_lists == NULL;
4276 
4277 for (; i < argc; i++)
4278   {
4279   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4280     only_one_at_top);
4281   if (frc > 1) rc = frc;
4282     else if (frc == 0 && rc == 1) rc = 0;
4283   }
4284 
4285 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4286 /* If separating builtin echo callouts by implicit newline, add one more for
4287 the final item. */
4288 
4289 if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
4290   fprintf(stdout, STDOUT_NL);
4291 #endif
4292 
4293 /* Show the total number of matches if requested, but not if only one file's
4294 count was printed. */
4295 
4296 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4297   {
4298   if (counts_printed != 0 && filenames >= FN_DEFAULT)
4299     fprintf(stdout, "TOTAL:");
4300   fprintf(stdout, "%lu" STDOUT_NL, total_count);
4301   }
4302 
4303 EXIT:
4304 #ifdef SUPPORT_PCRE2GREP_JIT
4305 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4306 #endif
4307 
4308 free(main_buffer);
4309 free((void *)character_tables);
4310 
4311 pcre2_compile_context_free(compile_context);
4312 pcre2_match_context_free(match_context);
4313 pcre2_match_data_free(match_data);
4314 
4315 free_pattern_chain(patterns);
4316 free_pattern_chain(include_patterns);
4317 free_pattern_chain(include_dir_patterns);
4318 free_pattern_chain(exclude_patterns);
4319 free_pattern_chain(exclude_dir_patterns);
4320 
4321 free_file_chain(exclude_from);
4322 free_file_chain(include_from);
4323 free_file_chain(pattern_files);
4324 free_file_chain(file_lists);
4325 
4326 while (only_matching != NULL)
4327   {
4328   omstr *this = only_matching;
4329   only_matching = this->next;
4330   free(this);
4331   }
4332 
4333 pcre2grep_exit(rc);
4334 
4335 EXIT2:
4336 rc = 2;
4337 goto EXIT;
4338 }
4339 
4340 /* End of pcre2grep */
4341