1 /* grep.c - print lines that match a given regular expression
2  *
3  * Copyright 2013 CE Strake <strake888 at gmail.com>
4  *
5  * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/grep.html
6  *
7  * Posix doesn't even specify -r, documenting deviations from it is silly.
8 * echo hello | grep -w ''
9 * echo '' | grep -w ''
10 * echo hello | grep -f </dev/null
11 *
12 
13 USE_GREP(NEWTOY(grep, "(color):;S(exclude)*M(include)*ZzEFHIab(byte-offset)h(no-filename)ino(only-matching)rsvwcl(files-with-matches)q(quiet)(silent)e*f*C#B#A#m#x[!wx][!EFw]", TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
14 USE_EGREP(OLDTOY(egrep, grep, TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
15 USE_FGREP(OLDTOY(fgrep, grep, TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
16 
17 config GREP
18   bool "grep"
19   default y
20   help
21     usage: grep [-EFrivwcloqsHbhn] [-ABC NUM] [-m MAX] [-e REGEX]... [-MS PATTERN]... [-f REGFILE] [FILE]...
22 
23     Show lines matching regular expressions. If no -e, first argument is
24     regular expression to match. With no files (or "-" filename) read stdin.
25     Returns 0 if matched, 1 if no match found, 2 for command errors.
26 
27     -e  Regex to match. (May be repeated.)
28     -f  File listing regular expressions to match.
29 
30     file search:
31     -r  Recurse into subdirectories (defaults FILE to ".")
32     -M  Match filename pattern (--include)
33     -S  Skip filename pattern (--exclude)
34     -I  Ignore binary files
35 
36     match type:
37     -A  Show NUM lines after     -B  Show NUM lines before match
38     -C  NUM lines context (A+B)  -E  extended regex syntax
39     -F  fixed (literal match)    -a  always text (not binary)
40     -i  case insensitive         -m  match MAX many lines
41     -v  invert match             -w  whole word (implies -E)
42     -x  whole line               -z  input NUL terminated
43 
44     display modes: (default: matched line)
45     -c  count of matching lines  -l  show only matching filenames
46     -o  only matching part       -q  quiet (errors only)
47     -s  silent (no error msg)    -Z  output NUL terminated
48 
49     output prefix (default: filename if checking more than 1 file)
50     -H  force filename           -b  byte offset of match
51     -h  hide filename            -n  line number of match
52 
53 config EGREP
54   bool
55   default y
56   depends on GREP
57 
58 config FGREP
59   bool
60   default y
61   depends on GREP
62 */
63 
64 #define FOR_grep
65 #include "toys.h"
66 #include <regex.h>
67 
68 GLOBALS(
69   long m, A, B, C;
70   struct arg_list *f, *e, *M, *S;
71   char *color;
72 
73   char *purple, *cyan, *red, *green, *grey;
74   struct double_list *reg;
75   char indelim, outdelim;
76   int found, tried;
77 )
78 
// One compiled pattern. The first two members are the list links, so a
// pointer to this struct can be handed to the dlist helpers (and TT.reg can
// be cast back to struct reg *).
struct reg {
  struct reg *next;
  struct reg *prev;
  // Result of the most recent regexec on the current line (0 == matched)
  int rc;
  // Compiled expression
  regex_t r;
  // Offsets of the most recent match
  regmatch_t m;
};
85 
numdash(long num,char dash)86 static void numdash(long num, char dash)
87 {
88   printf("%s%ld%s%c", TT.green, num, TT.cyan, dash);
89 }
90 
91 // Emit line with various potential prefixes and delimiter
outline(char * line,char dash,char * name,long lcount,long bcount,unsigned trim)92 static void outline(char *line, char dash, char *name, long lcount, long bcount,
93   unsigned trim)
94 {
95   if (!trim && FLAG(o)) return;
96   if (name && FLAG(H)) printf("%s%s%s%c", TT.purple, name, TT.cyan, dash);
97   if (FLAG(c)) {
98     printf("%s%ld", TT.grey, lcount);
99     xputc(TT.outdelim);
100   } else if (lcount && FLAG(n)) numdash(lcount, dash);
101   if (bcount && FLAG(b)) numdash(bcount-1, dash);
102   if (line) {
103     if (FLAG(color)) xputsn(FLAG(o) ? TT.red : TT.grey);
104     // support embedded NUL bytes in output
105     xputsl(line, trim);
106     xputc(TT.outdelim);
107   }
108 }
109 
110 // Show matches in one file
do_grep(int fd,char * name)111 static void do_grep(int fd, char *name)
112 {
113   long lcount = 0, mcount = 0, offset = 0, after = 0, before = 0;
114   struct double_list *dlb = 0;
115   char *bars = 0;
116   FILE *file;
117   int bin = 0;
118 
119   if (!FLAG(r)) TT.tried++;
120   if (!fd) name = "(standard input)";
121 
122   // Only run binary file check on lseekable files.
123   if (!FLAG(a) && !lseek(fd, 0, SEEK_CUR)) {
124     char buf[256];
125     int len, i = 0;
126     wchar_t wc;
127 
128     // If the first 256 bytes don't parse as utf8, call it binary.
129     if (0<(len = read(fd, buf, 256))) {
130       lseek(fd, -len, SEEK_CUR);
131       while (i<len) {
132         bin = utf8towc(&wc, buf+i, len-i);
133         if (bin == -2) i = len;
134         if (bin<1) break;
135         i += bin;
136       }
137       bin = i!=len;
138     }
139     if (bin && FLAG(I)) return;
140   }
141 
142   if (!(file = fdopen(fd, "r"))) return perror_msg("%s", name);
143 
144   // Loop through lines of input
145   for (;;) {
146     char *line = 0, *start;
147     struct reg *shoe;
148     size_t ulen;
149     long len;
150     int matched = 0, rc = 1;
151 
152     // get next line, check and trim delimiter
153     lcount++;
154     errno = 0;
155     ulen = len = getdelim(&line, &ulen, TT.indelim, file);
156     if (errno) perror_msg("%s", name);
157     if (len<1) break;
158     if (line[ulen-1] == TT.indelim) line[--ulen] = 0;
159 
160     // Prepare for next line
161     start = line;
162     if (TT.reg) for (shoe = (void *)TT.reg; shoe; shoe = shoe->next)
163       shoe->rc = 0;
164 
165     // Loop to handle multiple matches in same line
166     do {
167       regmatch_t *mm = (void *)toybuf;
168 
169       // Handle "fixed" (literal) matches
170       if (FLAG(F)) {
171         struct arg_list *seek, fseek;
172         char *s = 0;
173 
174         for (seek = TT.e; seek; seek = seek->next) {
175           if (FLAG(x)) {
176             if ((FLAG(i) ? strcasecmp : strcmp)(seek->arg, line)) s = line;
177           } else if (!*seek->arg) {
178             seek = &fseek;
179             fseek.arg = s = line;
180             break;
181           }
182           if (FLAG(i)) s = strcasestr(line, seek->arg);
183           else s = strstr(line, seek->arg);
184           if (s) break;
185         }
186 
187         if (s) {
188           rc = 0;
189           mm->rm_so = (s-line);
190           mm->rm_eo = (s-line)+strlen(seek->arg);
191         } else rc = 1;
192 
193       // Handle regex matches
194       } else {
195         int baseline = mm->rm_eo;
196 
197         mm->rm_so = mm->rm_eo = INT_MAX;
198         rc = 1;
199         for (shoe = (void *)TT.reg; shoe; shoe = shoe->next) {
200 
201           // Do we need to re-check this regex?
202           if (!shoe->rc) {
203             shoe->m.rm_so -= baseline;
204             shoe->m.rm_eo -= baseline;
205             if (!matched || shoe->m.rm_so<0)
206               shoe->rc = regexec0(&shoe->r, start, ulen-(start-line), 1,
207                                   &shoe->m, start==line ? 0 : REG_NOTBOL);
208           }
209 
210           // If we got a match, is it a _better_ match?
211           if (!shoe->rc && (shoe->m.rm_so < mm->rm_so ||
212               (shoe->m.rm_so == mm->rm_so && shoe->m.rm_eo >= mm->rm_eo)))
213           {
214             mm = &shoe->m;
215             rc = 0;
216           }
217         }
218       }
219 
220       if (!rc && FLAG(x))
221         if (mm->rm_so || line[mm->rm_eo]) rc = 1;
222 
223       if (!rc && FLAG(w)) {
224         char c = 0;
225 
226         if ((start+mm->rm_so)!=line) {
227           c = start[mm->rm_so-1];
228           if (!isalnum(c) && c != '_') c = 0;
229         }
230         if (!c) {
231           c = start[mm->rm_eo];
232           if (!isalnum(c) && c != '_') c = 0;
233         }
234         if (c) {
235           start += mm->rm_so+1;
236           continue;
237         }
238       }
239 
240       if (FLAG(v)) {
241         if (FLAG(o)) {
242           if (rc) {
243             mm->rm_so = 0;
244             mm->rm_eo = ulen-(start-line);
245           } else if (!mm->rm_so) {
246             start += mm->rm_eo;
247             continue;
248           } else mm->rm_eo = mm->rm_so;
249         } else {
250           if (!rc) break;
251           mm->rm_eo = ulen-(start-line);
252         }
253         mm->rm_so = 0;
254       } else if (rc) break;
255 
256       // At least one line we didn't print since match while -ABC active
257       if (bars) {
258         xputs(bars);
259         bars = 0;
260       }
261       matched++;
262       TT.found = 1;
263       if (FLAG(q)) {
264         toys.exitval = 0;
265         xexit();
266       }
267       if (FLAG(l)) {
268         xprintf("%s%c", name, TT.outdelim);
269         free(line);
270         fclose(file);
271         return;
272       }
273 
274       if (!FLAG(c)) {
275         long bcount = 1 + offset + (start-line) + (FLAG(o) ? mm->rm_so : 0);
276 
277         if (bin) printf("Binary file %s matches\n", name);
278         else if (FLAG(o))
279           outline(start+mm->rm_so, ':', name, lcount, bcount,
280                   mm->rm_eo-mm->rm_so);
281         else {
282           while (dlb) {
283             struct double_list *dl = dlist_pop(&dlb);
284             unsigned *uu = (void *)(dl->data+((strlen(dl->data)+1)|3)+1);
285 
286             outline(dl->data, '-', name, lcount-before, uu[0]+1, uu[1]);
287             free(dl->data);
288             free(dl);
289             before--;
290           }
291 
292           if (matched==1)
293             outline(FLAG(color) ? 0 : line, ':', name, lcount, bcount, ulen);
294           if (FLAG(color)) {
295             xputsn(TT.grey);
296             if (mm->rm_so) xputsl(line, mm->rm_so);
297             xputsn(TT.red);
298             xputsl(line+mm->rm_so, mm->rm_eo-mm->rm_so);
299           }
300 
301           if (TT.A) after = TT.A+1;
302         }
303       }
304 
305       start += mm->rm_eo;
306       if (mm->rm_so == mm->rm_eo) break;
307       if (!FLAG(o) && FLAG(color)) break;
308     } while (*start);
309     offset += len;
310 
311     if (matched) {
312       // Finish off pending line color fragment.
313       if (FLAG(color) && !FLAG(o)) {
314         xputsn(TT.grey);
315         if (ulen > start-line) xputsl(start, ulen-(start-line));
316         xputc(TT.outdelim);
317       }
318       mcount++;
319     } else {
320       int discard = (after || TT.B);
321 
322       if (after && --after) {
323         outline(line, '-', name, lcount, 0, ulen);
324         discard = 0;
325       }
326       if (discard && TT.B) {
327         unsigned *uu, ul = (ulen+1)|3;
328 
329         line = xrealloc(line, ul+8);
330         uu = (void *)(line+ul+1);
331         uu[0] = offset-len;
332         uu[1] = ulen;
333         dlist_add(&dlb, line);
334         line = 0;
335         if (++before>TT.B) {
336           struct double_list *dl;
337 
338           dl = dlist_pop(&dlb);
339           free(dl->data);
340           free(dl);
341           before--;
342         } else discard = 0;
343       }
344       // If we discarded a line while displaying context, show bars before next
345       // line (but don't show them now in case that was last match in file)
346       if (discard && mcount) bars = "--";
347     }
348     free(line);
349 
350     if (FLAG(m) && mcount >= TT.m) break;
351   }
352 
353   if (FLAG(c)) outline(0, ':', name, mcount, 0, 1);
354 
355   // loopfiles will also close the fd, but this frees an (opaque) struct.
356   fclose(file);
357   while (dlb) {
358     struct double_list *dl = dlist_pop(&dlb);
359 
360     free(dl->data);
361     free(dl);
362   }
363 }
364 
parse_regex(void)365 static void parse_regex(void)
366 {
367   struct arg_list *al, *new, *list = NULL;
368   char *s, *ss;
369 
370   // Add all -f lines to -e list. (Yes, this is leaking allocation context for
371   // exit to free. Not supporting nofork for this command any time soon.)
372   al = TT.f ? TT.f : TT.e;
373   while (al) {
374     if (TT.f) s = ss = xreadfile(al->arg, 0, 0);
375     else s = ss = al->arg;
376 
377     // Split lines at \n, add individual lines to new list.
378     do {
379 // TODO: NUL terminated input shouldn't split -e at \n
380       ss = strchr(s, '\n');
381       if (ss) *(ss++) = 0;
382       new = xmalloc(sizeof(struct arg_list));
383       new->next = list;
384       new->arg = s;
385       list = new;
386       s = ss;
387     } while (ss && *s);
388 
389     // Advance, when we run out of -f switch to -e.
390     al = al->next;
391     if (!al && TT.f) {
392       TT.f = 0;
393       al = TT.e;
394     }
395   }
396   TT.e = list;
397 
398   if (!FLAG(F)) {
399     int i;
400 
401     // Convert regex list
402     for (al = TT.e; al; al = al->next) {
403       struct reg *shoe;
404 
405       if (FLAG(o) && !*al->arg) continue;
406       dlist_add_nomalloc(&TT.reg, (void *)(shoe = xmalloc(sizeof(struct reg))));
407       i = regcomp(&shoe->r, al->arg,
408                   (REG_EXTENDED*!!FLAG(E)) | (REG_ICASE*!!FLAG(i)));
409       if (i) {
410         regerror(i, &shoe->r, toybuf, sizeof(toybuf));
411         error_exit("bad REGEX '%s': %s", al->arg, toybuf);
412       }
413     }
414     dlist_terminate(TT.reg);
415   }
416 }
417 
do_grep_r(struct dirtree * new)418 static int do_grep_r(struct dirtree *new)
419 {
420   char *name;
421 
422   if (!new->parent) TT.tried++;
423   if (!dirtree_notdotdot(new)) return 0;
424   if (S_ISDIR(new->st.st_mode)) return DIRTREE_RECURSE;
425   if (TT.S || TT.M) {
426     struct arg_list *al;
427 
428     for (al = TT.S; al; al = al->next)
429       if (!fnmatch(al->arg, new->name, 0)) return 0;
430 
431     if (TT.M) {
432       for (al = TT.M; al; al = al->next)
433         if (!fnmatch(al->arg, new->name, 0)) break;
434 
435       if (!al) return 0;
436     }
437   }
438 
439   // "grep -r onefile" doesn't show filenames, but "grep -r onedir" should.
440   if (new->parent && !FLAG(h)) toys.optflags |= FLAG_H;
441 
442   name = dirtree_path(new, 0);
443   do_grep(openat(dirtree_parentfd(new), new->name, 0), name);
444   free(name);
445 
446   return 0;
447 }
448 
grep_main(void)449 void grep_main(void)
450 {
451   char **ss = toys.optargs;
452 
453   if (FLAG(color) && (!TT.color || !strcmp(TT.color, "auto")) && !isatty(1))
454     toys.optflags &= ~FLAG_color;
455 
456   if (FLAG(color)) {
457     TT.purple = "\033[35m";
458     TT.cyan = "\033[36m";
459     TT.red = "\033[1;31m";
460     TT.green = "\033[32m";
461     TT.grey = "\033[0m";
462   } else TT.purple = TT.cyan = TT.red = TT.green = TT.grey = "";
463 
464   // Grep exits with 2 for errors
465   toys.exitval = 2;
466 
467   if (!TT.A) TT.A = TT.C;
468   if (!TT.B) TT.B = TT.C;
469 
470   TT.indelim = '\n' * !FLAG(z);
471   TT.outdelim = '\n' * !FLAG(Z);
472 
473   // Handle egrep and fgrep
474   if (*toys.which->name == 'e') toys.optflags |= FLAG_E;
475   if (*toys.which->name == 'f') toys.optflags |= FLAG_F;
476 
477   if (!TT.e && !TT.f) {
478     if (!*ss) error_exit("no REGEX");
479     TT.e = xzalloc(sizeof(struct arg_list));
480     TT.e->arg = *(ss++);
481     toys.optc--;
482   }
483 
484   parse_regex();
485 
486   if (!FLAG(h) && toys.optc>1) toys.optflags |= FLAG_H;
487 
488   if (FLAG(s)) {
489     close(2);
490     xopen_stdio("/dev/null", O_RDWR);
491   }
492 
493   if (FLAG(r)) {
494     // Iterate through -r arguments. Use "." as default if none provided.
495     for (ss = *ss ? ss : (char *[]){".", 0}; *ss; ss++) {
496       if (!strcmp(*ss, "-")) do_grep(0, *ss);
497       else dirtree_read(*ss, do_grep_r);
498     }
499   } else loopfiles_rw(ss, O_RDONLY|WARN_ONLY, 0, do_grep);
500   if (TT.tried >= toys.optc || (FLAG(q)&&TT.found)) toys.exitval = !TT.found;
501 }
502