/* grep.c - print lines that match a given regular expression
2 *
3 * Copyright 2013 CE Strake <strake888 at gmail.com>
4 *
5 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/grep.html
6 *
7 * Posix doesn't even specify -r, documenting deviations from it is silly.
8 * echo hello | grep -w ''
9 * echo '' | grep -w ''
10 * echo hello | grep -f </dev/null
11 *
12
13 USE_GREP(NEWTOY(grep, "(color):;S(exclude)*M(include)*ZzEFHIab(byte-offset)h(no-filename)ino(only-matching)rsvwcl(files-with-matches)q(quiet)(silent)e*f*C#B#A#m#x[!wx][!EFw]", TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
14 USE_EGREP(OLDTOY(egrep, grep, TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
15 USE_FGREP(OLDTOY(fgrep, grep, TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
16
17 config GREP
18 bool "grep"
19 default y
20 help
21 usage: grep [-EFrivwcloqsHbhn] [-ABC NUM] [-m MAX] [-e REGEX]... [-MS PATTERN]... [-f REGFILE] [FILE]...
22
23 Show lines matching regular expressions. If no -e, first argument is
24 regular expression to match. With no files (or "-" filename) read stdin.
25 Returns 0 if matched, 1 if no match found, 2 for command errors.
26
27 -e Regex to match. (May be repeated.)
28 -f File listing regular expressions to match.
29
30 file search:
31 -r Recurse into subdirectories (defaults FILE to ".")
32 -M Match filename pattern (--include)
33 -S Skip filename pattern (--exclude)
34 -I Ignore binary files
35
36 match type:
37 -A Show NUM lines after -B Show NUM lines before match
38 -C NUM lines context (A+B) -E extended regex syntax
39 -F fixed (literal match) -a always text (not binary)
40 -i case insensitive -m match MAX many lines
41 -v invert match -w whole word (implies -E)
42 -x whole line -z input NUL terminated
43
44 display modes: (default: matched line)
45 -c count of matching lines -l show only matching filenames
46 -o only matching part -q quiet (errors only)
47 -s silent (no error msg) -Z output NUL terminated
48
49 output prefix (default: filename if checking more than 1 file)
50 -H force filename -b byte offset of match
51 -h hide filename -n line number of match
52
53 config EGREP
54 bool
55 default y
56 depends on GREP
57
58 config FGREP
59 bool
60 default y
61 depends on GREP
62 */
63
64 #define FOR_grep
65 #include "toys.h"
66 #include <regex.h>
67
// Per-command state, accessed through TT.
// NOTE(review): the leading option-argument fields look like they are filled
// by position from the NEWTOY() option string (m# A# B# C# f* e* M* S*
// (color)) - confirm with the option parser before reordering anything.
GLOBALS(
  long m, A, B, C;                // -m match limit; -A/-B/-C context line counts
  struct arg_list *f, *e, *M, *S; // -f pattern files, -e patterns, -M/-S filename globs
  char *color;                    // argument given to --color, if any

  // Color escape sequences, or "" when color is off (set in grep_main)
  char *purple, *cyan, *red, *green, *grey;
  struct double_list *reg;        // compiled patterns (struct reg) built by parse_regex
  char indelim, outdelim;         // input/output record delimiter: '\n' or NUL
  int found, tried;               // found: some line matched; tried: inputs attempted
)
78
// One compiled pattern. Instances are linked into TT.reg with
// dlist_add_nomalloc(), so the two link pointers have to stay first.
struct reg {
  struct reg *next;   // following pattern (list is walked through this)
  struct reg *prev;   // preceding pattern
  int rc;             // last regexec0() result for the current line, 0 = match
  regex_t r;          // pattern compiled by regcomp()
  regmatch_t m;       // offsets of the most recent match of this pattern
};
85
numdash(long num,char dash)86 static void numdash(long num, char dash)
87 {
88 printf("%s%ld%s%c", TT.green, num, TT.cyan, dash);
89 }
90
91 // Emit line with various potential prefixes and delimiter
outline(char * line,char dash,char * name,long lcount,long bcount,unsigned trim)92 static void outline(char *line, char dash, char *name, long lcount, long bcount,
93 unsigned trim)
94 {
95 if (!trim && FLAG(o)) return;
96 if (name && FLAG(H)) printf("%s%s%s%c", TT.purple, name, TT.cyan, dash);
97 if (FLAG(c)) {
98 printf("%s%ld", TT.grey, lcount);
99 xputc(TT.outdelim);
100 } else if (lcount && FLAG(n)) numdash(lcount, dash);
101 if (bcount && FLAG(b)) numdash(bcount-1, dash);
102 if (line) {
103 if (FLAG(color)) xputsn(FLAG(o) ? TT.red : TT.grey);
104 // support embedded NUL bytes in output
105 xputsl(line, trim);
106 xputc(TT.outdelim);
107 }
108 }
109
110 // Show matches in one file
do_grep(int fd,char * name)111 static void do_grep(int fd, char *name)
112 {
113 long lcount = 0, mcount = 0, offset = 0, after = 0, before = 0;
114 struct double_list *dlb = 0;
115 char *bars = 0;
116 FILE *file;
117 int bin = 0;
118
119 if (!FLAG(r)) TT.tried++;
120 if (!fd) name = "(standard input)";
121
122 // Only run binary file check on lseekable files.
123 if (!FLAG(a) && !lseek(fd, 0, SEEK_CUR)) {
124 char buf[256];
125 int len, i = 0;
126 wchar_t wc;
127
128 // If the first 256 bytes don't parse as utf8, call it binary.
129 if (0<(len = read(fd, buf, 256))) {
130 lseek(fd, -len, SEEK_CUR);
131 while (i<len) {
132 bin = utf8towc(&wc, buf+i, len-i);
133 if (bin == -2) i = len;
134 if (bin<1) break;
135 i += bin;
136 }
137 bin = i!=len;
138 }
139 if (bin && FLAG(I)) return;
140 }
141
142 if (!(file = fdopen(fd, "r"))) return perror_msg("%s", name);
143
144 // Loop through lines of input
145 for (;;) {
146 char *line = 0, *start;
147 struct reg *shoe;
148 size_t ulen;
149 long len;
150 int matched = 0, rc = 1;
151
152 // get next line, check and trim delimiter
153 lcount++;
154 errno = 0;
155 ulen = len = getdelim(&line, &ulen, TT.indelim, file);
156 if (errno) perror_msg("%s", name);
157 if (len<1) break;
158 if (line[ulen-1] == TT.indelim) line[--ulen] = 0;
159
160 // Prepare for next line
161 start = line;
162 if (TT.reg) for (shoe = (void *)TT.reg; shoe; shoe = shoe->next)
163 shoe->rc = 0;
164
165 // Loop to handle multiple matches in same line
166 do {
167 regmatch_t *mm = (void *)toybuf;
168
169 // Handle "fixed" (literal) matches
170 if (FLAG(F)) {
171 struct arg_list *seek, fseek;
172 char *s = 0;
173
174 for (seek = TT.e; seek; seek = seek->next) {
175 if (FLAG(x)) {
176 if ((FLAG(i) ? strcasecmp : strcmp)(seek->arg, line)) s = line;
177 } else if (!*seek->arg) {
178 seek = &fseek;
179 fseek.arg = s = line;
180 break;
181 }
182 if (FLAG(i)) s = strcasestr(line, seek->arg);
183 else s = strstr(line, seek->arg);
184 if (s) break;
185 }
186
187 if (s) {
188 rc = 0;
189 mm->rm_so = (s-line);
190 mm->rm_eo = (s-line)+strlen(seek->arg);
191 } else rc = 1;
192
193 // Handle regex matches
194 } else {
195 int baseline = mm->rm_eo;
196
197 mm->rm_so = mm->rm_eo = INT_MAX;
198 rc = 1;
199 for (shoe = (void *)TT.reg; shoe; shoe = shoe->next) {
200
201 // Do we need to re-check this regex?
202 if (!shoe->rc) {
203 shoe->m.rm_so -= baseline;
204 shoe->m.rm_eo -= baseline;
205 if (!matched || shoe->m.rm_so<0)
206 shoe->rc = regexec0(&shoe->r, start, ulen-(start-line), 1,
207 &shoe->m, start==line ? 0 : REG_NOTBOL);
208 }
209
210 // If we got a match, is it a _better_ match?
211 if (!shoe->rc && (shoe->m.rm_so < mm->rm_so ||
212 (shoe->m.rm_so == mm->rm_so && shoe->m.rm_eo >= mm->rm_eo)))
213 {
214 mm = &shoe->m;
215 rc = 0;
216 }
217 }
218 }
219
220 if (!rc && FLAG(x))
221 if (mm->rm_so || line[mm->rm_eo]) rc = 1;
222
223 if (!rc && FLAG(w)) {
224 char c = 0;
225
226 if ((start+mm->rm_so)!=line) {
227 c = start[mm->rm_so-1];
228 if (!isalnum(c) && c != '_') c = 0;
229 }
230 if (!c) {
231 c = start[mm->rm_eo];
232 if (!isalnum(c) && c != '_') c = 0;
233 }
234 if (c) {
235 start += mm->rm_so+1;
236 continue;
237 }
238 }
239
240 if (FLAG(v)) {
241 if (FLAG(o)) {
242 if (rc) {
243 mm->rm_so = 0;
244 mm->rm_eo = ulen-(start-line);
245 } else if (!mm->rm_so) {
246 start += mm->rm_eo;
247 continue;
248 } else mm->rm_eo = mm->rm_so;
249 } else {
250 if (!rc) break;
251 mm->rm_eo = ulen-(start-line);
252 }
253 mm->rm_so = 0;
254 } else if (rc) break;
255
256 // At least one line we didn't print since match while -ABC active
257 if (bars) {
258 xputs(bars);
259 bars = 0;
260 }
261 matched++;
262 TT.found = 1;
263 if (FLAG(q)) {
264 toys.exitval = 0;
265 xexit();
266 }
267 if (FLAG(l)) {
268 xprintf("%s%c", name, TT.outdelim);
269 free(line);
270 fclose(file);
271 return;
272 }
273
274 if (!FLAG(c)) {
275 long bcount = 1 + offset + (start-line) + (FLAG(o) ? mm->rm_so : 0);
276
277 if (bin) printf("Binary file %s matches\n", name);
278 else if (FLAG(o))
279 outline(start+mm->rm_so, ':', name, lcount, bcount,
280 mm->rm_eo-mm->rm_so);
281 else {
282 while (dlb) {
283 struct double_list *dl = dlist_pop(&dlb);
284 unsigned *uu = (void *)(dl->data+((strlen(dl->data)+1)|3)+1);
285
286 outline(dl->data, '-', name, lcount-before, uu[0]+1, uu[1]);
287 free(dl->data);
288 free(dl);
289 before--;
290 }
291
292 if (matched==1)
293 outline(FLAG(color) ? 0 : line, ':', name, lcount, bcount, ulen);
294 if (FLAG(color)) {
295 xputsn(TT.grey);
296 if (mm->rm_so) xputsl(line, mm->rm_so);
297 xputsn(TT.red);
298 xputsl(line+mm->rm_so, mm->rm_eo-mm->rm_so);
299 }
300
301 if (TT.A) after = TT.A+1;
302 }
303 }
304
305 start += mm->rm_eo;
306 if (mm->rm_so == mm->rm_eo) break;
307 if (!FLAG(o) && FLAG(color)) break;
308 } while (*start);
309 offset += len;
310
311 if (matched) {
312 // Finish off pending line color fragment.
313 if (FLAG(color) && !FLAG(o)) {
314 xputsn(TT.grey);
315 if (ulen > start-line) xputsl(start, ulen-(start-line));
316 xputc(TT.outdelim);
317 }
318 mcount++;
319 } else {
320 int discard = (after || TT.B);
321
322 if (after && --after) {
323 outline(line, '-', name, lcount, 0, ulen);
324 discard = 0;
325 }
326 if (discard && TT.B) {
327 unsigned *uu, ul = (ulen+1)|3;
328
329 line = xrealloc(line, ul+8);
330 uu = (void *)(line+ul+1);
331 uu[0] = offset-len;
332 uu[1] = ulen;
333 dlist_add(&dlb, line);
334 line = 0;
335 if (++before>TT.B) {
336 struct double_list *dl;
337
338 dl = dlist_pop(&dlb);
339 free(dl->data);
340 free(dl);
341 before--;
342 } else discard = 0;
343 }
344 // If we discarded a line while displaying context, show bars before next
345 // line (but don't show them now in case that was last match in file)
346 if (discard && mcount) bars = "--";
347 }
348 free(line);
349
350 if (FLAG(m) && mcount >= TT.m) break;
351 }
352
353 if (FLAG(c)) outline(0, ':', name, mcount, 0, 1);
354
355 // loopfiles will also close the fd, but this frees an (opaque) struct.
356 fclose(file);
357 while (dlb) {
358 struct double_list *dl = dlist_pop(&dlb);
359
360 free(dl->data);
361 free(dl);
362 }
363 }
364
parse_regex(void)365 static void parse_regex(void)
366 {
367 struct arg_list *al, *new, *list = NULL;
368 char *s, *ss;
369
370 // Add all -f lines to -e list. (Yes, this is leaking allocation context for
371 // exit to free. Not supporting nofork for this command any time soon.)
372 al = TT.f ? TT.f : TT.e;
373 while (al) {
374 if (TT.f) s = ss = xreadfile(al->arg, 0, 0);
375 else s = ss = al->arg;
376
377 // Split lines at \n, add individual lines to new list.
378 do {
379 // TODO: NUL terminated input shouldn't split -e at \n
380 ss = strchr(s, '\n');
381 if (ss) *(ss++) = 0;
382 new = xmalloc(sizeof(struct arg_list));
383 new->next = list;
384 new->arg = s;
385 list = new;
386 s = ss;
387 } while (ss && *s);
388
389 // Advance, when we run out of -f switch to -e.
390 al = al->next;
391 if (!al && TT.f) {
392 TT.f = 0;
393 al = TT.e;
394 }
395 }
396 TT.e = list;
397
398 if (!FLAG(F)) {
399 int i;
400
401 // Convert regex list
402 for (al = TT.e; al; al = al->next) {
403 struct reg *shoe;
404
405 if (FLAG(o) && !*al->arg) continue;
406 dlist_add_nomalloc(&TT.reg, (void *)(shoe = xmalloc(sizeof(struct reg))));
407 i = regcomp(&shoe->r, al->arg,
408 (REG_EXTENDED*!!FLAG(E)) | (REG_ICASE*!!FLAG(i)));
409 if (i) {
410 regerror(i, &shoe->r, toybuf, sizeof(toybuf));
411 error_exit("bad REGEX '%s': %s", al->arg, toybuf);
412 }
413 }
414 dlist_terminate(TT.reg);
415 }
416 }
417
do_grep_r(struct dirtree * new)418 static int do_grep_r(struct dirtree *new)
419 {
420 char *name;
421
422 if (!new->parent) TT.tried++;
423 if (!dirtree_notdotdot(new)) return 0;
424 if (S_ISDIR(new->st.st_mode)) return DIRTREE_RECURSE;
425 if (TT.S || TT.M) {
426 struct arg_list *al;
427
428 for (al = TT.S; al; al = al->next)
429 if (!fnmatch(al->arg, new->name, 0)) return 0;
430
431 if (TT.M) {
432 for (al = TT.M; al; al = al->next)
433 if (!fnmatch(al->arg, new->name, 0)) break;
434
435 if (!al) return 0;
436 }
437 }
438
439 // "grep -r onefile" doesn't show filenames, but "grep -r onedir" should.
440 if (new->parent && !FLAG(h)) toys.optflags |= FLAG_H;
441
442 name = dirtree_path(new, 0);
443 do_grep(openat(dirtree_parentfd(new), new->name, 0), name);
444 free(name);
445
446 return 0;
447 }
448
grep_main(void)449 void grep_main(void)
450 {
451 char **ss = toys.optargs;
452
453 if (FLAG(color) && (!TT.color || !strcmp(TT.color, "auto")) && !isatty(1))
454 toys.optflags &= ~FLAG_color;
455
456 if (FLAG(color)) {
457 TT.purple = "\033[35m";
458 TT.cyan = "\033[36m";
459 TT.red = "\033[1;31m";
460 TT.green = "\033[32m";
461 TT.grey = "\033[0m";
462 } else TT.purple = TT.cyan = TT.red = TT.green = TT.grey = "";
463
464 // Grep exits with 2 for errors
465 toys.exitval = 2;
466
467 if (!TT.A) TT.A = TT.C;
468 if (!TT.B) TT.B = TT.C;
469
470 TT.indelim = '\n' * !FLAG(z);
471 TT.outdelim = '\n' * !FLAG(Z);
472
473 // Handle egrep and fgrep
474 if (*toys.which->name == 'e') toys.optflags |= FLAG_E;
475 if (*toys.which->name == 'f') toys.optflags |= FLAG_F;
476
477 if (!TT.e && !TT.f) {
478 if (!*ss) error_exit("no REGEX");
479 TT.e = xzalloc(sizeof(struct arg_list));
480 TT.e->arg = *(ss++);
481 toys.optc--;
482 }
483
484 parse_regex();
485
486 if (!FLAG(h) && toys.optc>1) toys.optflags |= FLAG_H;
487
488 if (FLAG(s)) {
489 close(2);
490 xopen_stdio("/dev/null", O_RDWR);
491 }
492
493 if (FLAG(r)) {
494 // Iterate through -r arguments. Use "." as default if none provided.
495 for (ss = *ss ? ss : (char *[]){".", 0}; *ss; ss++) {
496 if (!strcmp(*ss, "-")) do_grep(0, *ss);
497 else dirtree_read(*ss, do_grep_r);
498 }
499 } else loopfiles_rw(ss, O_RDONLY|WARN_ONLY, 0, do_grep);
500 if (TT.tried >= toys.optc || (FLAG(q)&&TT.found)) toys.exitval = !TT.found;
501 }
502