1 /* find.c - Search directories for matching files.
2  *
3  * Copyright 2014 Rob Landley <rob@landley.net>
4  *
 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/find.html
6  *
7  * Our "unspecified" behavior for no paths is to use "."
8  * Parentheses can only stack 4096 deep
9  * Not treating two {} as an error, but only using last
10  *
11  * TODO: -empty (dirs too!)
12 
13 USE_FIND(NEWTOY(find, "?^HL[-HL]", TOYFLAG_USR|TOYFLAG_BIN))
14 
15 config FIND
16   bool "find"
17   default y
18   help
19     usage: find [-HL] [DIR...] [<options>]
20 
21     Search directories for matching files.
22     Default: search "." match all -print all matches.
23 
24     -H  Follow command line symlinks         -L  Follow all symlinks
25 
26     Match filters:
27     -name  PATTERN  filename with wildcards   -iname      case insensitive -name
28     -path  PATTERN  path name with wildcards  -ipath      case insensitive -path
29     -user  UNAME    belongs to user UNAME     -nouser     user ID not known
30     -group GROUP    belongs to group GROUP    -nogroup    group ID not known
31     -perm  [-/]MODE permissions (-=min /=any) -prune      ignore contents of dir
32     -size  N[c]     512 byte blocks (c=bytes) -xdev       only this filesystem
33     -links N        hardlink count            -atime N[u] accessed N units ago
    -ctime N[u]     inode changed N units ago -mtime N[u] modified N units ago
35     -newer FILE     newer mtime than FILE     -mindepth # at least # dirs down
    -depth          contents before dir       -maxdepth # at most # dirs down
37     -inum  N        inode number N            -empty      empty files and dirs
38     -type [bcdflps] (block, char, dir, file, symlink, pipe, socket)
39 
40     Numbers N may be prefixed by a - (less than) or + (greater than). Units for
41     -Xtime are d (days, default), h (hours), m (minutes), or s (seconds).
42 
43     Combine matches with:
44     !, -a, -o, ( )    not, and, or, group expressions
45 
46     Actions:
47     -print   Print match with newline  -print0    Print match with null
48     -exec    Run command with path     -execdir   Run command in file's dir
49     -ok      Ask before exec           -okdir     Ask before execdir
50     -delete  Remove matching file/dir
51 
52     Commands substitute "{}" with matched file. End with ";" to run each file,
53     or "+" (next argument after "{}") to collect and run with multiple files.
54 */
55 
56 #define FOR_find
57 #include "toys.h"
58 
59 GLOBALS(
60   char **filter;
61   struct double_list *argdata;
62   int topdir, xdev, depth;
63   time_t now;
64 )
65 
// A batch of names waiting to be handed to one command invocation.
// -exec uses the single instance embedded in struct exec_range; the
// -execdir/-okdir "+" path pushes one of these per directory level.
struct execdir_data {
  struct execdir_data *next;  // batch that was current before this was pushed

  int namecount;              // how many names are queued in "names"
  struct double_list *names;  // queued names (data pointers are malloc()ed)
};
72 
73 // None of this can go in TT because you can have more than one -exec
74 struct exec_range {
75   char *next, *prev;  // layout compatible with struct double_list
76 
77   int dir, plus, arglen, argsize, curly;
78   char **argstart;
79   struct execdir_data exec, *execdir;
80 };
81 
82 // Perform pending -exec (if any)
flush_exec(struct dirtree * new,struct exec_range * aa)83 static int flush_exec(struct dirtree *new, struct exec_range *aa)
84 {
85   struct execdir_data *bb = aa->execdir ? aa->execdir : &aa->exec;
86   char **newargs;
87   int rc, revert = 0;
88 
89   if (!bb->namecount) return 0;
90 
91   dlist_terminate(bb->names);
92 
93   // switch to directory for -execdir, or back to top if we have an -execdir
94   // _and_ a normal -exec, or are at top of tree in -execdir
95   if (TT.topdir != -1) {
96     if (aa->dir && new && new->parent) {
97       revert++;
98       rc = fchdir(new->parent->dirfd);
99     } else rc = fchdir(TT.topdir);
100     if (rc) {
101       perror_msg_raw(revert ? new->name : ".");
102 
103       return rc;
104     }
105   }
106 
107   // execdir: accumulated execs in this directory's children.
108   newargs = xmalloc(sizeof(char *)*(aa->arglen+bb->namecount+1));
109   if (aa->curly < 0) {
110     memcpy(newargs, aa->argstart, sizeof(char *)*aa->arglen);
111     newargs[aa->arglen] = 0;
112   } else {
113     int pos = aa->curly, rest = aa->arglen - aa->curly;
114     struct double_list *dl;
115 
116     // Collate argument list
117     memcpy(newargs, aa->argstart, sizeof(char *)*pos);
118     for (dl = bb->names; dl; dl = dl->next) newargs[pos++] = dl->data;
119     rest = aa->arglen - aa->curly - 1;
120     memcpy(newargs+pos, aa->argstart+aa->curly+1, sizeof(char *)*rest);
121     newargs[pos+rest] = 0;
122   }
123 
124   rc = xrun(newargs);
125 
126   llist_traverse(bb->names, llist_free_double);
127   bb->names = 0;
128   bb->namecount = 0;
129 
130   if (revert) revert = fchdir(TT.topdir);
131 
132   return rc;
133 }
134 
// Compare val against a "[+-]N" argument.
//
// val:   the value measured from the file
// units: when nonzero, N is multiplied by this if the argument ends in a
//        digit (i.e. carries no trailing suffix character)
// str:   the argument text; exits with an error unless it starts with
//        '+', '-' or a digit
//
// Returns nonzero on match: "+N" means val > N, "-N" means val < N, and a
// bare N means val == N.
static int compare_numsign(long val, long units, char *str)
{
  char sign = 0;
  long myval;

  // ctype functions need an unsigned char value; a plain char may be negative
  if (*str == '+' || *str == '-') sign = *(str++);
  else if (!isdigit((unsigned char)*str)) error_exit("%s not [+-]N", str);
  myval = atolx(str);

  // Don't index before the start of an empty string (a lone sign)
  if (units && *str && isdigit((unsigned char)str[strlen(str)-1]))
    myval *= units;

  if (sign == '+') return val > myval;
  if (sign == '-') return val < myval;
  return val == myval;
}
150 
// Write the path of a node to stdout, followed by terminator character c.
static void do_print(struct dirtree *new, char c)
{
  char *path;

  path = dirtree_path(new, 0);
  xprintf("%s%c", path, c);

  // dirtree_path() result is ours to release
  free(path);
}
158 
159 // Descend or ascend -execdir + directory level
execdir(struct dirtree * new,int flush)160 static void execdir(struct dirtree *new, int flush)
161 {
162   struct double_list *dl;
163   struct exec_range *aa;
164   struct execdir_data *bb;
165 
166   if (new && TT.topdir == -1) return;
167 
168   for (dl = TT.argdata; dl; dl = dl->next) {
169     if (dl->prev != (void *)1) continue;
170     aa = (void *)dl;
171     if (!aa->plus || (new && !aa->dir)) continue;
172 
173     if (flush) {
174 
175       // Flush pending "-execdir +" instances for this dir
176       // or flush everything for -exec at top
177       toys.exitval |= flush_exec(new, aa);
178 
179       // pop per-directory struct
180       if ((bb = aa->execdir)) {
181         aa->execdir = bb->next;
182         free(bb);
183       }
184     } else if (aa->dir) {
185 
186       // Push new per-directory struct for -execdir/okdir + codepath. (Can't
187       // use new->extra because command line may have multiple -execdir)
188       bb = xzalloc(sizeof(struct execdir_data));
189       bb->next = aa->execdir;
190       aa->execdir = bb;
191     }
192   }
193 }
194 
195 // Call this with 0 for first pass argument parsing and syntax checking (which
196 // populates argdata). Later commands traverse argdata (in order) when they
197 // need "do once" results.
do_find(struct dirtree * new)198 static int do_find(struct dirtree *new)
199 {
200   int pcount = 0, print = 0, not = 0, active = !!new, test = active, recurse;
201   struct double_list *argdata = TT.argdata;
202   char *s, **ss;
203 
204   recurse = DIRTREE_COMEAGAIN|(DIRTREE_SYMFOLLOW*!!(toys.optflags&FLAG_L));
205 
206   // skip . and .. below topdir, handle -xdev and -depth
207   if (new) {
208     if (new->parent) {
209       if (!dirtree_notdotdot(new)) return 0;
210       if (TT.xdev && new->st.st_dev != new->parent->st.st_dev) recurse = 0;
211     }
212 
213     if (S_ISDIR(new->st.st_mode)) {
214       // Descending into new directory
215       if (!new->again) {
216         struct dirtree *n;
217 
218         for (n = new->parent; n; n = n->parent) {
219           if (n->st.st_ino==new->st.st_ino && n->st.st_dev==new->st.st_dev) {
220             error_msg("'%s': loop detected", s = dirtree_path(new, 0));
221             free(s);
222 
223             return 0;
224           }
225         }
226 
227         if (TT.depth) {
228           execdir(new, 0);
229 
230           return recurse;
231         }
232       // Done with directory (COMEAGAIN call)
233       } else {
234         execdir(new, 1);
235         recurse = 0;
236         if (!TT.depth) return 0;
237       }
238     }
239   }
240 
241   // pcount: parentheses stack depth (using toybuf bytes, 4096 max depth)
242   // test: result of most recent test
243   // active: if 0 don't perform tests
244   // not: a pending ! applies to this test (only set if performing tests)
245   // print: saw one of print/ok/exec, no need for default -print
246 
247   if (TT.filter) for (ss = TT.filter; *ss; ss++) {
248     int check = active && test;
249 
250     s = *ss;
251 
252     // handle ! ( ) using toybuf as a stack
253     if (*s != '-') {
254       if (s[1]) goto error;
255 
256       if (*s == '!') {
257         // Don't invert if we're not making a decision
258         if (check) not = !not;
259 
260       // Save old "not" and "active" on toybuf stack.
261       // Deactivate this parenthetical if !test
262       // Note: test value should never change while !active
263       } else if (*s == '(') {
264         if (pcount == sizeof(toybuf)) goto error;
265         toybuf[pcount++] = not+(active<<1);
266         if (!check) active = 0;
267         not = 0;
268 
269       // Pop status, apply deferred not to test
270       } else if (*s == ')') {
271         if (--pcount < 0) goto error;
272         // Pop active state, apply deferred not (which was only set if checking)
273         active = (toybuf[pcount]>>1)&1;
274         if (active && (toybuf[pcount]&1)) test = !test;
275         not = 0;
276       } else goto error;
277 
278       continue;
279     } else s++;
280 
281     if (!strcmp(s, "xdev")) TT.xdev = 1;
282     else if (!strcmp(s, "delete")) {
283       // Delete forces depth first
284       TT.depth = 1;
285       if (new && check)
286         test = !unlinkat(dirtree_parentfd(new), new->name,
287           S_ISDIR(new->st.st_mode) ? AT_REMOVEDIR : 0);
288     } else if (!strcmp(s, "depth")) TT.depth = 1;
289     else if (!strcmp(s, "o") || !strcmp(s, "or")) {
290       if (not) goto error;
291       if (active) {
292         if (!test) test = 1;
293         else active = 0;     // decision has been made until next ")"
294       }
295     } else if (!strcmp(s, "not")) {
296       if (check) not = !not;
297       continue;
298     // Mostly ignore NOP argument
299     } else if (!strcmp(s, "a") || !strcmp(s, "and") || !strcmp(s, "noleaf")) {
300       if (not) goto error;
301 
302     } else if (!strcmp(s, "print") || !strcmp("print0", s)) {
303       print++;
304       if (check) do_print(new, s[5] ? 0 : '\n');
305 
306     } else if (!strcmp(s, "nouser")) {
307       if (check) if (bufgetpwuid(new->st.st_uid)) test = 0;
308     } else if (!strcmp(s, "nogroup")) {
309       if (check) if (bufgetgrgid(new->st.st_gid)) test = 0;
310     } else if (!strcmp(s, "prune")) {
311       if (check && S_ISDIR(new->st.st_mode) && !TT.depth) recurse = 0;
312 
313     // Remaining filters take an argument
314     } else {
315       if (!strcmp(s, "name") || !strcmp(s, "iname")
316         || !strcmp(s, "path") || !strcmp(s, "ipath"))
317       {
318         int i = (*s == 'i');
319         char *arg = ss[1], *path = 0, *name = new ? new->name : arg;
320 
321         // Handle path expansion and case flattening
322         if (new && s[i] == 'p') name = path = dirtree_path(new, 0);
323         if (i) {
324           if ((check || !new) && name) name = strlower(name);
325           if (!new) dlist_add(&TT.argdata, name);
326           else arg = ((struct double_list *)llist_pop(&argdata))->data;
327         }
328 
329         if (check) {
330           test = !fnmatch(arg, name, FNM_PATHNAME*(s[i] == 'p'));
331           if (i) free(name);
332         }
333         free(path);
334       } else if (!strcmp(s, "perm")) {
335         if (check) {
336           char *m = ss[1];
337           int match_min = *m == '-',
338               match_any = *m == '/';
339           mode_t m1 = string_to_mode(m+(match_min || match_any), 0),
340                  m2 = new->st.st_mode & 07777;
341 
342           if (match_min || match_any) m2 &= m1;
343           test = match_any ? !m1 || m2 : m1 == m2;
344         }
345       } else if (!strcmp(s, "type")) {
346         if (check) {
347           int types[] = {S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFIFO,
348                          S_IFREG, S_IFSOCK}, i = stridx("bcdlpfs", *ss[1]);
349 
350           if (i<0) error_exit("bad -type '%c'", *ss[1]);
351           if ((new->st.st_mode & S_IFMT) != types[i]) test = 0;
352         }
353 
354       } else if (strchr("acm", *s)
355         && (!strcmp(s+1, "time") || !strcmp(s+1, "min")))
356       {
357         if (check) {
358           char *copy = ss[1];
359           time_t thyme = (int []){new->st.st_atime, new->st.st_ctime,
360                                   new->st.st_mtime}[stridx("acm", *s)];
361           int len = strlen(copy), uu, units = (s[1]=='m') ? 60 : 86400;
362 
363           if (len && -1!=(uu = stridx("dhms",tolower(copy[len-1])))) {
364             copy = xstrdup(copy);
365             copy[--len] = 0;
366             units = (int []){86400, 3600, 60, 1}[uu];
367           }
368           test = compare_numsign(TT.now - thyme, units, copy);
369           if (copy != ss[1]) free(copy);
370         }
371       } else if (!strcmp(s, "size")) {
372         if (check)
373           test = compare_numsign(new->st.st_size, 512, ss[1]);
374       } else if (!strcmp(s, "links")) {
375         if (check) test = compare_numsign(new->st.st_nlink, 0, ss[1]);
376       } else if (!strcmp(s, "inum")) {
377         if (check)
378           test = compare_numsign(new->st.st_ino, 0, ss[1]);
379       } else if (!strcmp(s, "mindepth") || !strcmp(s, "maxdepth")) {
380         if (check) {
381           struct dirtree *dt = new;
382           int i = 0, d = atolx(ss[1]);
383 
384           while ((dt = dt->parent)) i++;
385           if (s[1] == 'i') {
386             test = i >= d;
387             if (i == d && not) recurse = 0;
388           } else {
389             test = i <= d;
390             if (i == d && !not) recurse = 0;
391           }
392         }
393       } else if (!strcmp(s, "user") || !strcmp(s, "group")
394               || !strcmp(s, "newer"))
395       {
396         struct {
397           void *next, *prev;
398           union {
399             uid_t uid;
400             gid_t gid;
401             struct timespec tm;
402           } u;
403         } *udl;
404 
405         if (!new) {
406           if (ss[1]) {
407             udl = xmalloc(sizeof(*udl));
408             dlist_add_nomalloc(&TT.argdata, (void *)udl);
409 
410             if (*s == 'u') udl->u.uid = xgetuid(ss[1]);
411             else if (*s == 'g') udl->u.gid = xgetgid(ss[1]);
412             else {
413               struct stat st;
414 
415               xstat(ss[1], &st);
416               udl->u.tm = st.st_mtim;
417             }
418           }
419         } else {
420           udl = (void *)llist_pop(&argdata);
421           if (check) {
422             if (*s == 'u') test = new->st.st_uid == udl->u.uid;
423             else if (*s == 'g') test = new->st.st_gid == udl->u.gid;
424             else {
425               test = new->st.st_mtim.tv_sec > udl->u.tm.tv_sec;
426               if (new->st.st_mtim.tv_sec == udl->u.tm.tv_sec)
427                 test = new->st.st_mtim.tv_nsec > udl->u.tm.tv_nsec;
428             }
429           }
430         }
431       } else if (!strcmp(s, "exec") || !strcmp("ok", s)
432               || !strcmp(s, "execdir") || !strcmp(s, "okdir"))
433       {
434         struct exec_range *aa;
435 
436         print++;
437 
438         // Initial argument parsing pass
439         if (!new) {
440           int len;
441 
442           // catch "-exec" with no args and "-exec \;"
443           if (!ss[1] || !strcmp(ss[1], ";")) error_exit("'%s' needs 1 arg", s);
444 
445           dlist_add_nomalloc(&TT.argdata, (void *)(aa = xzalloc(sizeof(*aa))));
446           aa->argstart = ++ss;
447           aa->curly = -1;
448 
449           // Record command line arguments to -exec
450           for (len = 0; ss[len]; len++) {
451             if (!strcmp(ss[len], ";")) break;
452             else if (!strcmp(ss[len], "{}")) {
453               aa->curly = len;
454               if (ss[len+1] && !strcmp(ss[len+1], "+")) {
455                 aa->plus++;
456                 len++;
457                 break;
458               }
459             } else aa->argsize += sizeof(char *) + strlen(ss[len]) + 1;
460           }
461           if (!ss[len]) error_exit("-exec without %s",
462             aa->curly!=-1 ? "\\;" : "{}");
463           ss += len;
464           aa->arglen = len;
465           aa->dir = !!strchr(s, 'd');
466           if (TT.topdir == -1) TT.topdir = xopenro(".");
467 
468         // collect names and execute commands
469         } else {
470           char *name, *ss1 = ss[1];
471           struct execdir_data *bb;
472 
473           // Grab command line exec argument list
474           aa = (void *)llist_pop(&argdata);
475           ss += aa->arglen + 1;
476 
477           if (!check) goto cont;
478           // name is always a new malloc, so we can always free it.
479           name = aa->dir ? xstrdup(new->name) : dirtree_path(new, 0);
480 
481           if (*s == 'o') {
482             fprintf(stderr, "[%s] %s", ss1, name);
483             if (!(test = yesno(0))) {
484               free(name);
485               goto cont;
486             }
487           }
488 
489           // Add next name to list (global list without -dir, local with)
490           bb = aa->execdir ? aa->execdir : &aa->exec;
491           dlist_add(&bb->names, name);
492           bb->namecount++;
493 
494           // -exec + collates and saves result in exitval
495           if (aa->plus) {
496             // Mark entry so COMEAGAIN can call flush_exec() in parent.
497             // This is never a valid pointer value for prev to have otherwise
498             // Done here vs argument parsing pass so it's after dlist_terminate
499             aa->prev = (void *)1;
500 
501             // Flush if we pass 16 megs of environment space.
502             // An insanely long path (>2 gigs) could wrap the counter and
503             // defeat this test, which could potentially trigger OOM killer.
504             if ((aa->plus += sizeof(char *)+strlen(name)+1) > 1<<24) {
505               aa->plus = 1;
506               toys.exitval |= flush_exec(new, aa);
507             }
508           } else test = flush_exec(new, aa);
509         }
510 
511         // Argument consumed, skip the check.
512         goto cont;
513       } else goto error;
514 
515       // This test can go at the end because we do a syntax checking
516       // pass first. Putting it here gets the error message (-unknown
517       // vs -known noarg) right.
518       if (!*++ss) error_exit("'%s' needs 1 arg", --s);
519     }
520 cont:
521     // Apply pending "!" to result
522     if (active && not) test = !test;
523     not = 0;
524   }
525 
526   if (new) {
527     // If there was no action, print
528     if (!print && test) do_print(new, '\n');
529 
530     if (S_ISDIR(new->st.st_mode)) execdir(new, 0);
531 
532   } else dlist_terminate(TT.argdata);
533 
534   return recurse;
535 
536 error:
537   error_exit("bad arg '%s'", *ss);
538 }
539 
find_main(void)540 void find_main(void)
541 {
542   int i, len;
543   char **ss = toys.optargs;
544 
545   TT.topdir = -1;
546 
547   // Distinguish paths from filters
548   for (len = 0; toys.optargs[len]; len++)
549     if (strchr("-!(", *toys.optargs[len])) break;
550   TT.filter = toys.optargs+len;
551 
552   // use "." if no paths
553   if (!len) {
554     ss = (char *[]){"."};
555     len = 1;
556   }
557 
558   // first pass argument parsing, verify args match up, handle "evaluate once"
559   TT.now = time(0);
560   do_find(0);
561 
562   // Loop through paths
563   for (i = 0; i < len; i++)
564     dirtree_flagread(ss[i], DIRTREE_SYMFOLLOW*!!(toys.optflags&(FLAG_H|FLAG_L)),
565       do_find);
566 
567   execdir(0, 1);
568 
569   if (CFG_TOYBOX_FREE) {
570     close(TT.topdir);
571     llist_traverse(TT.argdata, free);
572   }
573 }
574