1 /* find.c - Search directories for matching files.
2  *
3  * Copyright 2014 Rob Landley <rob@landley.net>
4  *
5  * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/find.html
6  *
7  * Our "unspecified" behavior for no paths is to use "."
8  * Parentheses can only stack 4096 deep
9  * Not treating two {} as an error, but only using last
10 
11 USE_FIND(NEWTOY(find, "?^HL[-HL]", TOYFLAG_USR|TOYFLAG_BIN))
12 
13 config FIND
14   bool "find"
15   default y
16   help
17     usage: find [-HL] [DIR...] [<options>]
18 
19     Search directories for matching files.
20     Default: search "." match all -print all matches.
21 
22     -H  Follow command line symlinks         -L  Follow all symlinks
23 
24     Match filters:
25     -name  PATTERN filename with wildcards   -iname      case insensitive -name
26     -path  PATTERN path name with wildcards  -ipath      case insensitive -path
27     -user  UNAME   belongs to user UNAME     -nouser     user not in /etc/passwd
28     -group GROUP   belongs to group GROUP    -nogroup    group not in /etc/group
29     -perm  [-]MODE permissions (-=at least)  -prune      ignore contents of dir
30     -size  N[c]    512 byte blocks (c=bytes) -xdev       stay in this filesystem
31     -links N       hardlink count            -atime N    accessed N days ago
32     -ctime N       changed N days ago        -mtime N    modified N days ago
33     -newer FILE    newer mtime than FILE     -mindepth # at least # dirs down
34     -depth         contents before dir       -maxdepth # at most # dirs down
35     -type [bcdflps] (block, char, dir, file, symlink, pipe, socket)
36 
37     Numbers N may be prefixed by a - (less than) or + (greater than).
38 
39     Combine matches with:
40     !, -a, -o, ( )    not, and, or, group expressions
41 
42     Actions:
43     -print   Print match with newline  -print0    Print match with null
44     -exec    Run command with path     -execdir   Run command in file's dir
45     -ok      Ask before exec           -okdir     Ask before execdir
46 
47     Commands substitute "{}" with matched file. End with ";" to run each file,
48     or "+" (next argument after "{}") to collect and run with multiple files.
49 */
50 
51 #define FOR_find
52 #include "toys.h"
53 
GLOBALS(
  // First expression argument: find_main() points this just past the paths.
  char **filter;
  // Per-primary records built by the parsing pass of do_find() and consumed
  // in the same order on each evaluation pass.
  struct double_list *argdata;
  // topdir:  fd of "." opened when an -execdir/-okdir is present, else -1
  // xdev:    set by -xdev
  // depth:   set by -depth
  // envsize: bytes taken by the environment, measured for "{} +"
  int topdir, xdev, depth, envsize;
  // time(0) sampled once in find_main(), used by -atime/-ctime/-mtime
  time_t now;
)
60 
// State for a single -exec/-ok/-execdir/-okdir clause. It is allocated per
// clause and linked into TT.argdata instead of living in TT, because one
// command line may contain several such clauses.
struct exec_range {
  // List linkage. These two stay first: nodes are cast to and from
  // struct double_list when added to and popped from TT.argdata.
  char *next;
  char *prev;

  int dir;        // nonzero for the "dir" variants (-execdir, -okdir)
  int plus;       // nonzero when the clause ended in "{} +"
  int arglen;     // number of command arguments before the terminator
  int argsize;    // bytes needed for the arguments other than "{}"
  int curly;      // index of the last "{}" within argstart, -1 if none
  int namecount;  // names collected so far
  int namesize;   // bytes needed for the collected names
  char **argstart;            // first command argument
  struct double_list *names;  // collected names (when not per-directory)
};
69 
70 // Perform pending -exec (if any)
flush_exec(struct dirtree * new,struct exec_range * aa)71 static int flush_exec(struct dirtree *new, struct exec_range *aa)
72 {
73   struct double_list **dl;
74   char **newargs;
75   int rc = 0;
76 
77   if (!aa->namecount) return 0;
78 
79   if (aa->dir && new->parent) dl = (void *)&new->parent->extra;
80   else dl = &aa->names;
81   dlist_terminate(*dl);
82 
83   // switch to directory for -execdir, or back to top if we have an -execdir
84   // _and_ a normal -exec, or are at top of tree in -execdir
85   if (aa->dir && new->parent) rc = fchdir(new->parent->data);
86   else if (TT.topdir != -1) rc = fchdir(TT.topdir);
87   if (rc) {
88     perror_msg("%s", new->name);
89 
90     return rc;
91   }
92 
93   // execdir: accumulated execs in this directory's children.
94   newargs = xmalloc(sizeof(char *)*(aa->arglen+aa->namecount+1));
95   if (aa->curly < 0) {
96     memcpy(newargs, aa->argstart, sizeof(char *)*aa->arglen);
97     newargs[aa->arglen] = 0;
98   } else {
99     struct double_list *dl2 = *dl;
100     int pos = aa->curly, rest = aa->arglen - aa->curly;
101 
102     // Collate argument list
103     memcpy(newargs, aa->argstart, sizeof(char *)*pos);
104     for (dl2 = *dl; dl2; dl2 = dl2->next) newargs[pos++] = dl2->data;
105     rest = aa->arglen - aa->curly - 1;
106     memcpy(newargs+pos, aa->argstart+aa->curly+1, sizeof(char *)*rest);
107     newargs[pos+rest] = 0;
108   }
109 
110   rc = xrun(newargs);
111 
112   llist_traverse(*dl, llist_free_double);
113   *dl = 0;
114   aa->namecount = 0;
115 
116   return rc;
117 }
118 
// Compare val against the "[+-]N" argument of a numeric primary.
//
// val:   value measured from the file
// units: when nonzero and str ends in a digit (no suffix), N is multiplied
//        by this before comparing
// str:   "N" tests val == N, "+N" tests val > N, "-N" tests val < N
//
// Returns 1 when the comparison holds, else 0. Exits with an error when no
// digit follows the optional sign.
static int compare_numsign(long val, long units, char *str)
{
  char sign = 0, *arg = str;
  long myval;

  if (*str == '+' || *str == '-') sign = *(str++);

  // Require a digit after the sign too: a bare "+" or "-" would leave an
  // empty string, and the suffix check below would index before its start.
  // (ctype functions take an unsigned char value, hence the casts.)
  if (!isdigit((unsigned char)*str)) error_exit("%s not [+-]N", arg);
  myval = atolx(str);
  if (units && isdigit((unsigned char)str[strlen(str)-1])) myval *= units;

  if (sign == '+') return val > myval;
  if (sign == '-') return val < myval;
  return val == myval;
}
134 
// Print the path of node "new" followed by the single character c
// (the callers pass '\n' for -print and 0 for -print0).
static void do_print(struct dirtree *new, char c)
{
  char *path;

  // dirtree_path() hands back an allocation that is released below.
  path = dirtree_path(new, 0);
  xprintf("%s%c", path, c);
  free(path);
}
142 
strlower(char * s)143 char *strlower(char *s)
144 {
145   char *try, *new;
146 
147   if (!CFG_TOYBOX_I18N) {
148     try = new = xstrdup(s);
149     for (; *s; s++) *(new++) = tolower(*s);
150   } else {
151     // I can't guarantee the string _won't_ expand during reencoding, so...?
152     try = new = xmalloc(strlen(s)*2+1);
153 
154     while (*s) {
155       wchar_t c;
156       int len = mbrtowc(&c, s, MB_CUR_MAX, 0);
157 
158       if (len < 1) *(new++) = *(s++);
159       else {
160         s += len;
161         // squash title case too
162         c = towlower(c);
163 
164         // if we had a valid utf8 sequence, convert it to lower case, and can't
165         // encode back to utf8, something is wrong with your libc. But just
166         // in case somebody finds an exploit...
167         len = wcrtomb(new, c, 0);
168         if (len < 1) error_exit("bad utf8 %x", (int)c);
169         new += len;
170       }
171     }
172     *new = 0;
173   }
174 
175   return try;
176 }
177 
178 // Call this with 0 for first pass argument parsing and syntax checking (which
179 // populates argdata). Later commands traverse argdata (in order) when they
180 // need "do once" results.
do_find(struct dirtree * new)181 static int do_find(struct dirtree *new)
182 {
183   int pcount = 0, print = 0, not = 0, active = !!new, test = active, recurse;
184   struct double_list *argdata = TT.argdata;
185   char *s, **ss;
186 
187   recurse = DIRTREE_COMEAGAIN|(DIRTREE_SYMFOLLOW*!!(toys.optflags&FLAG_L));
188 
189   // skip . and .. below topdir, handle -xdev and -depth
190   if (new) {
191     if (new->parent) {
192       if (!dirtree_notdotdot(new)) return 0;
193       if (TT.xdev && new->st.st_dev != new->parent->st.st_dev) recurse = 0;
194     }
195     if (S_ISDIR(new->st.st_mode)) {
196       if (!new->again) {
197         struct dirtree *n;
198 
199         if (TT.depth) return recurse;
200         for (n = new->parent; n; n = n->parent) {
201           if (n->st.st_ino==new->st.st_ino && n->st.st_dev==new->st.st_dev) {
202             error_msg("'%s': loop detected", s = dirtree_path(new, 0));
203             free(s);
204 
205             return 0;
206           }
207         }
208       } else {
209         struct double_list *dl;
210 
211         if (TT.topdir != -1)
212           for (dl = TT.argdata; dl; dl = dl->next)
213             if (dl->prev == (void *)1 || !new->parent)
214               toys.exitval |= flush_exec(new, (void *)dl);
215 
216         return 0;
217       }
218     }
219   }
220 
221   // pcount: parentheses stack depth (using toybuf bytes, 4096 max depth)
222   // test: result of most recent test
223   // active: if 0 don't perform tests
224   // not: a pending ! applies to this test (only set if performing tests)
225   // print: saw one of print/ok/exec, no need for default -print
226 
227   if (TT.filter) for (ss = TT.filter; *ss; ss++) {
228     int check = active && test;
229 
230     s = *ss;
231 
232     // handle ! ( ) using toybuf as a stack
233     if (*s != '-') {
234       if (s[1]) goto error;
235 
236       if (*s == '!') {
237         // Don't invert if we're not making a decision
238         if (check) not = !not;
239 
240       // Save old "not" and "active" on toybuf stack.
241       // Deactivate this parenthetical if !test
242       // Note: test value should never change while !active
243       } else if (*s == '(') {
244         if (pcount == sizeof(toybuf)) goto error;
245         toybuf[pcount++] = not+(active<<1);
246         if (!check) active = 0;
247         not = 0;
248 
249       // Pop status, apply deferred not to test
250       } else if (*s == ')') {
251         if (--pcount < 0) goto error;
252         // Pop active state, apply deferred not (which was only set if checking)
253         active = (toybuf[pcount]>>1)&1;
254         if (active && (toybuf[pcount]&1)) test = !test;
255         not = 0;
256       } else goto error;
257 
258       continue;
259     } else s++;
260 
261     if (!strcmp(s, "xdev")) TT.xdev = 1;
262     else if (!strcmp(s, "depth")) TT.depth = 1;
263     else if (!strcmp(s, "o") || !strcmp(s, "or")) {
264       if (not) goto error;
265       if (active) {
266         if (!test) test = 1;
267         else active = 0;     // decision has been made until next ")"
268       }
269     } else if (!strcmp(s, "not")) {
270       if (check) not = !not;
271       continue;
272     // Mostly ignore NOP argument
273     } else if (!strcmp(s, "a") || !strcmp(s, "and")) {
274       if (not) goto error;
275 
276     } else if (!strcmp(s, "print") || !strcmp("print0", s)) {
277       print++;
278       if (check) do_print(new, s[5] ? 0 : '\n');
279 
280     } else if (!strcmp(s, "nouser")) {
281       if (check) if (getpwuid(new->st.st_uid)) test = 0;
282     } else if (!strcmp(s, "nogroup")) {
283       if (check) if (getgrgid(new->st.st_gid)) test = 0;
284     } else if (!strcmp(s, "prune")) {
285       if (check && S_ISDIR(new->st.st_dev) && !TT.depth) recurse = 0;
286 
287     // Remaining filters take an argument
288     } else {
289       if (!strcmp(s, "name") || !strcmp(s, "iname")
290         || !strcmp(s, "path") || !strcmp(s, "ipath"))
291       {
292         int i = (*s == 'i');
293         char *arg = ss[1], *path = 0, *name = new->name;
294 
295         // Handle path expansion and case flattening
296         if (new && s[i] == 'p') name = path = dirtree_path(new, 0);
297         if (i) {
298           if (check || !new) {
299             name = strlower(new ? name : arg);
300             if (!new) {
301               dlist_add(&TT.argdata, name);
302               free(path);
303             } else arg = ((struct double_list *)llist_pop(&argdata))->data;
304           }
305         }
306 
307         if (check) {
308           test = !fnmatch(arg, name, FNM_PATHNAME*(s[i] == 'p'));
309           free(path);
310           if (i) free(name);
311         }
312       } else if (!strcmp(s, "perm")) {
313         if (check) {
314           char *m = ss[1];
315           mode_t m1 = string_to_mode(m+(*m == '-'), 0),
316                  m2 = new->st.st_dev & 07777;
317 
318           if (*m != '-') m2 &= m1;
319           test = m1 == m2;
320         }
321       } else if (!strcmp(s, "type")) {
322         if (check) {
323           int types[] = {S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFIFO,
324                          S_IFREG, S_IFSOCK}, i = stridx("bcdlpfs", *ss[1]);
325 
326           if (i<0) error_exit("bad -type '%c'", *ss[1]);
327           if ((new->st.st_mode & S_IFMT) != types[i]) test = 0;
328         }
329 
330       } else if (!strcmp(s, "atime")) {
331         if (check)
332           test = compare_numsign(TT.now - new->st.st_atime, 86400, ss[1]);
333       } else if (!strcmp(s, "ctime")) {
334         if (check)
335           test = compare_numsign(TT.now - new->st.st_ctime, 86400, ss[1]);
336       } else if (!strcmp(s, "mtime")) {
337         if (check)
338           test = compare_numsign(TT.now - new->st.st_mtime, 86400, ss[1]);
339       } else if (!strcmp(s, "size")) {
340         if (check)
341           test = compare_numsign(new->st.st_size, 512, ss[1]);
342       } else if (!strcmp(s, "links")) {
343         if (check) test = compare_numsign(new->st.st_nlink, 0, ss[1]);
344       } else if (!strcmp(s, "mindepth") || !strcmp(s, "maxdepth")) {
345         if (check) {
346           struct dirtree *dt = new;
347           int i = 0, d = atolx(ss[1]);
348 
349           while ((dt = dt->parent)) i++;
350           if (s[1] == 'i') {
351             test = i >= d;
352             if (i == d && not) recurse = 0;
353           } else {
354             test = i <= d;
355             if (i == d && !not) recurse = 0;
356           }
357         }
358       } else if (!strcmp(s, "user") || !strcmp(s, "group")
359               || !strcmp(s, "newer"))
360       {
361         struct {
362           void *next, *prev;
363           union {
364             uid_t uid;
365             gid_t gid;
366             struct timespec tm;
367           } u;
368         } *udl;
369 
370         if (!new && ss[1]) {
371           udl = xmalloc(sizeof(*udl));
372           dlist_add_nomalloc(&TT.argdata, (void *)udl);
373 
374           if (*s == 'u') udl->u.uid = xgetpwnamid(ss[1])->pw_uid;
375           else if (*s == 'g') udl->u.gid = xgetgrnamid(ss[1])->gr_gid;
376           else {
377             struct stat st;
378 
379             xstat(ss[1], &st);
380             udl->u.tm = st.st_mtim;
381           }
382         } else if (check) {
383           udl = (void *)llist_pop(&argdata);
384           if (*s == 'u') test = new->st.st_uid == udl->u.uid;
385           else if (*s == 'g') test = new->st.st_gid == udl->u.gid;
386           else {
387             test = new->st.st_mtim.tv_sec > udl->u.tm.tv_sec;
388             if (new->st.st_mtim.tv_sec == udl->u.tm.tv_sec)
389               test = new->st.st_mtim.tv_nsec > udl->u.tm.tv_nsec;
390           }
391         }
392       } else if (!strcmp(s, "exec") || !strcmp("ok", s)
393               || !strcmp(s, "execdir") || !strcmp(s, "okdir"))
394       {
395         struct exec_range *aa;
396 
397         print++;
398 
399         // Initial argument parsing pass
400         if (!new) {
401           int len;
402 
403           // catch "-exec" with no args and "-exec \;"
404           if (!ss[1] || !strcmp(ss[1], ";")) error_exit("'%s' needs 1 arg", s);
405 
406           dlist_add_nomalloc(&TT.argdata, (void *)(aa = xzalloc(sizeof(*aa))));
407           aa->argstart = ++ss;
408           aa->curly = -1;
409 
410           // Record command line arguments to -exec
411           for (len = 0; ss[len]; len++) {
412             if (!strcmp(ss[len], ";")) break;
413             else if (!strcmp(ss[len], "{}")) {
414               aa->curly = len;
415               if (!strcmp(ss[len+1], "+")) {
416 
417                 // Measure environment space
418                 if (!TT.envsize) {
419                   char **env;
420 
421                   for (env = environ; *env; env++)
422                     TT.envsize += sizeof(char *) + strlen(*env) + 1;
423                   TT.envsize += sizeof(char *);
424                 }
425                 aa->plus++;
426                 len++;
427                 break;
428               }
429             } else aa->argsize += sizeof(char *) + strlen(ss[len]) + 1;
430           }
431           if (!ss[len]) error_exit("-exec without \\;");
432           ss += len;
433           aa->arglen = len;
434           aa->dir = !!strchr(s, 'd');
435           if (aa->dir && TT.topdir == -1) TT.topdir = xopen(".", 0);
436 
437         // collect names and execute commands
438         } else {
439           char *name, *ss1 = ss[1];
440           struct double_list **ddl;
441 
442           // Grab command line exec argument list
443           aa = (void *)llist_pop(&argdata);
444           ss += aa->arglen + 1;
445 
446           if (!check) goto cont;
447           // name is always a new malloc, so we can always free it.
448           name = aa->dir ? xstrdup(new->name) : dirtree_path(new, 0);
449 
450           // Mark entry so COMEAGAIN can call flush_exec() in parent.
451           // This is never a valid pointer value for prev to have otherwise
452           if (aa->dir) aa->prev = (void *)1;
453 
454           if (*s == 'o') {
455             char *prompt = xmprintf("[%s] %s", ss1, name);
456             test = yesno(prompt, 0);
457             free(prompt);
458             if (!test) {
459               free(name);
460               goto cont;
461             }
462           }
463 
464           // Add next name to list (global list without -dir, local with)
465           if (aa->dir && new->parent)
466             ddl = (struct double_list **)&new->parent->extra;
467           else ddl = &aa->names;
468 
469           // Is this + mode?
470           if (aa->plus) {
471             int size = sizeof(char *)+strlen(name)+1;
472 
473             // Linux caps environment space (env vars + args) at 32 4k pages.
474             // todo: is there a way to probe this instead of constant here?
475 
476             if (TT.envsize+aa->argsize+aa->namesize+size >= 131072)
477               toys.exitval |= flush_exec(new, aa);
478             aa->namesize += size;
479           }
480           dlist_add(ddl, name);
481           aa->namecount++;
482           if (!aa->plus) test = flush_exec(new, aa);
483         }
484 
485         // Argument consumed, skip the check.
486         goto cont;
487       } else goto error;
488 
489       // This test can go at the end because we do a syntax checking
490       // pass first. Putting it here gets the error message (-unknown
491       // vs -known noarg) right.
492       if (!*++ss) error_exit("'%s' needs 1 arg", --s);
493     }
494 cont:
495     // Apply pending "!" to result
496     if (active && not) test = !test;
497     not = 0;
498   }
499 
500   if (new) {
501     // If there was no action, print
502     if (!print && test) do_print(new, '\n');
503   } else dlist_terminate(TT.argdata);
504 
505   return recurse;
506 
507 error:
508   error_exit("bad arg '%s'", *ss);
509 }
510 
find_main(void)511 void find_main(void)
512 {
513   int i, len;
514   char **ss = toys.optargs;
515 
516   TT.topdir = -1;
517 
518   // Distinguish paths from filters
519   for (len = 0; toys.optargs[len]; len++)
520     if (strchr("-!(", *toys.optargs[len])) break;
521   TT.filter = toys.optargs+len;
522 
523   // use "." if no paths
524   if (!len) {
525     ss = (char *[]){"."};
526     len = 1;
527   }
528 
529   // first pass argument parsing, verify args match up, handle "evaluate once"
530   TT.now = time(0);
531   do_find(0);
532 
533   // Loop through paths
534   for (i = 0; i < len; i++)
535     dirtree_handle_callback(dirtree_start(ss[i], toys.optflags&(FLAG_H|FLAG_L)),
536       do_find);
537 
538   if (CFG_TOYBOX_FREE) {
539     close(TT.topdir);
540     llist_traverse(TT.argdata, free);
541   }
542 }
543