1 /* args.c - Command line argument parsing.
2  *
3  * Copyright 2006 Rob Landley <rob@landley.net>
4  */
5 
6 #include "toys.h"
7 
8 // Design goals:
9 //   Don't use getopt() out of libc.
10 //   Don't permute original arguments (screwing up ps/top output).
11 //   Integrated --long options "(noshort)a(along)b(blong1)(blong2)"
12 
13 /* This uses a getopt-like option string, but not getopt() itself. We call
14  * it the get_opt string.
15  *
16  * Each option in the get_opt string corresponds to a bit position in the
17  * return value. The rightmost argument is (1<<0), the next to last is (1<<1)
18  * and so on. If the option isn't seen in argv[], its bit remains 0.
19  *
20  * Options which have an argument fill in the corresponding slot in the global
21  * union "this" (see generated/globals.h), which it treats as an array of longs
22  * (note that sizeof(long)==sizeof(pointer) is guaranteed by LP64).
23  *
24  * You don't have to free the option strings, which point into the environment
25  * space. List objects should be freed by main() when command_main() returns.
26  *
27  * Example:
28  *   Calling get_optflags() when toys.which->options="ab:c:d" and
29  *   argv = ["command", "-b", "fruit", "-d", "walrus"] results in:
30  *
31  *     Changes to struct toys:
32  *       toys.optflags = 5 (I.E. 0101 so -b = 4 | -d = 1)
33  *       toys.optargs[0] = "walrus" (leftover argument)
34  *       toys.optargs[1] = NULL (end of list)
35  *       toys.optc = 1 (there was 1 leftover argument)
36  *
37  *     Changes to union this:
38  *       this[0]=NULL (because -c didn't get an argument this time)
39  *       this[1]="fruit" (argument to -b)
40  */
41 
42 // Enabling TOYBOX_DEBUG in .config adds syntax checks to option string parsing
43 // which aren't needed in the final code (your option string is hardwired and
44 // should be correct when you ship), but are useful for development.
45 
46 // What you can put in a get_opt string:
47 //   Any otherwise unused character (all letters, unprefixed numbers) specify
48 //   an option that sets a flag. The bit value is the same as the binary digit
49 //   if you string the option characters together in order.
50 //   So in "abcdefgh" a = 128, h = 1
51 //
52 //   Suffixes specify that this option takes an argument (stored in GLOBALS):
53 //       Note that pointer and long are always the same size, even on 64 bit.
54 //     : plus a string argument, keep most recent if more than one
55 //     * plus a string argument, appended to a list
56 //     # plus a signed long argument
57 //       <LOW     - die if less than LOW
58 //       >HIGH    - die if greater than HIGH
59 //       =DEFAULT - value if not specified
60 //     - plus a signed long argument defaulting to negative (say + for positive)
61 //     . plus a double precision floating point argument (with CFG_TOYBOX_FLOAT)
62 //       Chop this option out with USE_TOYBOX_FLOAT() in option string
63 //       Same <LOW>HIGH=DEFAULT as #
64 //     @ plus an occurrence counter (which is a long)
65 //     (longopt)
66 //     | this is required. If more than one marked, only one required.
67 //     ; long option's argument is optional (can only be supplied with --opt=)
68 //     ^ Stop parsing after encountering this argument
69 //    " " (space char) the "plus an argument" must be separate
70 //        I.E. "-j 3" not "-j3". So "kill -stop" != "kill -s top"
71 //
72 //   At the beginning of the get_opt string (before any options):
73 //     ^ stop at first nonoption argument
74 //     <0 die if less than # leftover arguments (default 0)
75 //     >9 die if > # leftover arguments (default MAX_INT)
76 //     ? Allow unknown arguments (pass them through to command).
77 //     & first argument has imaginary dash (ala tar/ps)
78 //       If given twice, all arguments have imaginary dash
79 //
80 //   At the end: [groups] of previously seen options
81 //     - Only one in group (switch off)    [-abc] means -ab=-b, -ba=-a, -abc=-c
82 //     + Synonyms (switch on all)          [+abc] means -ab=-abc, -c=-abc
83 //     ! More than one in group is error   [!abc] means -ab calls error_exit()
84 //       primarily useful if you can switch things back off again.
85 
86 // Notes from getopt man page
87 //   - and -- cannot be arguments.
88 //     -- force end of arguments
89 //     - is a synonym for stdin in file arguments
90 //   -abcd means -a -b -c -d (but if -b takes an argument, then it's -a -b cd)
91 
92 // Linked list of all known options (option string parsed into this).
93 // Hangs off getoptflagstate, freed at end of option parsing.
94 struct opts {
95   struct opts *next;
96   long *arg;         // Pointer into union "this" to store arguments at.
97   int c;             // Argument character to match
98   int flags;         // |=1, ^=2
99   unsigned dex[3];   // which bits to disable/enable/exclude in toys.optflags
100   char type;         // Type of arguments to store union "this"
101   union {
102     long l;
103     FLOAT f;
104   } val[3];          // low, high, default - range of allowed values
105 };
106 
// Linked list of long options. Hangs off getoptflagstate, freed at end of
// option parsing. Details about which flag to set and which global slot to
// fill out are stored in the related short option struct; if opt->c == -1
// the long option is "bare" (has no corresponding short option).
struct longopts {
  struct longopts *next;
  struct opts *opt;  // Option this long name belongs to
  char *str;         // Name, pointing into the option string (ends at ')')
  int len;           // Length of name, since str is not NUL terminated there
};
117 
// State during argument parsing.
struct getoptflagstate
{
  int argc;                  // Index in toys.argv[] of argument being parsed
  int minargs;               // Fewest leftover arguments allowed ("<N")
  int maxargs;               // Most leftover arguments allowed (">N")
  int nodash;                // Number of leading "&" in the option string
  char *arg;                 // Current position within the argument
  struct opts *opts;         // Known options, last one in option string first
  struct longopts *longopts; // Known --long option names
  int noerror;               // Nonzero: unknown options are not an error ("?")
  int nodash_now;            // Nonzero: this argument has an imaginary dash
  int stopearly;             // 1: stop at first nonoption, >1: parsing stopped
  unsigned excludes;         // dex[2] bits collected from options seen so far
  unsigned requires;         // Bits of options marked required ("|")
};
128 
129 // Use getoptflagstate to parse parse one command line option from argv
gotflag(struct getoptflagstate * gof,struct opts * opt)130 static int gotflag(struct getoptflagstate *gof, struct opts *opt)
131 {
132   int type;
133 
134   // Did we recognize this option?
135   if (!opt) {
136     if (gof->noerror) return 1;
137     error_exit("Unknown option %s", gof->arg);
138   }
139 
140   // Might enabling this switch off something else?
141   if (toys.optflags & opt->dex[0]) {
142     struct opts *clr;
143     unsigned i = 1;
144 
145     // Forget saved argument for flag we switch back off
146     for (clr=gof->opts, i=1; clr; clr = clr->next, i<<=1)
147       if (clr->arg && (i & toys.optflags & opt->dex[0])) *clr->arg = 0;
148     toys.optflags &= ~opt->dex[0];
149   }
150 
151   // Set flags
152   toys.optflags |= opt->dex[1];
153   gof->excludes |= opt->dex[2];
154   if (opt->flags&2) gof->stopearly=2;
155 
156   if (toys.optflags & gof->excludes) {
157     struct opts *bad;
158     unsigned i = 1;
159 
160     for (bad=gof->opts, i=1; ;bad = bad->next, i<<=1) {
161       if (opt == bad || !(i & toys.optflags)) continue;
162       if (toys.optflags & bad->dex[2]) break;
163     }
164     error_exit("No '%c' with '%c'", opt->c, bad->c);
165   }
166 
167   // Does this option take an argument?
168   if (!gof->arg) {
169     if (opt->flags & 8) return 0;
170     gof->arg = "";
171   } else gof->arg++;
172   type = opt->type;
173 
174   if (type == '@') ++*(opt->arg);
175   else if (type) {
176     char *arg = gof->arg;
177 
178     // Handle "-xblah" and "-x blah", but also a third case: "abxc blah"
179     // to make "tar xCjfv blah1 blah2 thingy" work like
180     // "tar -x -C blah1 -j -f blah2 -v thingy"
181 
182     if (gof->nodash_now || (!arg[0] && !(opt->flags & 8)))
183       arg = toys.argv[++gof->argc];
184     if (!arg) {
185       char *s = "Missing argument to ";
186       struct longopts *lo;
187 
188       if (opt->c != -1) error_exit("%s-%c", s, opt->c);
189 
190       for (lo = gof->longopts; lo->opt != opt; lo = lo->next);
191       error_exit("%s--%.*s", s, lo->len, lo->str);
192     }
193 
194     if (type == ':') *(opt->arg) = (long)arg;
195     else if (type == '*') {
196       struct arg_list **list;
197 
198       list = (struct arg_list **)opt->arg;
199       while (*list) list=&((*list)->next);
200       *list = xzalloc(sizeof(struct arg_list));
201       (*list)->arg = arg;
202     } else if (type == '#' || type == '-') {
203       long l = atolx(arg);
204       if (type == '-' && !ispunct(*arg)) l*=-1;
205       if (l < opt->val[0].l) error_exit("-%c < %ld", opt->c, opt->val[0].l);
206       if (l > opt->val[1].l) error_exit("-%c > %ld", opt->c, opt->val[1].l);
207 
208       *(opt->arg) = l;
209     } else if (CFG_TOYBOX_FLOAT && type == '.') {
210       FLOAT *f = (FLOAT *)(opt->arg);
211 
212       *f = strtod(arg, &arg);
213       if (opt->val[0].l != LONG_MIN && *f < opt->val[0].f)
214         error_exit("-%c < %lf", opt->c, (double)opt->val[0].f);
215       if (opt->val[1].l != LONG_MAX && *f > opt->val[1].f)
216         error_exit("-%c > %lf", opt->c, (double)opt->val[1].f);
217     }
218 
219     if (!gof->nodash_now) gof->arg = "";
220   }
221 
222   return 0;
223 }
224 
225 // Parse this command's options string into struct getoptflagstate, which
226 // includes a struct opts linked list in reverse order (I.E. right-to-left)
parse_optflaglist(struct getoptflagstate * gof)227 void parse_optflaglist(struct getoptflagstate *gof)
228 {
229   char *options = toys.which->options;
230   long *nextarg = (long *)&this;
231   struct opts *new = 0;
232   int idx;
233 
234   // Parse option format string
235   memset(gof, 0, sizeof(struct getoptflagstate));
236   gof->maxargs = INT_MAX;
237   if (!options) return;
238 
239   // Parse leading special behavior indicators
240   for (;;) {
241     if (*options == '^') gof->stopearly++;
242     else if (*options == '<') gof->minargs=*(++options)-'0';
243     else if (*options == '>') gof->maxargs=*(++options)-'0';
244     else if (*options == '?') gof->noerror++;
245     else if (*options == '&') gof->nodash++;
246     else break;
247     options++;
248   }
249 
250   // Parse option string into a linked list of options with attributes.
251 
252   if (!*options) gof->stopearly++;
253   while (*options) {
254     char *temp;
255 
256     // Option groups come after all options are defined
257     if (*options == '[') break;
258 
259     // Allocate a new list entry when necessary
260     if (!new) {
261       new = xzalloc(sizeof(struct opts));
262       new->next = gof->opts;
263       gof->opts = new;
264       new->val[0].l = LONG_MIN;
265       new->val[1].l = LONG_MAX;
266     }
267     // Each option must start with "(" or an option character.  (Bare
268     // longopts only come at the start of the string.)
269     if (*options == '(' && new->c != -1) {
270       char *end;
271       struct longopts *lo;
272 
273       // Find the end of the longopt
274       for (end = ++options; *end && *end != ')'; end++);
275       if (CFG_TOYBOX_DEBUG && !*end) error_exit("(longopt) didn't end");
276 
277       // init a new struct longopts
278       lo = xmalloc(sizeof(struct longopts));
279       lo->next = gof->longopts;
280       lo->opt = new;
281       lo->str = options;
282       lo->len = end-options;
283       gof->longopts = lo;
284       options = ++end;
285 
286       // Mark this struct opt as used, even when no short opt.
287       if (!new->c) new->c = -1;
288 
289       continue;
290 
291     // If this is the start of a new option that wasn't a longopt,
292 
293     } else if (strchr(":*#@.-", *options)) {
294       if (CFG_TOYBOX_DEBUG && new->type)
295         error_exit("multiple types %c:%c%c", new->c, new->type, *options);
296       new->type = *options;
297     } else if (-1 != (idx = stridx("|^ ;", *options))) new->flags |= 1<<idx;
298     // bounds checking
299     else if (-1 != (idx = stridx("<>=", *options))) {
300       if (new->type == '#') {
301         long l = strtol(++options, &temp, 10);
302         if (temp != options) new->val[idx].l = l;
303       } else if (CFG_TOYBOX_FLOAT && new->type == '.') {
304         FLOAT f = strtod(++options, &temp);
305         if (temp != options) new->val[idx].f = f;
306       } else if (CFG_TOYBOX_DEBUG) error_exit("<>= only after .#");
307       options = --temp;
308 
309     // At this point, we've hit the end of the previous option.  The
310     // current character is the start of a new option.  If we've already
311     // assigned an option to this struct, loop to allocate a new one.
312     // (It'll get back here afterwards and fall through to next else.)
313     } else if (new->c) {
314       new = 0;
315       continue;
316 
317     // Claim this option, loop to see what's after it.
318     } else new->c = *options;
319 
320     options++;
321   }
322 
323   // Initialize enable/disable/exclude masks and pointers to store arguments.
324   // (This goes right to left so we need the whole list before we can start.)
325   idx = 0;
326   for (new = gof->opts; new; new = new->next) {
327     unsigned u = 1<<idx++;
328 
329     if (new->c == 1) new->c = 0;
330     new->dex[1] = u;
331     if (new->flags & 1) gof->requires |= u;
332     if (new->type) {
333       new->arg = (void *)nextarg;
334       *(nextarg++) = new->val[2].l;
335     }
336   }
337 
338   // Parse trailing group indicators
339   while (*options) {
340     unsigned bits = 0;
341 
342     if (CFG_TOYBOX_DEBUG && *options != '[') error_exit("trailing %s", options);
343 
344     idx = stridx("-+!", *++options);
345     if (CFG_TOYBOX_DEBUG && idx == -1) error_exit("[ needs +-!");
346     if (CFG_TOYBOX_DEBUG && (options[1] == ']' || !options[1]))
347       error_exit("empty []");
348 
349     // Don't advance past ] but do process it once in loop.
350     while (*options++ != ']') {
351       struct opts *opt;
352       int i;
353 
354       if (CFG_TOYBOX_DEBUG && !*options) error_exit("[ without ]");
355       // Find this option flag (in previously parsed struct opt)
356       for (i=0, opt = gof->opts; ; i++, opt = opt->next) {
357         if (*options == ']') {
358           if (!opt) break;
359           if (bits&(1<<i)) opt->dex[idx] |= bits&~(1<<i);
360         } else {
361           if (CFG_TOYBOX_DEBUG && !opt)
362             error_exit("[] unknown target %c", *options);
363           if (opt->c == *options) {
364             bits |= 1<<i;
365             break;
366           }
367         }
368       }
369     }
370   }
371 }
372 
373 // Fill out toys.optflags, toys.optargs, and this[] from toys.argv
374 
get_optflags(void)375 void get_optflags(void)
376 {
377   struct getoptflagstate gof;
378   struct opts *catch;
379   long saveflags;
380   char *letters[]={"s",""};
381 
382   // Option parsing is a two stage process: parse the option string into
383   // a struct opts list, then use that list to process argv[];
384 
385   toys.exithelp++;
386   // Allocate memory for optargs
387   saveflags = 0;
388   while (toys.argv[saveflags++]);
389   toys.optargs = xzalloc(sizeof(char *)*saveflags);
390 
391   parse_optflaglist(&gof);
392 
393   // Iterate through command line arguments, skipping argv[0]
394   for (gof.argc=1; toys.argv[gof.argc]; gof.argc++) {
395     gof.arg = toys.argv[gof.argc];
396     catch = NULL;
397 
398     // Parse this argument
399     if (gof.stopearly>1) goto notflag;
400 
401     gof.nodash_now = 0;
402 
403     // Various things with dashes
404     if (*gof.arg == '-') {
405 
406       // Handle -
407       if (!gof.arg[1]) goto notflag;
408       gof.arg++;
409       if (*gof.arg=='-') {
410         struct longopts *lo;
411 
412         gof.arg++;
413         // Handle --
414         if (!*gof.arg) {
415           gof.stopearly += 2;
416           continue;
417         }
418 
419         // do we match a known --longopt?
420         for (lo = gof.longopts; lo; lo = lo->next) {
421           if (!strncmp(gof.arg, lo->str, lo->len)) {
422             if (!gof.arg[lo->len]) gof.arg = 0;
423             else if (gof.arg[lo->len] == '=' && lo->opt->type)
424               gof.arg += lo->len;
425             else continue;
426             // It's a match.
427             catch = lo->opt;
428             break;
429           }
430         }
431 
432         // Should we handle this --longopt as a non-option argument?
433         if (!lo && gof.noerror) {
434           gof.arg -= 2;
435           goto notflag;
436         }
437 
438         // Long option parsed, handle option.
439         gotflag(&gof, catch);
440         continue;
441       }
442 
443     // Handle things that don't start with a dash.
444     } else {
445       if (gof.nodash && (gof.nodash>1 || gof.argc == 1)) gof.nodash_now = 1;
446       else goto notflag;
447     }
448 
449     // At this point, we have the args part of -args.  Loop through
450     // each entry (could be -abc meaning -a -b -c)
451     saveflags = toys.optflags;
452     while (*gof.arg) {
453 
454       // Identify next option char.
455       for (catch = gof.opts; catch; catch = catch->next)
456         if (*gof.arg == catch->c)
457           if (!((catch->flags&4) && gof.arg[1])) break;
458 
459       // Handle option char (advancing past what was used)
460       if (gotflag(&gof, catch) ) {
461         toys.optflags = saveflags;
462         gof.arg = toys.argv[gof.argc];
463         goto notflag;
464       }
465     }
466     continue;
467 
468     // Not a flag, save value in toys.optargs[]
469 notflag:
470     if (gof.stopearly) gof.stopearly++;
471     toys.optargs[toys.optc++] = toys.argv[gof.argc];
472   }
473 
474   // Sanity check
475   if (toys.optc<gof.minargs)
476     error_exit("Need%s %d argument%s", letters[!!(gof.minargs-1)],
477       gof.minargs, letters[!(gof.minargs-1)]);
478   if (toys.optc>gof.maxargs)
479     error_exit("Max %d argument%s", gof.maxargs, letters[!(gof.maxargs-1)]);
480   if (gof.requires && !(gof.requires & toys.optflags)) {
481     struct opts *req;
482     char needs[32], *s = needs;
483 
484     for (req = gof.opts; req; req = req->next)
485       if (req->flags & 1) *(s++) = req->c;
486     *s = 0;
487 
488     error_exit("Needs %s-%s", s[1] ? "one of " : "", needs);
489   }
490   toys.exithelp = 0;
491 
492   if (CFG_TOYBOX_FREE) {
493     llist_traverse(gof.opts, free);
494     llist_traverse(gof.longopts, free);
495   }
496 }
497