1 /* args.c - Command line argument parsing.
2  *
3  * Copyright 2006 Rob Landley <rob@landley.net>
4  */
5 
6 // NOTE: If option parsing segfaults, switch on TOYBOX_DEBUG in menuconfig.
7 
8 // Enabling TOYBOX_DEBUG in .config adds syntax checks to option string parsing
9 // which aren't needed in the final code (your option string is hardwired and
10 // should be correct when you ship), but are useful for development.
11 
12 #include "toys.h"
13 
14 // Design goals:
15 //   Don't use getopt() out of libc.
16 //   Don't permute original arguments (screwing up ps/top output).
17 //   Integrated --long options "(noshort)a(along)b(blong1)(blong2)"
18 
19 /* This uses a getopt-like option string, but not getopt() itself. We call
20  * it the get_opt string.
21  *
22  * Each option in the get_opt string corresponds to a bit position in the
23  * return value. The rightmost argument is (1<<0), the next to last is (1<<1)
24  * and so on. If the option isn't seen in argv[], its bit remains 0.
25  *
26  * Options which have an argument fill in the corresponding slot in the global
27  * union "this" (see generated/globals.h), which it treats as an array of longs
28  * (note that sizeof(long)==sizeof(pointer) is guaranteed by LP64).
29  *
30  * You don't have to free the option strings, which point into the environment
31  * space. List objects should be freed by main() when command_main() returns.
32  *
33  * Example:
34  *   Calling get_optflags() when toys.which->options="ab:c:d" and
35  *   argv = ["command", "-b", "fruit", "-d", "walrus"] results in:
36  *
37  *     Changes to struct toys:
38  *       toys.optflags = 5 (I.E. 0101 so -b = 4 | -d = 1)
39  *       toys.optargs[0] = "walrus" (leftover argument)
40  *       toys.optargs[1] = NULL (end of list)
41  *       toys.optc = 1 (there was 1 leftover argument)
42  *
43  *     Changes to union this:
44  *       this[0]=NULL (because -c didn't get an argument this time)
45  *       this[1]="fruit" (argument to -b)
46  */
47 
48 // What you can put in a get_opt string:
//   Any otherwise unused character (all letters, unprefixed numbers) specifies
//   an option that sets a flag. The bit value is the same as the binary digit
//   if you string the option characters together in order.
//   So in "abcdefgh" a = 128, h = 1
53 //
54 //   Suffixes specify that this option takes an argument (stored in GLOBALS):
55 //       Note that pointer and long are always the same size, even on 64 bit.
56 //     : string argument, keep most recent if more than one
57 //     * string argument, appended to a struct arg_list linked list.
58 //     # signed long argument
59 //       <LOW     - die if less than LOW
60 //       >HIGH    - die if greater than HIGH
61 //       =DEFAULT - value if not specified
62 //     - signed long argument defaulting to negative (say + for positive)
63 //     . double precision floating point argument (with CFG_TOYBOX_FLOAT)
64 //       Chop this option out with USE_TOYBOX_FLOAT() in option string
65 //       Same <LOW>HIGH=DEFAULT as #
66 //     @ occurrence counter (which is a long)
67 //     % time offset in milliseconds with optional s/m/h/d suffix
68 //     (longopt)
69 //     | this is required. If more than one marked, only one required.
70 //     ; long option's argument is optional (can only be supplied with --opt=)
71 //     ^ Stop parsing after encountering this argument
72 //    " " (space char) the "plus an argument" must be separate
73 //        I.E. "-j 3" not "-j3". So "kill -stop" != "kill -s top"
74 //
75 //   At the beginning of the get_opt string (before any options):
76 //     ^ stop at first nonoption argument
77 //     <0 die if less than # leftover arguments (default 0)
78 //     >9 die if > # leftover arguments (default MAX_INT)
79 //     ? Allow unknown arguments (pass them through to command).
80 //     & first arg has imaginary dash (ala tar/ps/ar) which sets FLAGS_NODASH
81 //
82 //   At the end: [groups] of previously seen options
83 //     - Only one in group (switch off)    [-abc] means -ab=-b, -ba=-a, -abc=-c
84 //     + Synonyms (switch on all)          [+abc] means -ab=-abc, -c=-abc
85 //     ! More than one in group is error   [!abc] means -ab calls error_exit()
86 //       primarily useful if you can switch things back off again.
87 //
//   You may use octal escapes with the high bit (128) set to use a character
//   that is otherwise special in the get_opt string as an option flag. The
//   high bit is masked off, so for example \300 would be the option -@
90 
91 // Notes from getopt man page
92 //   - and -- cannot be arguments.
93 //     -- force end of arguments
94 //     - is a synonym for stdin in file arguments
95 //   -abcd means -a -b -c -d (but if -b takes an argument, then it's -a -b cd)
96 
// Linked list of all known options (option string parsed into this).
// Hangs off getoptflagstate, freed at end of option parsing.
// Entries are prepended while the option string is read left to right, so
// the head of the list is the rightmost option (flag bit 1<<0).
struct opts {
  struct opts *next; // Next entry (the option to the left in the string)
  long *arg;         // Pointer into union "this" to store arguments at.
                     // Only set for options that have a type.
  int c;             // Option character to match, -1 for a bare longopt
  int flags;         // |=1, ^=2, " "=4, ;=8
  unsigned long long dex[3]; // bits to disable/enable/exclude in toys.optflags
  char type;         // Argument type suffix (one of ":*#@.-%"), 0 if none
  union {
    long l;
    FLOAT f;
  } val[3];          // low, high, default - range of allowed values
};
111 
// Linked list of long options. Hangs off getoptflagstate, freed at end of
// option parsing. Details about the flag to set and the global slot to fill
// out are stored in the related short option struct, but if opt->c == -1 the
// long option is "bare" (has no corresponding short option).
struct longopts {
  struct longopts *next;
  struct opts *opt;  // Option this long name belongs to
  char *str;         // Start of the name inside the option string (not
                     // NUL terminated, use len)
  int len;           // Length of the name in bytes
};
122 
// State during argument parsing.
struct getoptflagstate
{
  // argc: index into toys.argv of the entry being parsed.
  // minargs/maxargs: allowed number of leftover (non-option) arguments.
  int argc, minargs, maxargs;
  char *arg;                  // Current position within the argument
  struct opts *opts;          // Known options, rightmost option first
  struct longopts *longopts;  // Known --long option names
  // noerror: unknown options are passed through instead of being an error.
  // nodash_now: first argument is treated as options without a leading dash.
  // stopearly: nonzero to stop at the first non-option argument; once it is
  //   greater than 1 every remaining argument is treated as a non-option.
  int noerror, nodash_now, stopearly;
  // excludes: union of the dex[2] masks of the options seen so far.
  // requires: bits of the options marked '|' (at least one must be given).
  // Both hold toys.optflags bit masks, so they must be as wide as dex[].
  unsigned long long excludes, requires;
};
133 
134 // Use getoptflagstate to parse one command line option from argv
gotflag(struct getoptflagstate * gof,struct opts * opt)135 static int gotflag(struct getoptflagstate *gof, struct opts *opt)
136 {
137   int type;
138 
139   // Did we recognize this option?
140   if (!opt) {
141     if (gof->noerror) return 1;
142     help_exit("Unknown option %s", gof->arg);
143   }
144 
145   // Might enabling this switch off something else?
146   if (toys.optflags & opt->dex[0]) {
147     struct opts *clr;
148     unsigned long long i = 1;
149 
150     // Forget saved argument for flag we switch back off
151     for (clr=gof->opts, i=1; clr; clr = clr->next, i<<=1)
152       if (clr->arg && (i & toys.optflags & opt->dex[0])) *clr->arg = 0;
153     toys.optflags &= ~opt->dex[0];
154   }
155 
156   // Set flags
157   toys.optflags |= opt->dex[1];
158   gof->excludes |= opt->dex[2];
159   if (opt->flags&2) gof->stopearly=2;
160 
161   if (toys.optflags & gof->excludes) {
162     struct opts *bad;
163     unsigned i = 1;
164 
165     for (bad=gof->opts, i=1; bad ;bad = bad->next, i<<=1) {
166       if (opt == bad || !(i & toys.optflags)) continue;
167       if (toys.optflags & bad->dex[2]) break;
168     }
169     if (bad) help_exit("No '%c' with '%c'", opt->c, bad->c);
170   }
171 
172   // Does this option take an argument?
173   if (!gof->arg) {
174     if (opt->flags & 8) return 0;
175     gof->arg = "";
176   } else gof->arg++;
177   type = opt->type;
178 
179   if (type == '@') ++*(opt->arg);
180   else if (type) {
181     char *arg = gof->arg;
182 
183     // Handle "-xblah" and "-x blah", but also a third case: "abxc blah"
184     // to make "tar xCjfv blah1 blah2 thingy" work like
185     // "tar -x -C blah1 -j -f blah2 -v thingy"
186 
187     if (gof->nodash_now || (!arg[0] && !(opt->flags & 8)))
188       arg = toys.argv[++gof->argc];
189     if (!arg) {
190       char *s = "Missing argument to ";
191       struct longopts *lo;
192 
193       if (opt->c != -1) help_exit("%s-%c", s, opt->c);
194 
195       for (lo = gof->longopts; lo->opt != opt; lo = lo->next);
196       help_exit("%s--%.*s", s, lo->len, lo->str);
197     }
198 
199     if (type == ':') *(opt->arg) = (long)arg;
200     else if (type == '*') {
201       struct arg_list **list;
202 
203       list = (struct arg_list **)opt->arg;
204       while (*list) list=&((*list)->next);
205       *list = xzalloc(sizeof(struct arg_list));
206       (*list)->arg = arg;
207     } else if (type == '#' || type == '-') {
208       long l = atolx(arg);
209       if (type == '-' && !ispunct(*arg)) l*=-1;
210       if (l < opt->val[0].l) help_exit("-%c < %ld", opt->c, opt->val[0].l);
211       if (l > opt->val[1].l) help_exit("-%c > %ld", opt->c, opt->val[1].l);
212 
213       *(opt->arg) = l;
214     } else if (CFG_TOYBOX_FLOAT && type == '.') {
215       FLOAT *f = (FLOAT *)(opt->arg);
216 
217       *f = strtod(arg, &arg);
218       if (opt->val[0].l != LONG_MIN && *f < opt->val[0].f)
219         help_exit("-%c < %lf", opt->c, (double)opt->val[0].f);
220       if (opt->val[1].l != LONG_MAX && *f > opt->val[1].f)
221         help_exit("-%c > %lf", opt->c, (double)opt->val[1].f);
222     } else if (type=='%') *(opt->arg) = xparsemillitime(arg);
223 
224     if (!gof->nodash_now) gof->arg = "";
225   }
226 
227   return 0;
228 }
229 
230 // Parse this command's options string into struct getoptflagstate, which
231 // includes a struct opts linked list in reverse order (I.E. right-to-left)
parse_optflaglist(struct getoptflagstate * gof)232 void parse_optflaglist(struct getoptflagstate *gof)
233 {
234   char *options = toys.which->options;
235   long *nextarg = (long *)&this;
236   struct opts *new = 0;
237   int idx;
238 
239   // Parse option format string
240   memset(gof, 0, sizeof(struct getoptflagstate));
241   gof->maxargs = INT_MAX;
242   if (!options) return;
243 
244   // Parse leading special behavior indicators
245   for (;;) {
246     if (*options == '^') gof->stopearly++;
247     else if (*options == '<') gof->minargs=*(++options)-'0';
248     else if (*options == '>') gof->maxargs=*(++options)-'0';
249     else if (*options == '?') gof->noerror++;
250     else if (*options == '&') gof->nodash_now = 1;
251     else break;
252     options++;
253   }
254 
255   // Parse option string into a linked list of options with attributes.
256 
257   if (!*options) gof->stopearly++;
258   while (*options) {
259     char *temp;
260 
261     // Option groups come after all options are defined
262     if (*options == '[') break;
263 
264     // Allocate a new list entry when necessary
265     if (!new) {
266       new = xzalloc(sizeof(struct opts));
267       new->next = gof->opts;
268       gof->opts = new;
269       new->val[0].l = LONG_MIN;
270       new->val[1].l = LONG_MAX;
271     }
272     // Each option must start with "(" or an option character.  (Bare
273     // longopts only come at the start of the string.)
274     if (*options == '(' && new->c != -1) {
275       char *end;
276       struct longopts *lo;
277 
278       // Find the end of the longopt
279       for (end = ++options; *end && *end != ')'; end++);
280       if (CFG_TOYBOX_DEBUG && !*end) error_exit("(longopt) didn't end");
281 
282       // init a new struct longopts
283       lo = xmalloc(sizeof(struct longopts));
284       lo->next = gof->longopts;
285       lo->opt = new;
286       lo->str = options;
287       lo->len = end-options;
288       gof->longopts = lo;
289       options = ++end;
290 
291       // Mark this struct opt as used, even when no short opt.
292       if (!new->c) new->c = -1;
293 
294       continue;
295 
296     // If this is the start of a new option that wasn't a longopt,
297 
298     } else if (strchr(":*#@.-%", *options)) {
299       if (CFG_TOYBOX_DEBUG && new->type)
300         error_exit("multiple types %c:%c%c", new->c, new->type, *options);
301       new->type = *options;
302     } else if (-1 != (idx = stridx("|^ ;", *options))) new->flags |= 1<<idx;
303     // bounds checking
304     else if (-1 != (idx = stridx("<>=", *options))) {
305       if (new->type == '#' || new->type == '%') {
306         long l = strtol(++options, &temp, 10);
307         if (temp != options) new->val[idx].l = l;
308       } else if (CFG_TOYBOX_FLOAT && new->type == '.') {
309         FLOAT f = strtod(++options, &temp);
310         if (temp != options) new->val[idx].f = f;
311       } else error_exit("<>= only after .#%%");
312       options = --temp;
313 
314     // At this point, we've hit the end of the previous option.  The
315     // current character is the start of a new option.  If we've already
316     // assigned an option to this struct, loop to allocate a new one.
317     // (It'll get back here afterwards and fall through to next else.)
318     } else if (new->c) {
319       new = 0;
320       continue;
321 
322     // Claim this option, loop to see what's after it.
323     } else new->c = 127&*options;
324 
325     options++;
326   }
327 
328   // Initialize enable/disable/exclude masks and pointers to store arguments.
329   // (This goes right to left so we need the whole list before we can start.)
330   idx = 0;
331   for (new = gof->opts; new; new = new->next) {
332     unsigned long long u = 1L<<idx++;
333 
334     if (new->c == 1) new->c = 0;
335     new->dex[1] = u;
336     if (new->flags & 1) gof->requires |= u;
337     if (new->type) {
338       new->arg = (void *)nextarg;
339       *(nextarg++) = new->val[2].l;
340     }
341   }
342 
343   // Parse trailing group indicators
344   while (*options) {
345     unsigned bits = 0;
346 
347     if (CFG_TOYBOX_DEBUG && *options != '[') error_exit("trailing %s", options);
348 
349     idx = stridx("-+!", *++options);
350     if (CFG_TOYBOX_DEBUG && idx == -1) error_exit("[ needs +-!");
351     if (CFG_TOYBOX_DEBUG && (options[1] == ']' || !options[1]))
352       error_exit("empty []");
353 
354     // Don't advance past ] but do process it once in loop.
355     while (*options++ != ']') {
356       struct opts *opt;
357       int i;
358 
359       if (CFG_TOYBOX_DEBUG && !*options) error_exit("[ without ]");
360       // Find this option flag (in previously parsed struct opt)
361       for (i=0, opt = gof->opts; ; i++, opt = opt->next) {
362         if (*options == ']') {
363           if (!opt) break;
364           if (bits&(1<<i)) opt->dex[idx] |= bits&~(1<<i);
365         } else {
366           if (*options==1) break;
367           if (CFG_TOYBOX_DEBUG && !opt)
368             error_exit("[] unknown target %c", *options);
369           if (opt->c == *options) {
370             bits |= 1<<i;
371             break;
372           }
373         }
374       }
375     }
376   }
377 }
378 
379 // Fill out toys.optflags, toys.optargs, and this[] from toys.argv
380 
get_optflags(void)381 void get_optflags(void)
382 {
383   struct getoptflagstate gof;
384   struct opts *catch;
385   unsigned long long saveflags;
386   char *letters[]={"s",""};
387 
388   // Option parsing is a two stage process: parse the option string into
389   // a struct opts list, then use that list to process argv[];
390 
391   toys.exitval = toys.which->flags >> 24;
392 
393   // Allocate memory for optargs
394   saveflags = 0;
395   while (toys.argv[saveflags++]);
396   toys.optargs = xzalloc(sizeof(char *)*saveflags);
397 
398   parse_optflaglist(&gof);
399 
400   if (toys.argv[1] && toys.argv[1][0] == '-') gof.nodash_now = 0;
401 
402   // Iterate through command line arguments, skipping argv[0]
403   for (gof.argc=1; toys.argv[gof.argc]; gof.argc++) {
404     gof.arg = toys.argv[gof.argc];
405     catch = NULL;
406 
407     // Parse this argument
408     if (gof.stopearly>1) goto notflag;
409 
410     if (gof.argc>1 || *gof.arg=='-') gof.nodash_now = 0;
411 
412     // Various things with dashes
413     if (*gof.arg == '-') {
414 
415       // Handle -
416       if (!gof.arg[1]) goto notflag;
417       gof.arg++;
418       if (*gof.arg=='-') {
419         struct longopts *lo;
420 
421         gof.arg++;
422         // Handle --
423         if (!*gof.arg) {
424           gof.stopearly += 2;
425           continue;
426         }
427 
428         // do we match a known --longopt?
429         for (lo = gof.longopts; lo; lo = lo->next) {
430           if (!strncmp(gof.arg, lo->str, lo->len)) {
431             if (!gof.arg[lo->len]) gof.arg = 0;
432             else if (gof.arg[lo->len] == '=' && lo->opt->type)
433               gof.arg += lo->len;
434             else continue;
435             // It's a match.
436             catch = lo->opt;
437             break;
438           }
439         }
440 
441         // Should we handle this --longopt as a non-option argument?
442         if (!lo && gof.noerror) {
443           gof.arg -= 2;
444           goto notflag;
445         }
446 
447         // Long option parsed, handle option.
448         gotflag(&gof, catch);
449         continue;
450       }
451 
452     // Handle things that don't start with a dash.
453     } else {
454       if (gof.nodash_now) toys.optflags |= FLAGS_NODASH;
455       else goto notflag;
456     }
457 
458     // At this point, we have the args part of -args.  Loop through
459     // each entry (could be -abc meaning -a -b -c)
460     saveflags = toys.optflags;
461     while (*gof.arg) {
462 
463       // Identify next option char.
464       for (catch = gof.opts; catch; catch = catch->next)
465         if (*gof.arg == catch->c)
466           if (!((catch->flags&4) && gof.arg[1])) break;
467 
468       // Handle option char (advancing past what was used)
469       if (gotflag(&gof, catch) ) {
470         toys.optflags = saveflags;
471         gof.arg = toys.argv[gof.argc];
472         goto notflag;
473       }
474     }
475     continue;
476 
477     // Not a flag, save value in toys.optargs[]
478 notflag:
479     if (gof.stopearly) gof.stopearly++;
480     toys.optargs[toys.optc++] = toys.argv[gof.argc];
481   }
482 
483   // Sanity check
484   if (toys.optc<gof.minargs)
485     help_exit("Need%s %d argument%s", letters[!!(gof.minargs-1)],
486       gof.minargs, letters[!(gof.minargs-1)]);
487   if (toys.optc>gof.maxargs)
488     help_exit("Max %d argument%s", gof.maxargs, letters[!(gof.maxargs-1)]);
489   if (gof.requires && !(gof.requires & toys.optflags)) {
490     struct opts *req;
491     char needs[32], *s = needs;
492 
493     for (req = gof.opts; req; req = req->next)
494       if (req->flags & 1) *(s++) = req->c;
495     *s = 0;
496 
497     help_exit("Needs %s-%s", s[1] ? "one of " : "", needs);
498   }
499 
500   toys.exitval = 0;
501 
502   if (CFG_TOYBOX_FREE) {
503     llist_traverse(gof.opts, free);
504     llist_traverse(gof.longopts, free);
505   }
506 }
507