1 /* expr.c - evaluate expression
2  *
3  * Copyright 2016 Google Inc.
4  * Copyright 2013 Daniel Verkamp <daniel@drv.nu>
5  *
6  * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
7  *
8  * The web standard is incomplete (precedence grouping missing), see:
9  * http://permalink.gmane.org/gmane.comp.standards.posix.austin.general/10141
10  *
11  * eval_expr() uses the recursive "Precedence Climbing" algorithm:
12  *
13  * Clarke, Keith. "The top-down parsing of expressions." University of London.
14  * Queen Mary College. Department of Computer Science and Statistics, 1986.
15  *
16  * http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
17  *
18  * Nice explanation and Python implementation:
19  * http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
20 
21 USE_EXPR(NEWTOY(expr, NULL, TOYFLAG_USR|TOYFLAG_BIN))
22 
23 config EXPR
24   bool "expr"
25   default n
26   help
27     usage: expr ARG1 OPERATOR ARG2...
28 
29     Evaluate expression and print result. For example, "expr 1 + 2".
30 
31     The supported operators are (grouped from highest to lowest priority):
32 
33       ( )    :    * / %    + -    != <= < >= > =    &    |
34 
35     Each constant and operator must be a separate command line argument.
36     All operators are infix, meaning they expect a constant (or expression
37     that resolves to a constant) on each side of the operator. Operators of
38     the same priority (within each group above) are evaluated left to right.
39     Parentheses may be used (as separate arguments) to elevate the priority
40     of expressions.
41 
42     Calling expr from a command shell requires a lot of \( or '*' escaping
43     to avoid interpreting shell control characters.
44 
45     The & and | operators are logical (not bitwise) and may operate on
46     strings (a blank string is "false"). Comparison operators may also
47     operate on strings (alphabetical sort).
48 
49     Constants may be strings or integers. Comparison, logical, and regex
50     operators may operate on strings (a blank string is "false"), other
51     operators require integers.
52 */
53 
54 // TODO: int overflow checking
55 
56 #define FOR_expr
57 #include "toys.h"
58 
59 GLOBALS(
60   char **tok; // current token, not on the stack since recursive calls mutate it
61 
62   char *refree;
63 )
64 
65 // Scalar value.  If s != NULL, it's a string, otherwise it's an int.
66 struct value {
67   char *s;
68   long long i;
69 };
70 
71 // Get the value as a string.
72 char *get_str(struct value *v)
73 {
74   if (v->s) return v->s;
75   else return xmprintf("%lld", v->i);
76 }
77 
78 // Get the value as an integer and return 1, or return 0 on error.
79 int get_int(struct value *v, long long *ret)
80 {
81   if (v->s) {
82     char *endp;
83 
84     *ret = strtoll(v->s, &endp, 10);
85 
86     if (*endp) return 0; // If endp points to NUL, all chars were converted
87   } else *ret = v->i;
88 
89   return 1;
90 }
91 
92 // Preserve the invariant that v.s is NULL when the value is an integer.
93 void assign_int(struct value *v, long long i)
94 {
95   v->i = i;
96   v->s = NULL;
97 }
98 
99 // Check if v is 0 or the empty string.
100 static int is_false(struct value *v)
101 {
102   return get_int(v, &v->i) && !v->i;
103 }
104 
105 // 'ret' is filled with a string capture or int match position.
106 static void re(char *target, char *pattern, struct value *ret)
107 {
108   regex_t pat;
109   regmatch_t m[2];
110 
111   xregcomp(&pat, pattern, 0);
112   // must match at pos 0
113   if (!regexec(&pat, target, 2, m, 0) && !m[0].rm_so) {
114     // Return first parenthesized subexpression as string, or length of match
115     if (pat.re_nsub>0) {
116       ret->s = xmprintf("%.*s", (int)(m[1].rm_eo-m[1].rm_so),
117           target+m[1].rm_so);
118       if (TT.refree) free(TT.refree);
119       TT.refree = ret->s;
120     } else assign_int(ret, m[0].rm_eo);
121   } else {
122     if (pat.re_nsub>0) ret->s = "";
123     else assign_int(ret, 0);
124   }
125   regfree(&pat);
126 }
127 
128 // 4 different signatures of operators.  S = string, I = int, SI = string or
129 // int.
130 enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };
131 
132 enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };
133 
134 // operators grouped by precedence
135 static struct op_def {
136   char *tok;
137   char prec, sig, op; // precedence, signature for type coercion, operator ID
138 } OPS[] = {
139   // logical ops, precedence 1 and 2, signature SI_TO_SI
140   {"|", 1, SI_TO_SI, OR  },
141   {"&", 2, SI_TO_SI, AND },
142   // comparison ops, precedence 3, signature SI_TO_I
143   {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ  }, {"!=", 3, SI_TO_I, NE },
144   {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
145   {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE },
146   // arithmetic ops, precedence 4 and 5, signature I_TO_I
147   {"+", 4, I_TO_I, ADD }, {"-",  4, I_TO_I, SUB },
148   {"*", 5, I_TO_I, MUL }, {"/",  5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
149   // regex match, precedence 6, signature S_TO_SI
150   {":", 6, S_TO_SI, RE },
151   {NULL, 0, 0, 0}, // sentinel
152 };
153 
154 void eval_op(struct op_def *o, struct value *ret, struct value *rhs)
155 {
156   long long a, b, x = 0; // x = a OP b for ints.
157   char *s, *t; // string operands
158   int cmp;
159 
160   switch (o->sig) {
161 
162   case SI_TO_SI:
163     switch (o->op) {
164     case OR:  if (is_false(ret)) *ret = *rhs; break;
165     case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
166     }
167     break;
168 
169   case SI_TO_I:
170     if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
171       cmp = a - b;
172     } else { // otherwise compare both as strings
173       cmp = strcmp(s = get_str(ret), t = get_str(rhs));
174       if (ret->s != s) free(s);
175       if (rhs->s != t) free(t);
176     }
177     switch (o->op) {
178     case EQ:  x = cmp == 0; break;
179     case NE:  x = cmp != 0; break;
180     case GT:  x = cmp >  0; break;
181     case GTE: x = cmp >= 0; break;
182     case LT:  x = cmp <  0; break;
183     case LTE: x = cmp <= 0; break;
184     }
185     assign_int(ret, x);
186     break;
187 
188   case I_TO_I:
189     if (!get_int(ret, &a) || !get_int(rhs, &b))
190       error_exit("non-integer argument");
191     switch (o->op) {
192     case ADD: x = a + b; break;
193     case SUB: x = a - b; break;
194     case MUL: x = a * b; break;
195     case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
196     case MOD:  if (b == 0) error_exit("division by zero"); x = a % b; break;
197     }
198     assign_int(ret, x);
199     break;
200 
201   case S_TO_SI: // op == RE
202     s = get_str(ret);
203     cmp = ret->s!=s; // ret overwritten by re so check now
204     re(s, t = get_str(rhs), ret);
205     if (cmp) free(s);
206     if (rhs->s!=t) free(t);
207     break;
208   }
209 }
210 
211 // Evalute a compound expression using recursive "Precedence Climbing"
212 // algorithm, setting 'ret'.
213 static void eval_expr(struct value *ret, int min_prec)
214 {
215   if (!*TT.tok) error_exit("Unexpected end of input");
216 
217   // Evaluate LHS atom, setting 'ret'.
218   if (!strcmp(*TT.tok, "(")) { // parenthesized expression
219     TT.tok++;  // consume (
220 
221     eval_expr(ret, 1);        // We're inside ( ), so min_prec = 1
222     if (ret->s && !strcmp(ret->s, ")")) error_exit("empty ( )");
223     if (!*TT.tok) error_exit("Expected )");
224     if (strcmp(*TT.tok, ")")) error_exit("Expected ) but got %s", *TT.tok);
225   } else ret->s = *TT.tok;  // simple literal, all values start as strings
226   TT.tok++;
227 
228   // Evaluate RHS and apply operator until precedence is too low.
229   struct value rhs;
230   while (*TT.tok) {
231     struct op_def *o = OPS;
232 
233     while (o->tok) { // Look up operator
234       if (!strcmp(*TT.tok, o->tok)) break;
235       o++;
236     }
237     if (!o->tok) break; // Not an operator (extra input will fail later)
238     if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
239     TT.tok++;
240 
241     eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
242     eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
243   }
244 }
245 
246 void expr_main(void)
247 {
248   struct value ret = {0};
249 
250   toys.exitval = 2; // if exiting early, indicate error
251   TT.tok = toys.optargs; // initialize global token
252   eval_expr(&ret, 1);
253   if (*TT.tok) error_exit("Unexpected extra input '%s'\n", *TT.tok);
254 
255   if (ret.s) printf("%s\n", ret.s);
256   else printf("%lld\n", ret.i);
257 
258   toys.exitval = is_false(&ret);
259 
260   if (TT.refree) free(TT.refree);
261 }
262