1 /* expr.c - evaluate expression
2  *
3  * Copyright 2016 Google Inc.
4  * Copyright 2013 Daniel Verkamp <daniel@drv.nu>
5  *
6  * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
7  *
8  * The web standard is incomplete (precedence grouping missing), see:
9  * http://permalink.gmane.org/gmane.comp.standards.posix.austin.general/10141
10  *
11  * eval_expr() uses the recursive "Precedence Climbing" algorithm:
12  *
13  * Clarke, Keith. "The top-down parsing of expressions." University of London.
14  * Queen Mary College. Department of Computer Science and Statistics, 1986.
15  *
16  * http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
17  *
18  * Nice explanation and Python implementation:
19  * http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
20 
21 USE_EXPR(NEWTOY(expr, NULL, TOYFLAG_USR|TOYFLAG_BIN))
22 
23 config EXPR
24   bool "expr"
25   default n
26   help
27     usage: expr ARG1 OPERATOR ARG2...
28 
29     Evaluate expression and print result. For example, "expr 1 + 2".
30 
31     The supported operators are (grouped from highest to lowest priority):
32 
33       ( )    :    * / %    + -    != <= < >= > =    &    |
34 
35     Each constant and operator must be a separate command line argument.
36     All operators are infix, meaning they expect a constant (or expression
37     that resolves to a constant) on each side of the operator. Operators of
38     the same priority (within each group above) are evaluated left to right.
39     Parentheses may be used (as separate arguments) to elevate the priority
40     of expressions.
41 
42     Calling expr from a command shell requires a lot of \( or '*' escaping
43     to avoid interpreting shell control characters.
44 
45     The & and | operators are logical (not bitwise) and may operate on
46     strings (a blank string is "false"). Comparison operators may also
47     operate on strings (alphabetical sort).
48 
49     Constants may be strings or integers. Comparison, logical, and regex
50     operators may operate on strings (a blank string is "false"), other
51     operators require integers.
52 */
53 
54 // TODO: int overflow checking
55 
56 #define FOR_expr
57 #include "toys.h"
58 
59 GLOBALS(
60   char **tok; // current token, not on the stack since recursive calls mutate it
61 
62   char *refree;
63 )
64 
65 // Scalar value.  If s != NULL, it's a string, otherwise it's an int.
66 struct value {
67   char *s;
68   long long i;
69 };
70 
71 // Get the value as a string.
get_str(struct value * v)72 char *get_str(struct value *v)
73 {
74   if (v->s) return v->s;
75   else return xmprintf("%lld", v->i);
76 }
77 
78 // Get the value as an integer and return 1, or return 0 on error.
get_int(struct value * v,long long * ret)79 int get_int(struct value *v, long long *ret)
80 {
81   if (v->s) {
82     char *endp;
83 
84     *ret = strtoll(v->s, &endp, 10);
85 
86     if (*endp) return 0; // If endp points to NUL, all chars were converted
87   } else *ret = v->i;
88 
89   return 1;
90 }
91 
92 // Preserve the invariant that v.s is NULL when the value is an integer.
assign_int(struct value * v,long long i)93 void assign_int(struct value *v, long long i)
94 {
95   v->i = i;
96   v->s = NULL;
97 }
98 
99 // Check if v is 0 or the empty string.
is_false(struct value * v)100 static int is_false(struct value *v)
101 {
102   return get_int(v, &v->i) && !v->i;
103 }
104 
105 // 'ret' is filled with a string capture or int match position.
re(char * target,char * pattern,struct value * ret)106 static void re(char *target, char *pattern, struct value *ret)
107 {
108   regex_t pat;
109   regmatch_t m[2];
110 
111   xregcomp(&pat, pattern, 0);
112   // must match at pos 0
113   if (!regexec(&pat, target, 2, m, 0) && !m[0].rm_so) {
114     // Return first parenthesized subexpression as string, or length of match
115     if (pat.re_nsub>0) {
116       ret->s = xmprintf("%.*s", m[1].rm_eo-m[1].rm_so, target+m[1].rm_so);
117       if (TT.refree) free(TT.refree);
118       TT.refree = ret->s;
119     } else assign_int(ret, m[0].rm_eo);
120   } else {
121     if (pat.re_nsub>0) ret->s = "";
122     else assign_int(ret, 0);
123   }
124   regfree(&pat);
125 }
126 
127 // 4 different signatures of operators.  S = string, I = int, SI = string or
128 // int.
129 enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };
130 
131 enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };
132 
133 // operators grouped by precedence
134 static struct op_def {
135   char *tok;
136   char prec, sig, op; // precedence, signature for type coercion, operator ID
137 } OPS[] = {
138   // logical ops, precedence 1 and 2, signature SI_TO_SI
139   {"|", 1, SI_TO_SI, OR  },
140   {"&", 2, SI_TO_SI, AND },
141   // comparison ops, precedence 3, signature SI_TO_I
142   {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ  }, {"!=", 3, SI_TO_I, NE },
143   {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
144   {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE },
145   // arithmetic ops, precedence 4 and 5, signature I_TO_I
146   {"+", 4, I_TO_I, ADD }, {"-",  4, I_TO_I, SUB },
147   {"*", 5, I_TO_I, MUL }, {"/",  5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
148   // regex match, precedence 6, signature S_TO_SI
149   {":", 6, S_TO_SI, RE },
150   {NULL, 0, 0, 0}, // sentinel
151 };
152 
eval_op(struct op_def * o,struct value * ret,struct value * rhs)153 void eval_op(struct op_def *o, struct value *ret, struct value *rhs)
154 {
155   long long a, b, x = 0; // x = a OP b for ints.
156   char *s, *t; // string operands
157   int cmp;
158 
159   switch (o->sig) {
160 
161   case SI_TO_SI:
162     switch (o->op) {
163     case OR:  if (is_false(ret)) *ret = *rhs; break;
164     case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
165     }
166     break;
167 
168   case SI_TO_I:
169     if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
170       cmp = a - b;
171     } else { // otherwise compare both as strings
172       cmp = strcmp(s = get_str(ret), t = get_str(rhs));
173       if (ret->s != s) free(s);
174       if (rhs->s != t) free(t);
175     }
176     switch (o->op) {
177     case EQ:  x = cmp == 0; break;
178     case NE:  x = cmp != 0; break;
179     case GT:  x = cmp >  0; break;
180     case GTE: x = cmp >= 0; break;
181     case LT:  x = cmp <  0; break;
182     case LTE: x = cmp <= 0; break;
183     }
184     assign_int(ret, x);
185     break;
186 
187   case I_TO_I:
188     if (!get_int(ret, &a) || !get_int(rhs, &b))
189       error_exit("non-integer argument");
190     switch (o->op) {
191     case ADD: x = a + b; break;
192     case SUB: x = a - b; break;
193     case MUL: x = a * b; break;
194     case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
195     case MOD:  if (b == 0) error_exit("division by zero"); x = a % b; break;
196     }
197     assign_int(ret, x);
198     break;
199 
200   case S_TO_SI: // op == RE
201     s = get_str(ret);
202     cmp = ret->s!=s; // ret overwritten by re so check now
203     re(s, t = get_str(rhs), ret);
204     if (cmp) free(s);
205     if (rhs->s!=t) free(t);
206     break;
207   }
208 }
209 
210 // Evalute a compound expression using recursive "Precedence Climbing"
211 // algorithm, setting 'ret'.
eval_expr(struct value * ret,int min_prec)212 static void eval_expr(struct value *ret, int min_prec)
213 {
214   if (!*TT.tok) error_exit("Unexpected end of input");
215 
216   // Evaluate LHS atom, setting 'ret'.
217   if (!strcmp(*TT.tok, "(")) { // parenthesized expression
218     TT.tok++;  // consume (
219 
220     eval_expr(ret, 1);        // We're inside ( ), so min_prec = 1
221     if (ret->s && !strcmp(ret->s, ")")) error_exit("empty ( )");
222     if (!*TT.tok) error_exit("Expected )");
223     if (strcmp(*TT.tok, ")")) error_exit("Expected ) but got %s", *TT.tok);
224   } else ret->s = *TT.tok;  // simple literal, all values start as strings
225   TT.tok++;
226 
227   // Evaluate RHS and apply operator until precedence is too low.
228   struct value rhs;
229   while (*TT.tok) {
230     struct op_def *o = OPS;
231 
232     while (o->tok) { // Look up operator
233       if (!strcmp(*TT.tok, o->tok)) break;
234       o++;
235     }
236     if (!o->tok) break; // Not an operator (extra input will fail later)
237     if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
238     TT.tok++;
239 
240     eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
241     eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
242   }
243 }
244 
expr_main(void)245 void expr_main(void)
246 {
247   struct value ret = {0};
248 
249   toys.exitval = 2; // if exiting early, indicate error
250   TT.tok = toys.optargs; // initialize global token
251   eval_expr(&ret, 1);
252   if (*TT.tok) error_exit("Unexpected extra input '%s'\n", *TT.tok);
253 
254   if (ret.s) printf("%s\n", ret.s);
255   else printf("%lld\n", ret.i);
256 
257   toys.exitval = is_false(&ret);
258 
259   if (TT.refree) free(TT.refree);
260 }
261