1 /* expr.c - evaluate expression
2 *
3 * Copyright 2016 Google Inc.
4 * Copyright 2013 Daniel Verkamp <daniel@drv.nu>
5 *
6 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
7 *
8 * The web standard is incomplete (precedence grouping missing), see:
9 * http://permalink.gmane.org/gmane.comp.standards.posix.austin.general/10141
10 *
11 * eval_expr() uses the recursive "Precedence Climbing" algorithm:
12 *
13 * Clarke, Keith. "The top-down parsing of expressions." University of London.
14 * Queen Mary College. Department of Computer Science and Statistics, 1986.
15 *
16 * http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
17 *
18 * Nice explanation and Python implementation:
19 * http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
20
21 USE_EXPR(NEWTOY(expr, NULL, TOYFLAG_USR|TOYFLAG_BIN))
22
23 config EXPR
24 bool "expr"
25 default n
26 help
27 usage: expr ARG1 OPERATOR ARG2...
28
29 Evaluate expression and print result. For example, "expr 1 + 2".
30
31 The supported operators are (grouped from highest to lowest priority):
32
33 ( ) : * / % + - != <= < >= > = & |
34
35 Each constant and operator must be a separate command line argument.
36 All operators are infix, meaning they expect a constant (or expression
37 that resolves to a constant) on each side of the operator. Operators of
38 the same priority (within each group above) are evaluated left to right.
39 Parentheses may be used (as separate arguments) to elevate the priority
40 of expressions.
41
42 Calling expr from a command shell requires a lot of \( or '*' escaping
43 to avoid interpreting shell control characters.
44
45 The & and | operators are logical (not bitwise) and may operate on
46 strings (a blank string is "false"). Comparison operators may also
47 operate on strings (alphabetical sort).
48
49 Constants may be strings or integers. Comparison, logical, and regex
50 operators may operate on strings (a blank string is "false"), other
51 operators require integers.
52 */
53
54 // TODO: int overflow checking
55
56 #define FOR_expr
57 #include "toys.h"
58
59 GLOBALS(
60 char **tok; // current token, not on the stack since recursive calls mutate it
61
62 char *refree;
63 )
64
65 // Scalar value. If s != NULL, it's a string, otherwise it's an int.
66 struct value {
67 char *s;
68 long long i;
69 };
70
71 // Get the value as a string.
get_str(struct value * v)72 char *get_str(struct value *v)
73 {
74 if (v->s) return v->s;
75 else return xmprintf("%lld", v->i);
76 }
77
78 // Get the value as an integer and return 1, or return 0 on error.
get_int(struct value * v,long long * ret)79 int get_int(struct value *v, long long *ret)
80 {
81 if (v->s) {
82 char *endp;
83
84 *ret = strtoll(v->s, &endp, 10);
85
86 if (*endp) return 0; // If endp points to NUL, all chars were converted
87 } else *ret = v->i;
88
89 return 1;
90 }
91
92 // Preserve the invariant that v.s is NULL when the value is an integer.
assign_int(struct value * v,long long i)93 void assign_int(struct value *v, long long i)
94 {
95 v->i = i;
96 v->s = NULL;
97 }
98
99 // Check if v is 0 or the empty string.
is_false(struct value * v)100 static int is_false(struct value *v)
101 {
102 return get_int(v, &v->i) && !v->i;
103 }
104
105 // 'ret' is filled with a string capture or int match position.
re(char * target,char * pattern,struct value * ret)106 static void re(char *target, char *pattern, struct value *ret)
107 {
108 regex_t pat;
109 regmatch_t m[2];
110
111 xregcomp(&pat, pattern, 0);
112 // must match at pos 0
113 if (!regexec(&pat, target, 2, m, 0) && !m[0].rm_so) {
114 // Return first parenthesized subexpression as string, or length of match
115 if (pat.re_nsub>0) {
116 ret->s = xmprintf("%.*s", m[1].rm_eo-m[1].rm_so, target+m[1].rm_so);
117 if (TT.refree) free(TT.refree);
118 TT.refree = ret->s;
119 } else assign_int(ret, m[0].rm_eo);
120 } else {
121 if (pat.re_nsub>0) ret->s = "";
122 else assign_int(ret, 0);
123 }
124 regfree(&pat);
125 }
126
127 // 4 different signatures of operators. S = string, I = int, SI = string or
128 // int.
129 enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };
130
131 enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };
132
133 // operators grouped by precedence
134 static struct op_def {
135 char *tok;
136 char prec, sig, op; // precedence, signature for type coercion, operator ID
137 } OPS[] = {
138 // logical ops, precedence 1 and 2, signature SI_TO_SI
139 {"|", 1, SI_TO_SI, OR },
140 {"&", 2, SI_TO_SI, AND },
141 // comparison ops, precedence 3, signature SI_TO_I
142 {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ }, {"!=", 3, SI_TO_I, NE },
143 {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
144 {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE },
145 // arithmetic ops, precedence 4 and 5, signature I_TO_I
146 {"+", 4, I_TO_I, ADD }, {"-", 4, I_TO_I, SUB },
147 {"*", 5, I_TO_I, MUL }, {"/", 5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
148 // regex match, precedence 6, signature S_TO_SI
149 {":", 6, S_TO_SI, RE },
150 {NULL, 0, 0, 0}, // sentinel
151 };
152
eval_op(struct op_def * o,struct value * ret,struct value * rhs)153 void eval_op(struct op_def *o, struct value *ret, struct value *rhs)
154 {
155 long long a, b, x = 0; // x = a OP b for ints.
156 char *s, *t; // string operands
157 int cmp;
158
159 switch (o->sig) {
160
161 case SI_TO_SI:
162 switch (o->op) {
163 case OR: if (is_false(ret)) *ret = *rhs; break;
164 case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
165 }
166 break;
167
168 case SI_TO_I:
169 if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
170 cmp = a - b;
171 } else { // otherwise compare both as strings
172 cmp = strcmp(s = get_str(ret), t = get_str(rhs));
173 if (ret->s != s) free(s);
174 if (rhs->s != t) free(t);
175 }
176 switch (o->op) {
177 case EQ: x = cmp == 0; break;
178 case NE: x = cmp != 0; break;
179 case GT: x = cmp > 0; break;
180 case GTE: x = cmp >= 0; break;
181 case LT: x = cmp < 0; break;
182 case LTE: x = cmp <= 0; break;
183 }
184 assign_int(ret, x);
185 break;
186
187 case I_TO_I:
188 if (!get_int(ret, &a) || !get_int(rhs, &b))
189 error_exit("non-integer argument");
190 switch (o->op) {
191 case ADD: x = a + b; break;
192 case SUB: x = a - b; break;
193 case MUL: x = a * b; break;
194 case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
195 case MOD: if (b == 0) error_exit("division by zero"); x = a % b; break;
196 }
197 assign_int(ret, x);
198 break;
199
200 case S_TO_SI: // op == RE
201 s = get_str(ret);
202 cmp = ret->s!=s; // ret overwritten by re so check now
203 re(s, t = get_str(rhs), ret);
204 if (cmp) free(s);
205 if (rhs->s!=t) free(t);
206 break;
207 }
208 }
209
210 // Evalute a compound expression using recursive "Precedence Climbing"
211 // algorithm, setting 'ret'.
eval_expr(struct value * ret,int min_prec)212 static void eval_expr(struct value *ret, int min_prec)
213 {
214 if (!*TT.tok) error_exit("Unexpected end of input");
215
216 // Evaluate LHS atom, setting 'ret'.
217 if (!strcmp(*TT.tok, "(")) { // parenthesized expression
218 TT.tok++; // consume (
219
220 eval_expr(ret, 1); // We're inside ( ), so min_prec = 1
221 if (ret->s && !strcmp(ret->s, ")")) error_exit("empty ( )");
222 if (!*TT.tok) error_exit("Expected )");
223 if (strcmp(*TT.tok, ")")) error_exit("Expected ) but got %s", *TT.tok);
224 } else ret->s = *TT.tok; // simple literal, all values start as strings
225 TT.tok++;
226
227 // Evaluate RHS and apply operator until precedence is too low.
228 struct value rhs;
229 while (*TT.tok) {
230 struct op_def *o = OPS;
231
232 while (o->tok) { // Look up operator
233 if (!strcmp(*TT.tok, o->tok)) break;
234 o++;
235 }
236 if (!o->tok) break; // Not an operator (extra input will fail later)
237 if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
238 TT.tok++;
239
240 eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
241 eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
242 }
243 }
244
expr_main(void)245 void expr_main(void)
246 {
247 struct value ret = {0};
248
249 toys.exitval = 2; // if exiting early, indicate error
250 TT.tok = toys.optargs; // initialize global token
251 eval_expr(&ret, 1);
252 if (*TT.tok) error_exit("Unexpected extra input '%s'\n", *TT.tok);
253
254 if (ret.s) printf("%s\n", ret.s);
255 else printf("%lld\n", ret.i);
256
257 toys.exitval = is_false(&ret);
258
259 if (TT.refree) free(TT.refree);
260 }
261