1 /* expr.c - evaluate expression 2 * 3 * Copyright 2016 Google Inc. 4 * Copyright 2013 Daniel Verkamp <daniel@drv.nu> 5 * 6 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html 7 * 8 * The web standard is incomplete (precedence grouping missing), see: 9 * http://permalink.gmane.org/gmane.comp.standards.posix.austin.general/10141 10 * 11 * eval_expr() uses the recursive "Precedence Climbing" algorithm: 12 * 13 * Clarke, Keith. "The top-down parsing of expressions." University of London. 14 * Queen Mary College. Department of Computer Science and Statistics, 1986. 15 * 16 * http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf 17 * 18 * Nice explanation and Python implementation: 19 * http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing 20 21 USE_EXPR(NEWTOY(expr, NULL, TOYFLAG_USR|TOYFLAG_BIN)) 22 23 config EXPR 24 bool "expr" 25 default n 26 help 27 usage: expr ARG1 OPERATOR ARG2... 28 29 Evaluate expression and print result. For example, "expr 1 + 2". 30 31 The supported operators are (grouped from highest to lowest priority): 32 33 ( ) : * / % + - != <= < >= > = & | 34 35 Each constant and operator must be a separate command line argument. 36 All operators are infix, meaning they expect a constant (or expression 37 that resolves to a constant) on each side of the operator. Operators of 38 the same priority (within each group above) are evaluated left to right. 39 Parentheses may be used (as separate arguments) to elevate the priority 40 of expressions. 41 42 Calling expr from a command shell requires a lot of \( or '*' escaping 43 to avoid interpreting shell control characters. 44 45 The & and | operators are logical (not bitwise) and may operate on 46 strings (a blank string is "false"). Comparison operators may also 47 operate on strings (alphabetical sort). 48 49 Constants may be strings or integers. Comparison, logical, and regex 50 operators may operate on strings (a blank string is "false"), other 51 operators require integers. 52 */ 53 54 // TODO: int overflow checking 55 56 #define FOR_expr 57 #include "toys.h" 58 59 GLOBALS( 60 char **tok; // current token, not on the stack since recursive calls mutate it 61 62 char *refree; 63 ) 64 65 // Scalar value. If s != NULL, it's a string, otherwise it's an int. 66 struct value { 67 char *s; 68 long long i; 69 }; 70 71 // Get the value as a string. 72 char *get_str(struct value *v) 73 { 74 if (v->s) return v->s; 75 else return xmprintf("%lld", v->i); 76 } 77 78 // Get the value as an integer and return 1, or return 0 on error. 79 int get_int(struct value *v, long long *ret) 80 { 81 if (v->s) { 82 char *endp; 83 84 *ret = strtoll(v->s, &endp, 10); 85 86 if (*endp) return 0; // If endp points to NUL, all chars were converted 87 } else *ret = v->i; 88 89 return 1; 90 } 91 92 // Preserve the invariant that v.s is NULL when the value is an integer. 93 void assign_int(struct value *v, long long i) 94 { 95 v->i = i; 96 v->s = NULL; 97 } 98 99 // Check if v is 0 or the empty string. 100 static int is_false(struct value *v) 101 { 102 return get_int(v, &v->i) && !v->i; 103 } 104 105 // 'ret' is filled with a string capture or int match position. 106 static void re(char *target, char *pattern, struct value *ret) 107 { 108 regex_t pat; 109 regmatch_t m[2]; 110 111 xregcomp(&pat, pattern, 0); 112 // must match at pos 0 113 if (!regexec(&pat, target, 2, m, 0) && !m[0].rm_so) { 114 // Return first parenthesized subexpression as string, or length of match 115 if (pat.re_nsub>0) { 116 ret->s = xmprintf("%.*s", (int)(m[1].rm_eo-m[1].rm_so), 117 target+m[1].rm_so); 118 if (TT.refree) free(TT.refree); 119 TT.refree = ret->s; 120 } else assign_int(ret, m[0].rm_eo); 121 } else { 122 if (pat.re_nsub>0) ret->s = ""; 123 else assign_int(ret, 0); 124 } 125 regfree(&pat); 126 } 127 128 // 4 different signatures of operators. S = string, I = int, SI = string or 129 // int. 130 enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI }; 131 132 enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE }; 133 134 // operators grouped by precedence 135 static struct op_def { 136 char *tok; 137 char prec, sig, op; // precedence, signature for type coercion, operator ID 138 } OPS[] = { 139 // logical ops, precedence 1 and 2, signature SI_TO_SI 140 {"|", 1, SI_TO_SI, OR }, 141 {"&", 2, SI_TO_SI, AND }, 142 // comparison ops, precedence 3, signature SI_TO_I 143 {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ }, {"!=", 3, SI_TO_I, NE }, 144 {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE }, 145 {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE }, 146 // arithmetic ops, precedence 4 and 5, signature I_TO_I 147 {"+", 4, I_TO_I, ADD }, {"-", 4, I_TO_I, SUB }, 148 {"*", 5, I_TO_I, MUL }, {"/", 5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD }, 149 // regex match, precedence 6, signature S_TO_SI 150 {":", 6, S_TO_SI, RE }, 151 {NULL, 0, 0, 0}, // sentinel 152 }; 153 154 void eval_op(struct op_def *o, struct value *ret, struct value *rhs) 155 { 156 long long a, b, x = 0; // x = a OP b for ints. 157 char *s, *t; // string operands 158 int cmp; 159 160 switch (o->sig) { 161 162 case SI_TO_SI: 163 switch (o->op) { 164 case OR: if (is_false(ret)) *ret = *rhs; break; 165 case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break; 166 } 167 break; 168 169 case SI_TO_I: 170 if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints 171 cmp = a - b; 172 } else { // otherwise compare both as strings 173 cmp = strcmp(s = get_str(ret), t = get_str(rhs)); 174 if (ret->s != s) free(s); 175 if (rhs->s != t) free(t); 176 } 177 switch (o->op) { 178 case EQ: x = cmp == 0; break; 179 case NE: x = cmp != 0; break; 180 case GT: x = cmp > 0; break; 181 case GTE: x = cmp >= 0; break; 182 case LT: x = cmp < 0; break; 183 case LTE: x = cmp <= 0; break; 184 } 185 assign_int(ret, x); 186 break; 187 188 case I_TO_I: 189 if (!get_int(ret, &a) || !get_int(rhs, &b)) 190 error_exit("non-integer argument"); 191 switch (o->op) { 192 case ADD: x = a + b; break; 193 case SUB: x = a - b; break; 194 case MUL: x = a * b; break; 195 case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break; 196 case MOD: if (b == 0) error_exit("division by zero"); x = a % b; break; 197 } 198 assign_int(ret, x); 199 break; 200 201 case S_TO_SI: // op == RE 202 s = get_str(ret); 203 cmp = ret->s!=s; // ret overwritten by re so check now 204 re(s, t = get_str(rhs), ret); 205 if (cmp) free(s); 206 if (rhs->s!=t) free(t); 207 break; 208 } 209 } 210 211 // Evalute a compound expression using recursive "Precedence Climbing" 212 // algorithm, setting 'ret'. 213 static void eval_expr(struct value *ret, int min_prec) 214 { 215 if (!*TT.tok) error_exit("Unexpected end of input"); 216 217 // Evaluate LHS atom, setting 'ret'. 218 if (!strcmp(*TT.tok, "(")) { // parenthesized expression 219 TT.tok++; // consume ( 220 221 eval_expr(ret, 1); // We're inside ( ), so min_prec = 1 222 if (ret->s && !strcmp(ret->s, ")")) error_exit("empty ( )"); 223 if (!*TT.tok) error_exit("Expected )"); 224 if (strcmp(*TT.tok, ")")) error_exit("Expected ) but got %s", *TT.tok); 225 } else ret->s = *TT.tok; // simple literal, all values start as strings 226 TT.tok++; 227 228 // Evaluate RHS and apply operator until precedence is too low. 229 struct value rhs; 230 while (*TT.tok) { 231 struct op_def *o = OPS; 232 233 while (o->tok) { // Look up operator 234 if (!strcmp(*TT.tok, o->tok)) break; 235 o++; 236 } 237 if (!o->tok) break; // Not an operator (extra input will fail later) 238 if (o->prec < min_prec) break; // Precedence too low, pop a stack frame 239 TT.tok++; 240 241 eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence 242 eval_op(o, ret, &rhs); // Apply operator, setting 'ret' 243 } 244 } 245 246 void expr_main(void) 247 { 248 struct value ret = {0}; 249 250 toys.exitval = 2; // if exiting early, indicate error 251 TT.tok = toys.optargs; // initialize global token 252 eval_expr(&ret, 1); 253 if (*TT.tok) error_exit("Unexpected extra input '%s'\n", *TT.tok); 254 255 if (ret.s) printf("%s\n", ret.s); 256 else printf("%lld\n", ret.i); 257 258 toys.exitval = is_false(&ret); 259 260 if (TT.refree) free(TT.refree); 261 } 262