1 /*	$OpenBSD: syn.c,v 1.30 2015/09/01 13:12:31 tedu Exp $	*/
2 
3 /*-
4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009,
5  *		 2011, 2012, 2013, 2014, 2015, 2016
6  *	mirabilos <m@mirbsd.org>
7  *
8  * Provided that these terms and disclaimer and all copyright notices
9  * are retained or reproduced in an accompanying document, permission
10  * is granted to deal in this work without restriction, including un-
11  * limited rights to use, publicly perform, distribute, sell, modify,
12  * merge, give away, or sublicence.
13  *
14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15  * the utmost extent permitted by applicable law, neither express nor
16  * implied; without malicious intent or gross negligence. In no event
17  * may a licensor, author or contributor be held liable for indirect,
18  * direct, other damage, loss, or other issues arising in any way out
19  * of dealing in the work, even if advised of the possibility of such
20  * damage or existence of a defect, except proven that it results out
21  * of said person's immediate fault when using the work as intended.
22  */
23 
24 #include "sh.h"
25 
26 __RCSID("$MirOS: src/bin/mksh/syn.c,v 1.115 2016/09/01 12:59:12 tg Exp $");
27 
/*
 * Describes the construct that opened the current nesting level; filled
 * in by nesting_push() and consulted by syntaxerr() on unexpected EOF.
 */
struct nesting_state {
	int start_token;	/* token that began nesting (eg, FOR) */
	int start_line;		/* line nesting began on */
};
32 
/*
 * Parser state saved by yyrecursive() before a nested parse and put
 * back by yyrecursive_pop(); entries are chained through 'next'.
 */
struct yyrecursive_state {
	struct ioword *old_heres[HERES];	/* copy of heres[] */
	struct yyrecursive_state *next;		/* previously pushed state */
	struct ioword **old_herep;		/* saved herep */
	int old_symbol;				/* saved symbol */
	int old_salias;				/* saved sALIAS */
	int old_nesting_type;			/* saved subshell_nesting_type */
	bool old_reject;			/* saved reject */
};
42 
/* forward declarations of the file-local parser routines */
static void yyparse(void);
static struct op *pipeline(int);
static struct op *andor(void);
static struct op *c_list(bool);
static struct ioword *synio(int);
static struct op *nested(int, int, int);
static struct op *get_command(int);
static struct op *dogroup(void);
static struct op *thenpart(void);
static struct op *elsepart(void);
static struct op *caselist(void);
static struct op *casepart(int);
static struct op *function_body(char *, bool);
static char **wordlist(void);
static struct op *block(int, struct op *, struct op *);
static struct op *newtp(int);
static void syntaxerr(const char *) MKSH_A_NORETURN;
static void nesting_push(struct nesting_state *, int);
static void nesting_pop(struct nesting_state *);
static int inalias(struct source *) MKSH_A_PURE;
/* callbacks handed to test_parse() while parsing [[ ... ]] */
static Test_op dbtestp_isa(Test_env *, Test_meta);
static const char *dbtestp_getopnd(Test_env *, Test_op, bool);
static int dbtestp_eval(Test_env *, Test_op, const char *,
    const char *, bool);
static void dbtestp_error(Test_env *, int, const char *) MKSH_A_NORETURN;
68 
static struct op *outtree;		/* yyparse output */
/* innermost open construct: set by nesting_push(), read by syntaxerr() */
static struct nesting_state nesting;	/* \n changed to ; */

static bool reject;			/* token(cf) gets symbol again */
static int symbol;			/* yylex value */
static int sALIAS = ALIAS;		/* 0 in yyrecursive */

/* push the current token back: the next token()/tpeek() yields it again */
#define REJECT		(reject = true)
/* mark the current token as consumed */
#define ACCEPT		(reject = false)
/* next token: the pushed-back symbol if there is one, else a fresh yylex(cf) */
#define token(cf)	((reject) ? (ACCEPT, symbol) : (symbol = yylex(cf)))
/* like token(cf), but leaves the token pushed back for the next reader */
#define tpeek(cf)	((reject) ? (symbol) : (REJECT, symbol = yylex(cf)))
/* consume the next token; anything other than c is a syntax error */
#define musthave(c,cf)	do { if (token(cf) != (c)) syntaxerr(NULL); } while (/* CONSTCOND */ 0)

/* spellings of the closing keywords, shared by casepart() and tokentab[] */
static const char Tcbrace[] = "}";
static const char Tesac[] = "esac";
84 
85 static void
yyparse(void)86 yyparse(void)
87 {
88 	int c;
89 
90 	ACCEPT;
91 
92 	outtree = c_list(source->type == SSTRING);
93 	c = tpeek(0);
94 	if (c == 0 && !outtree)
95 		outtree = newtp(TEOF);
96 	else if (c != '\n' && c != 0)
97 		syntaxerr(NULL);
98 }
99 
100 static struct op *
pipeline(int cf)101 pipeline(int cf)
102 {
103 	struct op *t, *p, *tl = NULL;
104 
105 	t = get_command(cf);
106 	if (t != NULL) {
107 		while (token(0) == '|') {
108 			if ((p = get_command(CONTIN)) == NULL)
109 				syntaxerr(NULL);
110 			if (tl == NULL)
111 				t = tl = block(TPIPE, t, p);
112 			else
113 				tl = tl->right = block(TPIPE, tl->right, p);
114 		}
115 		REJECT;
116 	}
117 	return (t);
118 }
119 
120 static struct op *
andor(void)121 andor(void)
122 {
123 	struct op *t, *p;
124 	int c;
125 
126 	t = pipeline(0);
127 	if (t != NULL) {
128 		while ((c = token(0)) == LOGAND || c == LOGOR) {
129 			if ((p = pipeline(CONTIN)) == NULL)
130 				syntaxerr(NULL);
131 			t = block(c == LOGAND? TAND: TOR, t, p);
132 		}
133 		REJECT;
134 	}
135 	return (t);
136 }
137 
138 static struct op *
c_list(bool multi)139 c_list(bool multi)
140 {
141 	struct op *t = NULL, *p, *tl = NULL;
142 	int c;
143 	bool have_sep;
144 
145 	while (/* CONSTCOND */ 1) {
146 		p = andor();
147 		/*
148 		 * Token has always been read/rejected at this point, so
149 		 * we don't worry about what flags to pass token()
150 		 */
151 		c = token(0);
152 		have_sep = true;
153 		if (c == '\n' && (multi || inalias(source))) {
154 			if (!p)
155 				/* ignore blank lines */
156 				continue;
157 		} else if (!p)
158 			break;
159 		else if (c == '&' || c == COPROC)
160 			p = block(c == '&' ? TASYNC : TCOPROC, p, NULL);
161 		else if (c != ';')
162 			have_sep = false;
163 		if (!t)
164 			t = p;
165 		else if (!tl)
166 			t = tl = block(TLIST, t, p);
167 		else
168 			tl = tl->right = block(TLIST, tl->right, p);
169 		if (!have_sep)
170 			break;
171 	}
172 	REJECT;
173 	return (t);
174 }
175 
176 static const char IONDELIM_delim[] = { CHAR, '<', CHAR, '<', EOS };
177 
178 static struct ioword *
synio(int cf)179 synio(int cf)
180 {
181 	struct ioword *iop;
182 	static struct ioword *nextiop;
183 	bool ishere;
184 
185 	if (nextiop != NULL) {
186 		iop = nextiop;
187 		nextiop = NULL;
188 		return (iop);
189 	}
190 
191 	if (tpeek(cf) != REDIR)
192 		return (NULL);
193 	ACCEPT;
194 	iop = yylval.iop;
195 	ishere = (iop->ioflag & IOTYPE) == IOHERE;
196 	if (iop->ioflag & IOHERESTR) {
197 		musthave(LWORD, 0);
198 	} else if (ishere && tpeek(HEREDELIM) == '\n') {
199 		ACCEPT;
200 		yylval.cp = wdcopy(IONDELIM_delim, ATEMP);
201 		iop->ioflag |= IOEVAL | IONDELIM;
202 	} else
203 		musthave(LWORD, ishere ? HEREDELIM : 0);
204 	if (ishere) {
205 		iop->delim = yylval.cp;
206 		if (*ident != 0 && !(iop->ioflag & IOHERESTR)) {
207 			/* unquoted */
208 			iop->ioflag |= IOEVAL;
209 		}
210 		if (herep > &heres[HERES - 1])
211 			yyerror(Tf_toomany, "<<");
212 		*herep++ = iop;
213 	} else
214 		iop->ioname = yylval.cp;
215 
216 	if (iop->ioflag & IOBASH) {
217 		char *cp;
218 
219 		nextiop = alloc(sizeof(*iop), ATEMP);
220 		nextiop->ioname = cp = alloc(3, ATEMP);
221 		*cp++ = CHAR;
222 		*cp++ = digits_lc[iop->unit % 10];
223 		*cp = EOS;
224 
225 		iop->ioflag &= ~IOBASH;
226 		nextiop->unit = 2;
227 		nextiop->ioflag = IODUP;
228 		nextiop->delim = NULL;
229 		nextiop->heredoc = NULL;
230 	}
231 	return (iop);
232 }
233 
234 static struct op *
nested(int type,int smark,int emark)235 nested(int type, int smark, int emark)
236 {
237 	struct op *t;
238 	struct nesting_state old_nesting;
239 
240 	nesting_push(&old_nesting, smark);
241 	t = c_list(true);
242 	musthave(emark, KEYWORD|sALIAS);
243 	nesting_pop(&old_nesting);
244 	return (block(type, t, NULL));
245 }
246 
/* first argument word of the command built by get_command() for (( ... )) */
static const char let_cmd[] = {
	QCHAR, 'l', CHAR, 'e', CHAR, 't', CHAR, ']', EOS
};
/*
 * Words used by get_command() to turn "foo=(bar)" into an argument
 * list of the form: setA_cmd0 setA_cmd1 foo setA_cmd2 bar...
 */
static const char setA_cmd0[] = {
	QCHAR, 's', CHAR, 'e', CHAR, 't', EOS
};
static const char setA_cmd1[] = {
	CHAR, '-', CHAR, 'A', EOS
};
static const char setA_cmd2[] = {
	CHAR, '-', CHAR, '-', EOS
};
259 
260 static struct op *
get_command(int cf)261 get_command(int cf)
262 {
263 	struct op *t;
264 	int c, iopn = 0, syniocf, lno;
265 	struct ioword *iop, **iops;
266 	XPtrV args, vars;
267 	struct nesting_state old_nesting;
268 
269 	/* NUFILE is small enough to leave this addition unchecked */
270 	iops = alloc2((NUFILE + 1), sizeof(struct ioword *), ATEMP);
271 	XPinit(args, 16);
272 	XPinit(vars, 16);
273 
274 	syniocf = KEYWORD|sALIAS;
275 	switch (c = token(cf|KEYWORD|sALIAS|CMDASN)) {
276 	default:
277 		REJECT;
278 		afree(iops, ATEMP);
279 		XPfree(args);
280 		XPfree(vars);
281 		/* empty line */
282 		return (NULL);
283 
284 	case LWORD:
285 	case REDIR:
286 		REJECT;
287 		syniocf &= ~(KEYWORD|sALIAS);
288 		t = newtp(TCOM);
289 		t->lineno = source->line;
290 		goto get_command_start;
291 		while (/* CONSTCOND */ 1) {
292 			bool check_assign_cmd;
293 
294 			if (XPsize(args) == 0) {
295  get_command_start:
296 				check_assign_cmd = true;
297 				cf = sALIAS | CMDASN;
298 			} else if (t->u.evalflags)
299 				cf = CMDWORD | CMDASN;
300 			else
301 				cf = CMDWORD;
302 			switch (tpeek(cf)) {
303 			case REDIR:
304 				while ((iop = synio(cf)) != NULL) {
305 					if (iopn >= NUFILE)
306 						yyerror(Tf_toomany,
307 						    Tredirection);
308 					iops[iopn++] = iop;
309 				}
310 				break;
311 
312 			case LWORD:
313 				ACCEPT;
314 				/*
315 				 * the iopn == 0 and XPsize(vars) == 0 are
316 				 * dubious but AT&T ksh acts this way
317 				 */
318 				if (iopn == 0 && XPsize(vars) == 0 &&
319 				    check_assign_cmd) {
320 					if (assign_command(ident, false))
321 						t->u.evalflags = DOVACHECK;
322 					else if (strcmp(ident, Tcommand) != 0)
323 						check_assign_cmd = false;
324 				}
325 				if ((XPsize(args) == 0 || Flag(FKEYWORD)) &&
326 				    is_wdvarassign(yylval.cp))
327 					XPput(vars, yylval.cp);
328 				else
329 					XPput(args, yylval.cp);
330 				break;
331 
332 			case '(' /*)*/:
333 				if (XPsize(args) == 0 && XPsize(vars) == 1 &&
334 				    is_wdvarassign(yylval.cp)) {
335 					char *tcp;
336 
337 					/* wdarrassign: foo=(bar) */
338 					ACCEPT;
339 
340 					/* manipulate the vars string */
341 					tcp = XPptrv(vars)[(vars.len = 0)];
342 					/* 'varname=' -> 'varname' */
343 					tcp[wdscan(tcp, EOS) - tcp - 3] = EOS;
344 
345 					/* construct new args strings */
346 					XPput(args, wdcopy(setA_cmd0, ATEMP));
347 					XPput(args, wdcopy(setA_cmd1, ATEMP));
348 					XPput(args, tcp);
349 					XPput(args, wdcopy(setA_cmd2, ATEMP));
350 
351 					/* slurp in words till closing paren */
352 					while (token(CONTIN) == LWORD)
353 						XPput(args, yylval.cp);
354 					if (symbol != /*(*/ ')')
355 						syntaxerr(NULL);
356 				} else {
357 					/*
358 					 * Check for "> foo (echo hi)"
359 					 * which AT&T ksh allows (not
360 					 * POSIX, but not disallowed)
361 					 */
362 					afree(t, ATEMP);
363 					if (XPsize(args) == 0 &&
364 					    XPsize(vars) == 0) {
365 						ACCEPT;
366 						goto Subshell;
367 					}
368 
369 					/* must be a function */
370 					if (iopn != 0 || XPsize(args) != 1 ||
371 					    XPsize(vars) != 0)
372 						syntaxerr(NULL);
373 					ACCEPT;
374 					musthave(/*(*/')', 0);
375 					t = function_body(XPptrv(args)[0], false);
376 				}
377 				goto Leave;
378 
379 			default:
380 				goto Leave;
381 			}
382 		}
383  Leave:
384 		break;
385 
386 	case '(': /*)*/ {
387 		int subshell_nesting_type_saved;
388  Subshell:
389 		subshell_nesting_type_saved = subshell_nesting_type;
390 		subshell_nesting_type = ')';
391 		t = nested(TPAREN, '(', ')');
392 		subshell_nesting_type = subshell_nesting_type_saved;
393 		break;
394 	    }
395 
396 	case '{': /*}*/
397 		t = nested(TBRACE, '{', '}');
398 		break;
399 
400 	case MDPAREN:
401 		/* leave KEYWORD in syniocf (allow if (( 1 )) then ...) */
402 		lno = source->line;
403 		ACCEPT;
404 		switch (token(LETEXPR)) {
405 		case LWORD:
406 			break;
407 		case '(': /*)*/
408 			c = '(';
409 			goto Subshell;
410 		default:
411 			syntaxerr(NULL);
412 		}
413 		t = newtp(TCOM);
414 		t->lineno = lno;
415 		XPput(args, wdcopy(let_cmd, ATEMP));
416 		XPput(args, yylval.cp);
417 		break;
418 
419 	case DBRACKET: /* [[ .. ]] */
420 		/* leave KEYWORD in syniocf (allow if [[ -n 1 ]] then ...) */
421 		t = newtp(TDBRACKET);
422 		ACCEPT;
423 		{
424 			Test_env te;
425 
426 			te.flags = TEF_DBRACKET;
427 			te.pos.av = &args;
428 			te.isa = dbtestp_isa;
429 			te.getopnd = dbtestp_getopnd;
430 			te.eval = dbtestp_eval;
431 			te.error = dbtestp_error;
432 
433 			test_parse(&te);
434 		}
435 		break;
436 
437 	case FOR:
438 	case SELECT:
439 		t = newtp((c == FOR) ? TFOR : TSELECT);
440 		musthave(LWORD, CMDASN);
441 		if (!is_wdvarname(yylval.cp, true))
442 			yyerror("%s: bad identifier\n",
443 			    c == FOR ? "for" : Tselect);
444 		strdupx(t->str, ident, ATEMP);
445 		nesting_push(&old_nesting, c);
446 		t->vars = wordlist();
447 		t->left = dogroup();
448 		nesting_pop(&old_nesting);
449 		break;
450 
451 	case WHILE:
452 	case UNTIL:
453 		nesting_push(&old_nesting, c);
454 		t = newtp((c == WHILE) ? TWHILE : TUNTIL);
455 		t->left = c_list(true);
456 		t->right = dogroup();
457 		nesting_pop(&old_nesting);
458 		break;
459 
460 	case CASE:
461 		t = newtp(TCASE);
462 		musthave(LWORD, 0);
463 		t->str = yylval.cp;
464 		nesting_push(&old_nesting, c);
465 		t->left = caselist();
466 		nesting_pop(&old_nesting);
467 		break;
468 
469 	case IF:
470 		nesting_push(&old_nesting, c);
471 		t = newtp(TIF);
472 		t->left = c_list(true);
473 		t->right = thenpart();
474 		musthave(FI, KEYWORD|sALIAS);
475 		nesting_pop(&old_nesting);
476 		break;
477 
478 	case BANG:
479 		syniocf &= ~(KEYWORD|sALIAS);
480 		t = pipeline(0);
481 		if (t == NULL)
482 			syntaxerr(NULL);
483 		t = block(TBANG, NULL, t);
484 		break;
485 
486 	case TIME:
487 		syniocf &= ~(KEYWORD|sALIAS);
488 		t = pipeline(0);
489 		if (t && t->type == TCOM) {
490 			t->str = alloc(2, ATEMP);
491 			/* TF_* flags */
492 			t->str[0] = '\0';
493 			t->str[1] = '\0';
494 		}
495 		t = block(TTIME, t, NULL);
496 		break;
497 
498 	case FUNCTION:
499 		musthave(LWORD, 0);
500 		t = function_body(yylval.cp, true);
501 		break;
502 	}
503 
504 	while ((iop = synio(syniocf)) != NULL) {
505 		if (iopn >= NUFILE)
506 			yyerror(Tf_toomany, Tredirection);
507 		iops[iopn++] = iop;
508 	}
509 
510 	if (iopn == 0) {
511 		afree(iops, ATEMP);
512 		t->ioact = NULL;
513 	} else {
514 		iops[iopn++] = NULL;
515 		iops = aresize2(iops, iopn, sizeof(struct ioword *), ATEMP);
516 		t->ioact = iops;
517 	}
518 
519 	if (t->type == TCOM || t->type == TDBRACKET) {
520 		XPput(args, NULL);
521 		t->args = (const char **)XPclose(args);
522 		XPput(vars, NULL);
523 		t->vars = (char **)XPclose(vars);
524 	} else {
525 		XPfree(args);
526 		XPfree(vars);
527 	}
528 
529 	if (c == MDPAREN) {
530 		t = block(TBRACE, t, NULL);
531 		t->ioact = t->left->ioact;
532 		t->left->ioact = NULL;
533 	}
534 
535 	return (t);
536 }
537 
538 static struct op *
dogroup(void)539 dogroup(void)
540 {
541 	int c;
542 	struct op *list;
543 
544 	c = token(CONTIN|KEYWORD|sALIAS);
545 	/*
546 	 * A {...} can be used instead of do...done for for/select loops
547 	 * but not for while/until loops - we don't need to check if it
548 	 * is a while loop because it would have been parsed as part of
549 	 * the conditional command list...
550 	 */
551 	if (c == DO)
552 		c = DONE;
553 	else if (c == '{')
554 		c = '}';
555 	else
556 		syntaxerr(NULL);
557 	list = c_list(true);
558 	musthave(c, KEYWORD|sALIAS);
559 	return (list);
560 }
561 
562 static struct op *
thenpart(void)563 thenpart(void)
564 {
565 	struct op *t;
566 
567 	musthave(THEN, KEYWORD|sALIAS);
568 	t = newtp(0);
569 	t->left = c_list(true);
570 	if (t->left == NULL)
571 		syntaxerr(NULL);
572 	t->right = elsepart();
573 	return (t);
574 }
575 
576 static struct op *
elsepart(void)577 elsepart(void)
578 {
579 	struct op *t;
580 
581 	switch (token(KEYWORD|sALIAS|CMDASN)) {
582 	case ELSE:
583 		if ((t = c_list(true)) == NULL)
584 			syntaxerr(NULL);
585 		return (t);
586 
587 	case ELIF:
588 		t = newtp(TELIF);
589 		t->left = c_list(true);
590 		t->right = thenpart();
591 		return (t);
592 
593 	default:
594 		REJECT;
595 	}
596 	return (NULL);
597 }
598 
599 static struct op *
caselist(void)600 caselist(void)
601 {
602 	struct op *t, *tl;
603 	int c;
604 
605 	c = token(CONTIN|KEYWORD|sALIAS);
606 	/* A {...} can be used instead of in...esac for case statements */
607 	if (c == IN)
608 		c = ESAC;
609 	else if (c == '{')
610 		c = '}';
611 	else
612 		syntaxerr(NULL);
613 	t = tl = NULL;
614 	/* no ALIAS here */
615 	while ((tpeek(CONTIN|KEYWORD|ESACONLY)) != c) {
616 		struct op *tc = casepart(c);
617 		if (tl == NULL)
618 			t = tl = tc, tl->right = NULL;
619 		else
620 			tl->right = tc, tl = tc;
621 	}
622 	musthave(c, KEYWORD|sALIAS);
623 	return (t);
624 }
625 
626 static struct op *
casepart(int endtok)627 casepart(int endtok)
628 {
629 	struct op *t;
630 	XPtrV ptns;
631 
632 	XPinit(ptns, 16);
633 	t = newtp(TPAT);
634 	/* no ALIAS here */
635 	if (token(CONTIN | KEYWORD) != '(')
636 		REJECT;
637 	do {
638 		switch (token(0)) {
639 		case LWORD:
640 			break;
641 		case '}':
642 		case ESAC:
643 			if (symbol != endtok) {
644 				strdupx(yylval.cp,
645 				    symbol == '}' ? Tcbrace : Tesac, ATEMP);
646 				break;
647 			}
648 			/* FALLTHROUGH */
649 		default:
650 			syntaxerr(NULL);
651 		}
652 		XPput(ptns, yylval.cp);
653 	} while (token(0) == '|');
654 	REJECT;
655 	XPput(ptns, NULL);
656 	t->vars = (char **)XPclose(ptns);
657 	musthave(')', 0);
658 
659 	t->left = c_list(true);
660 
661 	/* initialise to default for ;; or omitted */
662 	t->u.charflag = ';';
663 	/* SUSv4 requires the ;; except in the last casepart */
664 	if ((tpeek(CONTIN|KEYWORD|sALIAS)) != endtok)
665 		switch (symbol) {
666 		default:
667 			syntaxerr(NULL);
668 		case BRKEV:
669 			t->u.charflag = '|';
670 			if (0)
671 				/* FALLTHROUGH */
672 		case BRKFT:
673 			t->u.charflag = '&';
674 			/* FALLTHROUGH */
675 		case BREAK:
676 			/* initialised above, but we need to eat the token */
677 			ACCEPT;
678 		}
679 	return (t);
680 }
681 
682 static struct op *
function_body(char * name,bool ksh_func)683 function_body(char *name,
684     /* function foo { ... } vs foo() { .. } */
685     bool ksh_func)
686 {
687 	char *sname, *p;
688 	struct op *t;
689 
690 	sname = wdstrip(name, 0);
691 	/*-
692 	 * Check for valid characters in name. POSIX and AT&T ksh93 say
693 	 * only allow [a-zA-Z_0-9] but this allows more as old pdkshs
694 	 * have allowed more; the following were never allowed:
695 	 *	NUL TAB NL SP " $ & ' ( ) ; < = > \ ` |
696 	 * C_QUOTE covers all but adds # * ? [ ]
697 	 */
698 	for (p = sname; *p; p++)
699 		if (ctype(*p, C_QUOTE))
700 			yyerror("%s: invalid function name\n", sname);
701 
702 	/*
703 	 * Note that POSIX allows only compound statements after foo(),
704 	 * sh and AT&T ksh allow any command, go with the later since it
705 	 * shouldn't break anything. However, for function foo, AT&T ksh
706 	 * only accepts an open-brace.
707 	 */
708 	if (ksh_func) {
709 		if (tpeek(CONTIN|KEYWORD|sALIAS) == '(' /*)*/) {
710 			/* function foo () { //}*/
711 			ACCEPT;
712 			musthave(')', 0);
713 			/* degrade to POSIX function */
714 			ksh_func = false;
715 		}
716 		musthave('{' /*}*/, CONTIN|KEYWORD|sALIAS);
717 		REJECT;
718 	}
719 
720 	t = newtp(TFUNCT);
721 	t->str = sname;
722 	t->u.ksh_func = tobool(ksh_func);
723 	t->lineno = source->line;
724 
725 	if ((t->left = get_command(CONTIN)) == NULL) {
726 		char *tv;
727 		/*
728 		 * Probably something like foo() followed by EOF or ';'.
729 		 * This is accepted by sh and ksh88.
730 		 * To make "typeset -f foo" work reliably (so its output can
731 		 * be used as input), we pretend there is a colon here.
732 		 */
733 		t->left = newtp(TCOM);
734 		/* (2 * sizeof(char *)) is small enough */
735 		t->left->args = alloc(2 * sizeof(char *), ATEMP);
736 		t->left->args[0] = tv = alloc(3, ATEMP);
737 		tv[0] = QCHAR;
738 		tv[1] = ':';
739 		tv[2] = EOS;
740 		t->left->args[1] = NULL;
741 		t->left->vars = alloc(sizeof(char *), ATEMP);
742 		t->left->vars[0] = NULL;
743 		t->left->lineno = 1;
744 	}
745 
746 	return (t);
747 }
748 
749 static char **
wordlist(void)750 wordlist(void)
751 {
752 	int c;
753 	XPtrV args;
754 
755 	XPinit(args, 16);
756 	/* POSIX does not do alias expansion here... */
757 	if ((c = token(CONTIN|KEYWORD|sALIAS)) != IN) {
758 		if (c != ';')
759 			/* non-POSIX, but AT&T ksh accepts a ; here */
760 			REJECT;
761 		return (NULL);
762 	}
763 	while ((c = token(0)) == LWORD)
764 		XPput(args, yylval.cp);
765 	if (c != '\n' && c != ';')
766 		syntaxerr(NULL);
767 	XPput(args, NULL);
768 	return ((char **)XPclose(args));
769 }
770 
771 /*
772  * supporting functions
773  */
774 
775 static struct op *
block(int type,struct op * t1,struct op * t2)776 block(int type, struct op *t1, struct op *t2)
777 {
778 	struct op *t;
779 
780 	t = newtp(type);
781 	t->left = t1;
782 	t->right = t2;
783 	return (t);
784 }
785 
/*
 * Token spellings: initkeywords() enters the reserved ones into the
 * keyword table, syntaxerr() uses all of them to name a token in
 * its message. The list ends with a NULL name.
 */
static const struct tokeninfo {
	const char *name;	/* spelling of the token */
	short val;		/* token value */
	short reserved;		/* true: entered as keyword by initkeywords() */
} tokentab[] = {
	/* Reserved words */
	{ "if",		IF,	true },
	{ "then",	THEN,	true },
	{ "else",	ELSE,	true },
	{ "elif",	ELIF,	true },
	{ "fi",		FI,	true },
	{ "case",	CASE,	true },
	{ Tesac,	ESAC,	true },
	{ "for",	FOR,	true },
	{ Tselect,	SELECT,	true },
	{ "while",	WHILE,	true },
	{ "until",	UNTIL,	true },
	{ "do",		DO,	true },
	{ "done",	DONE,	true },
	{ "in",		IN,	true },
	{ Tfunction,	FUNCTION, true },
	{ Ttime,	TIME,	true },
	{ "{",		'{',	true },
	{ Tcbrace,	'}',	true },
	{ "!",		BANG,	true },
	{ "[[",		DBRACKET, true },
	/* Lexical tokens (0[EOF], LWORD and REDIR handled specially) */
	{ "&&",		LOGAND,	false },
	{ "||",		LOGOR,	false },
	{ ";;",		BREAK,	false },
	{ ";|",		BRKEV,	false },
	{ ";&",		BRKFT,	false },
	{ "((",		MDPAREN, false },
	{ "|&",		COPROC,	false },
	/* and some special cases... */
	{ "newline",	'\n',	false },
	{ NULL,		0,	false }
};
824 
825 void
initkeywords(void)826 initkeywords(void)
827 {
828 	struct tokeninfo const *tt;
829 	struct tbl *p;
830 
831 	ktinit(APERM, &keywords,
832 	    /* currently 28 keywords: 75% of 64 = 2^6 */
833 	    6);
834 	for (tt = tokentab; tt->name; tt++) {
835 		if (tt->reserved) {
836 			p = ktenter(&keywords, tt->name, hash(tt->name));
837 			p->flag |= DEFINED|ISSET;
838 			p->type = CKEYWD;
839 			p->val.i = tt->val;
840 		}
841 	}
842 }
843 
844 static void
syntaxerr(const char * what)845 syntaxerr(const char *what)
846 {
847 	/* 23<<- is the longest redirection, I think */
848 	char redir[8];
849 	const char *s;
850 	struct tokeninfo const *tt;
851 	int c;
852 
853 	if (!what)
854 		what = Tunexpected;
855 	REJECT;
856 	c = token(0);
857  Again:
858 	switch (c) {
859 	case 0:
860 		if (nesting.start_token) {
861 			c = nesting.start_token;
862 			source->errline = nesting.start_line;
863 			what = "unmatched";
864 			goto Again;
865 		}
866 		/* don't quote the EOF */
867 		yyerror("%s: unexpected EOF\n", Tsynerr);
868 		/* NOTREACHED */
869 
870 	case LWORD:
871 		s = snptreef(NULL, 32, Tf_S, yylval.cp);
872 		break;
873 
874 	case REDIR:
875 		s = snptreef(redir, sizeof(redir), Tft_R, yylval.iop);
876 		break;
877 
878 	default:
879 		for (tt = tokentab; tt->name; tt++)
880 			if (tt->val == c)
881 			    break;
882 		if (tt->name)
883 			s = tt->name;
884 		else {
885 			if (c > 0 && c < 256) {
886 				redir[0] = c;
887 				redir[1] = '\0';
888 			} else
889 				shf_snprintf(redir, sizeof(redir),
890 					"?%d", c);
891 			s = redir;
892 		}
893 	}
894 	yyerror("%s: '%s' %s\n", Tsynerr, s, what);
895 }
896 
897 static void
nesting_push(struct nesting_state * save,int tok)898 nesting_push(struct nesting_state *save, int tok)
899 {
900 	*save = nesting;
901 	nesting.start_token = tok;
902 	nesting.start_line = source->line;
903 }
904 
905 static void
nesting_pop(struct nesting_state * saved)906 nesting_pop(struct nesting_state *saved)
907 {
908 	nesting = *saved;
909 }
910 
911 static struct op *
newtp(int type)912 newtp(int type)
913 {
914 	struct op *t;
915 
916 	t = alloc(sizeof(struct op), ATEMP);
917 	t->type = type;
918 	t->u.evalflags = 0;
919 	t->args = NULL;
920 	t->vars = NULL;
921 	t->ioact = NULL;
922 	t->left = t->right = NULL;
923 	t->str = NULL;
924 	return (t);
925 }
926 
927 struct op *
compile(Source * s,bool skiputf8bom)928 compile(Source *s, bool skiputf8bom)
929 {
930 	nesting.start_token = 0;
931 	nesting.start_line = 0;
932 	herep = heres;
933 	source = s;
934 	if (skiputf8bom)
935 		yyskiputf8bom();
936 	yyparse();
937 	return (outtree);
938 }
939 
940 /*-
941  * This kludge exists to take care of sh/AT&T ksh oddity in which
942  * the arguments of alias/export/readonly/typeset have no field
943  * splitting, file globbing, or (normal) tilde expansion done.
944  * AT&T ksh seems to do something similar to this since
945  *	$ touch a=a; typeset a=[ab]; echo "$a"
946  *	a=[ab]
947  *	$ x=typeset; $x a=[ab]; echo "$a"
948  *	a=a
949  *	$
950  */
951 int
assign_command(const char * s,bool docommand)952 assign_command(const char *s, bool docommand)
953 {
954 	if (!*s)
955 		return (0);
956 	return ((strcmp(s, Talias) == 0) ||
957 	    (strcmp(s, Texport) == 0) ||
958 	    (strcmp(s, Treadonly) == 0) ||
959 	    (docommand && (strcmp(s, Tcommand) == 0)) ||
960 	    (strcmp(s, Ttypeset) == 0));
961 }
962 
963 /* Check if we are in the middle of reading an alias */
964 static int
inalias(struct source * s)965 inalias(struct source *s)
966 {
967 	while (s && s->type == SALIAS) {
968 		if (!(s->flags & SF_ALIASEND))
969 			return (1);
970 		s = s->next;
971 	}
972 	return (0);
973 }
974 
975 
/*
 * Order important - indexed by Test_meta values
 * Note that ||, &&, ( and ) can't appear as unquoted strings
 * in normal shell input, so these can be interpreted unambiguously
 * in the evaluation pass.
 */
static const char dbtest_or[] = { CHAR, '|', CHAR, '|', EOS };
static const char dbtest_and[] = { CHAR, '&', CHAR, '&', EOS };
static const char dbtest_not[] = { CHAR, '!', EOS };
static const char dbtest_oparen[] = { CHAR, '(', EOS };
static const char dbtest_cparen[] = { CHAR, ')', EOS };
const char * const dbtest_tokens[] = {
	dbtest_or, dbtest_and, dbtest_not,
	dbtest_oparen, dbtest_cparen
};
/* word form of the closing "]]", matched by dbtestp_isa() */
static const char db_close[] = { CHAR, ']', CHAR, ']', EOS };
/* words stored by dbtestp_isa() in place of a plain < or > redirection token */
static const char db_lthan[] = { CHAR, '<', EOS };
static const char db_gthan[] = { CHAR, '>', EOS };
994 
995 /*
996  * Test if the current token is a whatever. Accepts the current token if
997  * it is. Returns 0 if it is not, non-zero if it is (in the case of
998  * TM_UNOP and TM_BINOP, the returned value is a Test_op).
999  */
1000 static Test_op
dbtestp_isa(Test_env * te,Test_meta meta)1001 dbtestp_isa(Test_env *te, Test_meta meta)
1002 {
1003 	int c = tpeek(CMDASN | (meta == TM_BINOP ? 0 : CONTIN));
1004 	bool uqword;
1005 	char *save = NULL;
1006 	Test_op ret = TO_NONOP;
1007 
1008 	/* unquoted word? */
1009 	uqword = c == LWORD && *ident;
1010 
1011 	if (meta == TM_OR)
1012 		ret = c == LOGOR ? TO_NONNULL : TO_NONOP;
1013 	else if (meta == TM_AND)
1014 		ret = c == LOGAND ? TO_NONNULL : TO_NONOP;
1015 	else if (meta == TM_NOT)
1016 		ret = (uqword && !strcmp(yylval.cp,
1017 		    dbtest_tokens[(int)TM_NOT])) ? TO_NONNULL : TO_NONOP;
1018 	else if (meta == TM_OPAREN)
1019 		ret = c == '(' /*)*/ ? TO_NONNULL : TO_NONOP;
1020 	else if (meta == TM_CPAREN)
1021 		ret = c == /*(*/ ')' ? TO_NONNULL : TO_NONOP;
1022 	else if (meta == TM_UNOP || meta == TM_BINOP) {
1023 		if (meta == TM_BINOP && c == REDIR &&
1024 		    (yylval.iop->ioflag == IOREAD ||
1025 		    yylval.iop->ioflag == IOWRITE)) {
1026 			ret = TO_NONNULL;
1027 			save = wdcopy(yylval.iop->ioflag == IOREAD ?
1028 			    db_lthan : db_gthan, ATEMP);
1029 		} else if (uqword && (ret = test_isop(meta, ident)))
1030 			save = yylval.cp;
1031 	} else
1032 		/* meta == TM_END */
1033 		ret = (uqword && !strcmp(yylval.cp,
1034 		    db_close)) ? TO_NONNULL : TO_NONOP;
1035 	if (ret != TO_NONOP) {
1036 		ACCEPT;
1037 		if ((unsigned int)meta < NELEM(dbtest_tokens))
1038 			save = wdcopy(dbtest_tokens[(int)meta], ATEMP);
1039 		if (save)
1040 			XPput(*te->pos.av, save);
1041 	}
1042 	return (ret);
1043 }
1044 
1045 static const char *
dbtestp_getopnd(Test_env * te,Test_op op MKSH_A_UNUSED,bool do_eval MKSH_A_UNUSED)1046 dbtestp_getopnd(Test_env *te, Test_op op MKSH_A_UNUSED,
1047     bool do_eval MKSH_A_UNUSED)
1048 {
1049 	int c = tpeek(CMDASN);
1050 
1051 	if (c != LWORD)
1052 		return (NULL);
1053 
1054 	ACCEPT;
1055 	XPput(*te->pos.av, yylval.cp);
1056 
1057 	return (null);
1058 }
1059 
1060 static int
dbtestp_eval(Test_env * te MKSH_A_UNUSED,Test_op op MKSH_A_UNUSED,const char * opnd1 MKSH_A_UNUSED,const char * opnd2 MKSH_A_UNUSED,bool do_eval MKSH_A_UNUSED)1061 dbtestp_eval(Test_env *te MKSH_A_UNUSED, Test_op op MKSH_A_UNUSED,
1062     const char *opnd1 MKSH_A_UNUSED, const char *opnd2 MKSH_A_UNUSED,
1063     bool do_eval MKSH_A_UNUSED)
1064 {
1065 	return (1);
1066 }
1067 
1068 static void
dbtestp_error(Test_env * te,int offset,const char * msg)1069 dbtestp_error(Test_env *te, int offset, const char *msg)
1070 {
1071 	te->flags |= TEF_ERROR;
1072 
1073 	if (offset < 0) {
1074 		REJECT;
1075 		/* Kludgy to say the least... */
1076 		symbol = LWORD;
1077 		yylval.cp = *(XPptrv(*te->pos.av) + XPsize(*te->pos.av) +
1078 		    offset);
1079 	}
1080 	syntaxerr(msg);
1081 }
1082 
#if HAVE_SELECT

/* fall back to another errno value where EOVERFLOW is missing */
#ifndef EOVERFLOW
#ifdef ERANGE
#define EOVERFLOW	ERANGE
#else
#define EOVERFLOW	EINVAL
#endif
#endif

/*
 * Convert a decimal string "seconds[.fraction]" into *tv.
 * Fraction digits beyond the sixth are skipped without being used.
 * Returns false on success; on failure returns true with errno set
 * to EOVERFLOW (seconds too large) or EINVAL (junk in the string).
 */
bool
parse_usec(const char *s, struct timeval *tv)
{
	struct timeval next;
	int scale;

	/* parse integral part */
	for (tv->tv_sec = 0; ksh_isdigit(*s); tv->tv_sec = next.tv_sec) {
		next.tv_sec = tv->tv_sec * 10 + ksh_numdig(*s++);
		/*XXX this overflow check maybe UB */
		if (next.tv_sec / 10 != tv->tv_sec) {
			errno = EOVERFLOW;
			return (true);
		}
	}

	tv->tv_usec = 0;
	/* no decimal fraction */
	if (!*s)
		return (false);
	/* junk after integral part */
	if (*s++ != '.') {
		errno = EINVAL;
		return (true);
	}

	/* parse decimal fraction, one decimal place per digit */
	for (scale = 100000; ksh_isdigit(*s); scale /= 10) {
		tv->tv_usec += scale * ksh_numdig(*s++);
		if (scale == 1)
			break;
	}
	/* skip the digits that no longer fit, then check for junk */
	while (ksh_isdigit(*s))
		++s;
	if (*s) {
		errno = EINVAL;
		return (true);
	}

	/* end of input string reached, no errors */
	return (false);
}
#endif
1141 
1142 /*
1143  * Helper function called from within lex.c:yylex() to parse
1144  * a COMSUB recursively using the main shell parser and lexer
1145  */
1146 char *
yyrecursive(int subtype MKSH_A_UNUSED)1147 yyrecursive(int subtype MKSH_A_UNUSED)
1148 {
1149 	struct op *t;
1150 	char *cp;
1151 	struct yyrecursive_state *ys;
1152 	int stok, etok;
1153 
1154 	if (subtype != COMSUB) {
1155 		stok = '{';
1156 		etok = '}';
1157 	} else {
1158 		stok = '(';
1159 		etok = ')';
1160 	}
1161 
1162 	ys = alloc(sizeof(struct yyrecursive_state), ATEMP);
1163 
1164 	/* tell the lexer to accept a closing parenthesis as EOD */
1165 	ys->old_nesting_type = subshell_nesting_type;
1166 	subshell_nesting_type = etok;
1167 
1168 	/* push reject state, parse recursively, pop reject state */
1169 	ys->old_reject = reject;
1170 	ys->old_symbol = symbol;
1171 	ACCEPT;
1172 	memcpy(ys->old_heres, heres, sizeof(heres));
1173 	ys->old_herep = herep;
1174 	herep = heres;
1175 	ys->old_salias = sALIAS;
1176 	sALIAS = 0;
1177 	ys->next = e->yyrecursive_statep;
1178 	e->yyrecursive_statep = ys;
1179 	/* we use TPAREN as a helper container here */
1180 	t = nested(TPAREN, stok, etok);
1181 	yyrecursive_pop(false);
1182 
1183 	/* t->left because nested(TPAREN, ...) hides our goodies there */
1184 	cp = snptreef(NULL, 0, Tf_T, t->left);
1185 	tfree(t, ATEMP);
1186 
1187 	return (cp);
1188 }
1189 
1190 void
yyrecursive_pop(bool popall)1191 yyrecursive_pop(bool popall)
1192 {
1193 	struct yyrecursive_state *ys;
1194 
1195  popnext:
1196 	if (!(ys = e->yyrecursive_statep))
1197 		return;
1198 	e->yyrecursive_statep = ys->next;
1199 
1200 	sALIAS = ys->old_salias;
1201 	memcpy(heres, ys->old_heres, sizeof(heres));
1202 	herep = ys->old_herep;
1203 	reject = ys->old_reject;
1204 	symbol = ys->old_symbol;
1205 
1206 	subshell_nesting_type = ys->old_nesting_type;
1207 
1208 	afree(ys, ATEMP);
1209 	if (popall)
1210 		goto popnext;
1211 }
1212