1 /*	$OpenBSD: syn.c,v 1.30 2015/09/01 13:12:31 tedu Exp $	*/
2 
3 /*-
4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009,
5  *		 2011, 2012, 2013, 2014, 2015, 2016
6  *	mirabilos <m@mirbsd.org>
7  *
8  * Provided that these terms and disclaimer and all copyright notices
9  * are retained or reproduced in an accompanying document, permission
10  * is granted to deal in this work without restriction, including un-
11  * limited rights to use, publicly perform, distribute, sell, modify,
12  * merge, give away, or sublicence.
13  *
14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15  * the utmost extent permitted by applicable law, neither express nor
16  * implied; without malicious intent or gross negligence. In no event
17  * may a licensor, author or contributor be held liable for indirect,
18  * direct, other damage, loss, or other issues arising in any way out
19  * of dealing in the work, even if advised of the possibility of such
20  * damage or existence of a defect, except proven that it results out
21  * of said person's immediate fault when using the work as intended.
22  */
23 
24 #include "sh.h"
25 
26 __RCSID("$MirOS: src/bin/mksh/syn.c,v 1.109 2016/01/19 23:12:15 tg Exp $");
27 
/*
 * Describes the construct the parser is currently nested in; saved and
 * restored with nesting_push()/nesting_pop() and consulted by syntaxerr()
 * when EOF is hit.
 */
struct nesting_state {
	int start_token;	/* token that began nesting (eg, FOR) */
	int start_line;		/* line nesting began on */
};
32 
/*
 * Parser state saved by yyrecursive() before a nested parse and put back
 * by yyrecursive_pop(); entries are chained via e->yyrecursive_statep.
 */
struct yyrecursive_state {
	struct yyrecursive_state *next;	/* previously pushed state, if any */
	struct ioword **old_herep;	/* saved value of herep */
	int old_symbol;			/* saved value of symbol */
	int old_salias;			/* saved value of sALIAS */
	int old_nesting_type;		/* saved subshell_nesting_type */
	bool old_reject;		/* saved value of reject */
};
41 
/*
 * Forward declarations of the file-local parser routines, their
 * supporting functions, and the [[ ... ]] test-parser callbacks.
 */
static void yyparse(void);
static struct op *pipeline(int);
static struct op *andor(void);
static struct op *c_list(bool);
static struct ioword *synio(int);
static struct op *nested(int, int, int);
static struct op *get_command(int);
static struct op *dogroup(void);
static struct op *thenpart(void);
static struct op *elsepart(void);
static struct op *caselist(void);
static struct op *casepart(int);
static struct op *function_body(char *, bool);
static char **wordlist(void);
static struct op *block(int, struct op *, struct op *);
static struct op *newtp(int);
static void syntaxerr(const char *) MKSH_A_NORETURN;
static void nesting_push(struct nesting_state *, int);
static void nesting_pop(struct nesting_state *);
static int inalias(struct source *) MKSH_A_PURE;
/* callbacks installed into a Test_env by get_command() for [[ ... ]] */
static Test_op dbtestp_isa(Test_env *, Test_meta);
static const char *dbtestp_getopnd(Test_env *, Test_op, bool);
static int dbtestp_eval(Test_env *, Test_op, const char *,
    const char *, bool);
static void dbtestp_error(Test_env *, int, const char *) MKSH_A_NORETURN;
67 
static struct op *outtree;		/* yyparse output */
static struct nesting_state nesting;	/* construct currently being parsed */

static bool reject;			/* token(cf) gets symbol again */
static int symbol;			/* yylex value */
static int sALIAS = ALIAS;		/* 0 in yyrecursive */

/*
 * One-token look-ahead on top of yylex():
 * REJECT marks the token in symbol as not yet consumed, ACCEPT marks it
 * as consumed. token(cf) hands out the pending token (and consumes it)
 * if there is one, otherwise lexes a new one with flags cf. tpeek(cf)
 * yields the same token but leaves it pending. musthave(c,cf) consumes
 * a token and raises a syntax error unless it is c.
 */
#define REJECT		(reject = true)
#define ACCEPT		(reject = false)
#define token(cf)	((reject) ? (ACCEPT, symbol) : (symbol = yylex(cf)))
#define tpeek(cf)	((reject) ? (symbol) : (REJECT, symbol = yylex(cf)))
#define musthave(c,cf)	do { if (token(cf) != (c)) syntaxerr(NULL); } while (/* CONSTCOND */ 0)

/* shared spellings, used by casepart() and in tokentab */
static const char Tcbrace[] = "}";
static const char Tesac[] = "esac";
83 
84 static void
yyparse(void)85 yyparse(void)
86 {
87 	int c;
88 
89 	ACCEPT;
90 
91 	outtree = c_list(source->type == SSTRING);
92 	c = tpeek(0);
93 	if (c == 0 && !outtree)
94 		outtree = newtp(TEOF);
95 	else if (c != '\n' && c != 0)
96 		syntaxerr(NULL);
97 }
98 
99 static struct op *
pipeline(int cf)100 pipeline(int cf)
101 {
102 	struct op *t, *p, *tl = NULL;
103 
104 	t = get_command(cf);
105 	if (t != NULL) {
106 		while (token(0) == '|') {
107 			if ((p = get_command(CONTIN)) == NULL)
108 				syntaxerr(NULL);
109 			if (tl == NULL)
110 				t = tl = block(TPIPE, t, p);
111 			else
112 				tl = tl->right = block(TPIPE, tl->right, p);
113 		}
114 		REJECT;
115 	}
116 	return (t);
117 }
118 
119 static struct op *
andor(void)120 andor(void)
121 {
122 	struct op *t, *p;
123 	int c;
124 
125 	t = pipeline(0);
126 	if (t != NULL) {
127 		while ((c = token(0)) == LOGAND || c == LOGOR) {
128 			if ((p = pipeline(CONTIN)) == NULL)
129 				syntaxerr(NULL);
130 			t = block(c == LOGAND? TAND: TOR, t, p);
131 		}
132 		REJECT;
133 	}
134 	return (t);
135 }
136 
137 static struct op *
c_list(bool multi)138 c_list(bool multi)
139 {
140 	struct op *t = NULL, *p, *tl = NULL;
141 	int c;
142 	bool have_sep;
143 
144 	while (/* CONSTCOND */ 1) {
145 		p = andor();
146 		/*
147 		 * Token has always been read/rejected at this point, so
148 		 * we don't worry about what flags to pass token()
149 		 */
150 		c = token(0);
151 		have_sep = true;
152 		if (c == '\n' && (multi || inalias(source))) {
153 			if (!p)
154 				/* ignore blank lines */
155 				continue;
156 		} else if (!p)
157 			break;
158 		else if (c == '&' || c == COPROC)
159 			p = block(c == '&' ? TASYNC : TCOPROC, p, NULL);
160 		else if (c != ';')
161 			have_sep = false;
162 		if (!t)
163 			t = p;
164 		else if (!tl)
165 			t = tl = block(TLIST, t, p);
166 		else
167 			tl = tl->right = block(TLIST, tl->right, p);
168 		if (!have_sep)
169 			break;
170 	}
171 	REJECT;
172 	return (t);
173 }
174 
/* delimiter word used when "<<" is directly followed by a newline */
static const char IONDELIM_delim[] = { CHAR, '<', CHAR, '<', EOS };

/*
 * Parse one I/O redirection, if the next token is one.
 *
 * cf is the lexer flag set used to peek at the next token. Returns the
 * parsed ioword, or NULL (token left pending) if the next token is not
 * a REDIR. A redirection flagged IOBASH is split in two: the first is
 * returned right away, the second (an IODUP on unit 2 naming the first
 * one's unit) is kept in the static nextiop and handed out by the very
 * next call, before any token is looked at.
 */
static struct ioword *
synio(int cf)
{
	struct ioword *iop;
	static struct ioword *nextiop;
	bool ishere;

	/* second half of a split IOBASH redirection is still pending */
	if (nextiop != NULL) {
		iop = nextiop;
		nextiop = NULL;
		return (iop);
	}

	if (tpeek(cf) != REDIR)
		return (NULL);
	ACCEPT;
	iop = yylval.iop;
	ishere = (iop->ioflag & IOTYPE) == IOHERE;
	if (iop->ioflag & IOHERESTR) {
		musthave(LWORD, 0);
	} else if (ishere && tpeek(HEREDELIM) == '\n') {
		/* no delimiter given: substitute IONDELIM_delim */
		ACCEPT;
		yylval.cp = wdcopy(IONDELIM_delim, ATEMP);
		iop->ioflag |= IOEVAL | IONDELIM;
	} else
		musthave(LWORD, ishere ? HEREDELIM : 0);
	if (ishere) {
		iop->delim = yylval.cp;
		if (*ident != 0 && !(iop->ioflag & IOHERESTR)) {
			/* unquoted */
			iop->ioflag |= IOEVAL;
		}
		/* record it in heres[], refusing to run past the array */
		if (herep > &heres[HERES - 1])
			yyerror("too many %ss\n", "<<");
		*herep++ = iop;
	} else
		iop->ioname = yylval.cp;

	if (iop->ioflag & IOBASH) {
		char *cp;

		/* build the follow-up redirection returned by the next call */
		nextiop = alloc(sizeof(*iop), ATEMP);
#ifdef MKSH_CONSERVATIVE_FDS
		/* room for one digit: CHAR, digit, EOS */
		nextiop->ioname = cp = alloc(3, ATEMP);
#else
		/* room for two digits: CHAR, digit, CHAR, digit, EOS */
		nextiop->ioname = cp = alloc(5, ATEMP);

		if (iop->unit > 9) {
			*cp++ = CHAR;
			*cp++ = digits_lc[iop->unit / 10];
		}
#endif
		*cp++ = CHAR;
		*cp++ = digits_lc[iop->unit % 10];
		*cp = EOS;

		iop->ioflag &= ~IOBASH;
		nextiop->unit = 2;
		nextiop->ioflag = IODUP;
		nextiop->delim = NULL;
		nextiop->heredoc = NULL;
	}
	return (iop);
}
241 
242 static struct op *
nested(int type,int smark,int emark)243 nested(int type, int smark, int emark)
244 {
245 	struct op *t;
246 	struct nesting_state old_nesting;
247 
248 	nesting_push(&old_nesting, smark);
249 	t = c_list(true);
250 	musthave(emark, KEYWORD|sALIAS);
251 	nesting_pop(&old_nesting);
252 	return (block(type, t, NULL));
253 }
254 
/*
 * Pre-encoded words that get_command() inserts into argument vectors:
 * let_cmd for (( ... )), and the setA_cmd* words (set, -A, --) for the
 * foo=(bar) array assignment form.
 */
static const char let_cmd[] = {
	QCHAR, 'l', CHAR, 'e', CHAR, 't', CHAR, ']', EOS
};
static const char setA_cmd0[] = {
	QCHAR, 's', CHAR, 'e', CHAR, 't', EOS
};
static const char setA_cmd1[] = {
	CHAR, '-', CHAR, 'A', EOS
};
static const char setA_cmd2[] = {
	CHAR, '-', CHAR, '-', EOS
};
267 
268 static struct op *
get_command(int cf)269 get_command(int cf)
270 {
271 	struct op *t;
272 	int c, iopn = 0, syniocf, lno;
273 	struct ioword *iop, **iops;
274 	XPtrV args, vars;
275 	struct nesting_state old_nesting;
276 
277 	/* NUFILE is small enough to leave this addition unchecked */
278 	iops = alloc2((NUFILE + 1), sizeof(struct ioword *), ATEMP);
279 	XPinit(args, 16);
280 	XPinit(vars, 16);
281 
282 	syniocf = KEYWORD|sALIAS;
283 	switch (c = token(cf|KEYWORD|sALIAS|CMDASN)) {
284 	default:
285 		REJECT;
286 		afree(iops, ATEMP);
287 		XPfree(args);
288 		XPfree(vars);
289 		/* empty line */
290 		return (NULL);
291 
292 	case LWORD:
293 	case REDIR:
294 		REJECT;
295 		syniocf &= ~(KEYWORD|sALIAS);
296 		t = newtp(TCOM);
297 		t->lineno = source->line;
298 		goto get_command_begin;
299 		while (/* CONSTCOND */ 1) {
300 			bool check_assign_cmd;
301 
302 			if (XPsize(args) == 0) {
303  get_command_begin:
304 				check_assign_cmd = true;
305 				cf = sALIAS | CMDASN;
306 			} else if (t->u.evalflags)
307 				cf = CMDWORD | CMDASN;
308 			else
309 				cf = CMDWORD;
310 			switch (tpeek(cf)) {
311 			case REDIR:
312 				while ((iop = synio(cf)) != NULL) {
313 					if (iopn >= NUFILE)
314 						yyerror("too many %ss\n",
315 						    "redirection");
316 					iops[iopn++] = iop;
317 				}
318 				break;
319 
320 			case LWORD:
321 				ACCEPT;
322 				/*
323 				 * the iopn == 0 and XPsize(vars) == 0 are
324 				 * dubious but AT&T ksh acts this way
325 				 */
326 				if (iopn == 0 && XPsize(vars) == 0 &&
327 				    check_assign_cmd) {
328 					if (assign_command(ident, false))
329 						t->u.evalflags = DOVACHECK;
330 					else if (strcmp(ident, Tcommand) != 0)
331 						check_assign_cmd = false;
332 				}
333 				if ((XPsize(args) == 0 || Flag(FKEYWORD)) &&
334 				    is_wdvarassign(yylval.cp))
335 					XPput(vars, yylval.cp);
336 				else
337 					XPput(args, yylval.cp);
338 				break;
339 
340 			case '(' /*)*/:
341 				if (XPsize(args) == 0 && XPsize(vars) == 1 &&
342 				    is_wdvarassign(yylval.cp)) {
343 					char *tcp;
344 
345 					/* wdarrassign: foo=(bar) */
346 					ACCEPT;
347 
348 					/* manipulate the vars string */
349 					tcp = XPptrv(vars)[(vars.len = 0)];
350 					/* 'varname=' -> 'varname' */
351 					tcp[wdscan(tcp, EOS) - tcp - 3] = EOS;
352 
353 					/* construct new args strings */
354 					XPput(args, wdcopy(setA_cmd0, ATEMP));
355 					XPput(args, wdcopy(setA_cmd1, ATEMP));
356 					XPput(args, tcp);
357 					XPput(args, wdcopy(setA_cmd2, ATEMP));
358 
359 					/* slurp in words till closing paren */
360 					while (token(CONTIN) == LWORD)
361 						XPput(args, yylval.cp);
362 					if (symbol != /*(*/ ')')
363 						syntaxerr(NULL);
364 				} else {
365 					/*
366 					 * Check for "> foo (echo hi)"
367 					 * which AT&T ksh allows (not
368 					 * POSIX, but not disallowed)
369 					 */
370 					afree(t, ATEMP);
371 					if (XPsize(args) == 0 &&
372 					    XPsize(vars) == 0) {
373 						ACCEPT;
374 						goto Subshell;
375 					}
376 
377 					/* must be a function */
378 					if (iopn != 0 || XPsize(args) != 1 ||
379 					    XPsize(vars) != 0)
380 						syntaxerr(NULL);
381 					ACCEPT;
382 					musthave(/*(*/')', 0);
383 					t = function_body(XPptrv(args)[0], false);
384 				}
385 				goto Leave;
386 
387 			default:
388 				goto Leave;
389 			}
390 		}
391  Leave:
392 		break;
393 
394 	case '(': /*)*/ {
395 		int subshell_nesting_type_saved;
396  Subshell:
397 		subshell_nesting_type_saved = subshell_nesting_type;
398 		subshell_nesting_type = ')';
399 		t = nested(TPAREN, '(', ')');
400 		subshell_nesting_type = subshell_nesting_type_saved;
401 		break;
402 	    }
403 
404 	case '{': /*}*/
405 		t = nested(TBRACE, '{', '}');
406 		break;
407 
408 	case MDPAREN:
409 		/* leave KEYWORD in syniocf (allow if (( 1 )) then ...) */
410 		lno = source->line;
411 		ACCEPT;
412 		switch (token(LETEXPR)) {
413 		case LWORD:
414 			break;
415 		case '(': /*)*/
416 			c = '(';
417 			goto Subshell;
418 		default:
419 			syntaxerr(NULL);
420 		}
421 		t = newtp(TCOM);
422 		t->lineno = lno;
423 		XPput(args, wdcopy(let_cmd, ATEMP));
424 		XPput(args, yylval.cp);
425 		break;
426 
427 	case DBRACKET: /* [[ .. ]] */
428 		/* leave KEYWORD in syniocf (allow if [[ -n 1 ]] then ...) */
429 		t = newtp(TDBRACKET);
430 		ACCEPT;
431 		{
432 			Test_env te;
433 
434 			te.flags = TEF_DBRACKET;
435 			te.pos.av = &args;
436 			te.isa = dbtestp_isa;
437 			te.getopnd = dbtestp_getopnd;
438 			te.eval = dbtestp_eval;
439 			te.error = dbtestp_error;
440 
441 			test_parse(&te);
442 		}
443 		break;
444 
445 	case FOR:
446 	case SELECT:
447 		t = newtp((c == FOR) ? TFOR : TSELECT);
448 		musthave(LWORD, CMDASN);
449 		if (!is_wdvarname(yylval.cp, true))
450 			yyerror("%s: %s\n", c == FOR ? "for" : Tselect,
451 			    "bad identifier");
452 		strdupx(t->str, ident, ATEMP);
453 		nesting_push(&old_nesting, c);
454 		t->vars = wordlist();
455 		t->left = dogroup();
456 		nesting_pop(&old_nesting);
457 		break;
458 
459 	case WHILE:
460 	case UNTIL:
461 		nesting_push(&old_nesting, c);
462 		t = newtp((c == WHILE) ? TWHILE : TUNTIL);
463 		t->left = c_list(true);
464 		t->right = dogroup();
465 		nesting_pop(&old_nesting);
466 		break;
467 
468 	case CASE:
469 		t = newtp(TCASE);
470 		musthave(LWORD, 0);
471 		t->str = yylval.cp;
472 		nesting_push(&old_nesting, c);
473 		t->left = caselist();
474 		nesting_pop(&old_nesting);
475 		break;
476 
477 	case IF:
478 		nesting_push(&old_nesting, c);
479 		t = newtp(TIF);
480 		t->left = c_list(true);
481 		t->right = thenpart();
482 		musthave(FI, KEYWORD|sALIAS);
483 		nesting_pop(&old_nesting);
484 		break;
485 
486 	case BANG:
487 		syniocf &= ~(KEYWORD|sALIAS);
488 		t = pipeline(0);
489 		if (t == NULL)
490 			syntaxerr(NULL);
491 		t = block(TBANG, NULL, t);
492 		break;
493 
494 	case TIME:
495 		syniocf &= ~(KEYWORD|sALIAS);
496 		t = pipeline(0);
497 		if (t && t->type == TCOM) {
498 			t->str = alloc(2, ATEMP);
499 			/* TF_* flags */
500 			t->str[0] = '\0';
501 			t->str[1] = '\0';
502 		}
503 		t = block(TTIME, t, NULL);
504 		break;
505 
506 	case FUNCTION:
507 		musthave(LWORD, 0);
508 		t = function_body(yylval.cp, true);
509 		break;
510 	}
511 
512 	while ((iop = synio(syniocf)) != NULL) {
513 		if (iopn >= NUFILE)
514 			yyerror("too many %ss\n", "redirection");
515 		iops[iopn++] = iop;
516 	}
517 
518 	if (iopn == 0) {
519 		afree(iops, ATEMP);
520 		t->ioact = NULL;
521 	} else {
522 		iops[iopn++] = NULL;
523 		iops = aresize2(iops, iopn, sizeof(struct ioword *), ATEMP);
524 		t->ioact = iops;
525 	}
526 
527 	if (t->type == TCOM || t->type == TDBRACKET) {
528 		XPput(args, NULL);
529 		t->args = (const char **)XPclose(args);
530 		XPput(vars, NULL);
531 		t->vars = (char **)XPclose(vars);
532 	} else {
533 		XPfree(args);
534 		XPfree(vars);
535 	}
536 
537 	if (c == MDPAREN) {
538 		t = block(TBRACE, t, NULL);
539 		t->ioact = t->left->ioact;
540 		t->left->ioact = NULL;
541 	}
542 
543 	return (t);
544 }
545 
546 static struct op *
dogroup(void)547 dogroup(void)
548 {
549 	int c;
550 	struct op *list;
551 
552 	c = token(CONTIN|KEYWORD|sALIAS);
553 	/*
554 	 * A {...} can be used instead of do...done for for/select loops
555 	 * but not for while/until loops - we don't need to check if it
556 	 * is a while loop because it would have been parsed as part of
557 	 * the conditional command list...
558 	 */
559 	if (c == DO)
560 		c = DONE;
561 	else if (c == '{')
562 		c = '}';
563 	else
564 		syntaxerr(NULL);
565 	list = c_list(true);
566 	musthave(c, KEYWORD|sALIAS);
567 	return (list);
568 }
569 
570 static struct op *
thenpart(void)571 thenpart(void)
572 {
573 	struct op *t;
574 
575 	musthave(THEN, KEYWORD|sALIAS);
576 	t = newtp(0);
577 	t->left = c_list(true);
578 	if (t->left == NULL)
579 		syntaxerr(NULL);
580 	t->right = elsepart();
581 	return (t);
582 }
583 
584 static struct op *
elsepart(void)585 elsepart(void)
586 {
587 	struct op *t;
588 
589 	switch (token(KEYWORD|sALIAS|CMDASN)) {
590 	case ELSE:
591 		if ((t = c_list(true)) == NULL)
592 			syntaxerr(NULL);
593 		return (t);
594 
595 	case ELIF:
596 		t = newtp(TELIF);
597 		t->left = c_list(true);
598 		t->right = thenpart();
599 		return (t);
600 
601 	default:
602 		REJECT;
603 	}
604 	return (NULL);
605 }
606 
607 static struct op *
caselist(void)608 caselist(void)
609 {
610 	struct op *t, *tl;
611 	int c;
612 
613 	c = token(CONTIN|KEYWORD|sALIAS);
614 	/* A {...} can be used instead of in...esac for case statements */
615 	if (c == IN)
616 		c = ESAC;
617 	else if (c == '{')
618 		c = '}';
619 	else
620 		syntaxerr(NULL);
621 	t = tl = NULL;
622 	/* no ALIAS here */
623 	while ((tpeek(CONTIN|KEYWORD|ESACONLY)) != c) {
624 		struct op *tc = casepart(c);
625 		if (tl == NULL)
626 			t = tl = tc, tl->right = NULL;
627 		else
628 			tl->right = tc, tl = tc;
629 	}
630 	musthave(c, KEYWORD|sALIAS);
631 	return (t);
632 }
633 
/*
 * Parse one "[(] pattern [| pattern]... ) list [;; | ;& | ;|]" part of
 * a case statement. endtok is the token closing the whole statement
 * (ESAC or '}'). Returns a TPAT node with the patterns in vars, the
 * command list on the left and the terminator kind in u.charflag.
 */
static struct op *
casepart(int endtok)
{
	struct op *t;
	XPtrV ptns;

	XPinit(ptns, 16);
	t = newtp(TPAT);
	/* no ALIAS here */
	/* the opening parenthesis is optional */
	if (token(CONTIN | KEYWORD) != '(')
		REJECT;
	do {
		switch (token(0)) {
		case LWORD:
			break;
		case '}':
		case ESAC:
			/*
			 * the closer not in use for this statement is
			 * taken as an ordinary pattern word
			 */
			if (symbol != endtok) {
				strdupx(yylval.cp,
				    symbol == '}' ? Tcbrace : Tesac, ATEMP);
				break;
			}
			/* FALLTHROUGH */
		default:
			syntaxerr(NULL);
		}
		XPput(ptns, yylval.cp);
	} while (token(0) == '|');
	REJECT;
	XPput(ptns, NULL);
	t->vars = (char **)XPclose(ptns);
	musthave(')', 0);

	t->left = c_list(true);

	/* initialise to default for ;; or omitted */
	t->u.charflag = ';';
	/* SUSv4 requires the ;; except in the last casepart */
	if ((tpeek(CONTIN|KEYWORD|sALIAS)) != endtok)
		switch (symbol) {
		default:
			syntaxerr(NULL);
		case BRKEV:
			t->u.charflag = '|';
			/*
			 * the if (0) skips the BRKFT assignment when
			 * arriving from BRKEV; a jump to the BRKFT label
			 * lands behind the test and performs it
			 */
			if (0)
				/* FALLTHROUGH */
		case BRKFT:
			t->u.charflag = '&';
			/* FALLTHROUGH */
		case BREAK:
			/* initialised above, but we need to eat the token */
			ACCEPT;
		}
	return (t);
}
689 
690 static struct op *
function_body(char * name,bool ksh_func)691 function_body(char *name,
692     /* function foo { ... } vs foo() { .. } */
693     bool ksh_func)
694 {
695 	char *sname, *p;
696 	struct op *t;
697 
698 	sname = wdstrip(name, 0);
699 	/*-
700 	 * Check for valid characters in name. POSIX and AT&T ksh93 say
701 	 * only allow [a-zA-Z_0-9] but this allows more as old pdkshs
702 	 * have allowed more; the following were never allowed:
703 	 *	NUL TAB NL SP " $ & ' ( ) ; < = > \ ` |
704 	 * C_QUOTE covers all but adds # * ? [ ]
705 	 */
706 	for (p = sname; *p; p++)
707 		if (ctype(*p, C_QUOTE))
708 			yyerror("%s: %s\n", sname, "invalid function name");
709 
710 	/*
711 	 * Note that POSIX allows only compound statements after foo(),
712 	 * sh and AT&T ksh allow any command, go with the later since it
713 	 * shouldn't break anything. However, for function foo, AT&T ksh
714 	 * only accepts an open-brace.
715 	 */
716 	if (ksh_func) {
717 		if (tpeek(CONTIN|KEYWORD|sALIAS) == '(' /*)*/) {
718 			/* function foo () { //}*/
719 			ACCEPT;
720 			musthave(')', 0);
721 			/* degrade to POSIX function */
722 			ksh_func = false;
723 		}
724 		musthave('{' /*}*/, CONTIN|KEYWORD|sALIAS);
725 		REJECT;
726 	}
727 
728 	t = newtp(TFUNCT);
729 	t->str = sname;
730 	t->u.ksh_func = tobool(ksh_func);
731 	t->lineno = source->line;
732 
733 	if ((t->left = get_command(CONTIN)) == NULL) {
734 		char *tv;
735 		/*
736 		 * Probably something like foo() followed by EOF or ';'.
737 		 * This is accepted by sh and ksh88.
738 		 * To make "typeset -f foo" work reliably (so its output can
739 		 * be used as input), we pretend there is a colon here.
740 		 */
741 		t->left = newtp(TCOM);
742 		/* (2 * sizeof(char *)) is small enough */
743 		t->left->args = alloc(2 * sizeof(char *), ATEMP);
744 		t->left->args[0] = tv = alloc(3, ATEMP);
745 		tv[0] = QCHAR;
746 		tv[1] = ':';
747 		tv[2] = EOS;
748 		t->left->args[1] = NULL;
749 		t->left->vars = alloc(sizeof(char *), ATEMP);
750 		t->left->vars[0] = NULL;
751 		t->left->lineno = 1;
752 	}
753 
754 	return (t);
755 }
756 
757 static char **
wordlist(void)758 wordlist(void)
759 {
760 	int c;
761 	XPtrV args;
762 
763 	XPinit(args, 16);
764 	/* POSIX does not do alias expansion here... */
765 	if ((c = token(CONTIN|KEYWORD|sALIAS)) != IN) {
766 		if (c != ';')
767 			/* non-POSIX, but AT&T ksh accepts a ; here */
768 			REJECT;
769 		return (NULL);
770 	}
771 	while ((c = token(0)) == LWORD)
772 		XPput(args, yylval.cp);
773 	if (c != '\n' && c != ';')
774 		syntaxerr(NULL);
775 	XPput(args, NULL);
776 	return ((char **)XPclose(args));
777 }
778 
779 /*
780  * supporting functions
781  */
782 
783 static struct op *
block(int type,struct op * t1,struct op * t2)784 block(int type, struct op *t1, struct op *t2)
785 {
786 	struct op *t;
787 
788 	t = newtp(type);
789 	t->left = t1;
790 	t->right = t2;
791 	return (t);
792 }
793 
/*
 * Table of token spellings: initkeywords() enters the rows flagged
 * reserved into the keywords table, and syntaxerr() searches it by
 * value to print a token. The row with a NULL name ends the table.
 */
static const struct tokeninfo {
	const char *name;	/* spelling of the token */
	short val;		/* token value */
	short reserved;		/* true if a reserved word */
} tokentab[] = {
	/* Reserved words */
	{ "if",		IF,	true },
	{ "then",	THEN,	true },
	{ "else",	ELSE,	true },
	{ "elif",	ELIF,	true },
	{ "fi",		FI,	true },
	{ "case",	CASE,	true },
	{ Tesac,	ESAC,	true },
	{ "for",	FOR,	true },
	{ Tselect,	SELECT,	true },
	{ "while",	WHILE,	true },
	{ "until",	UNTIL,	true },
	{ "do",		DO,	true },
	{ "done",	DONE,	true },
	{ "in",		IN,	true },
	{ Tfunction,	FUNCTION, true },
	{ "time",	TIME,	true },
	{ "{",		'{',	true },
	{ Tcbrace,	'}',	true },
	{ "!",		BANG,	true },
	{ "[[",		DBRACKET, true },
	/* Lexical tokens (0[EOF], LWORD and REDIR handled specially) */
	{ "&&",		LOGAND,	false },
	{ "||",		LOGOR,	false },
	{ ";;",		BREAK,	false },
	{ ";|",		BRKEV,	false },
	{ ";&",		BRKFT,	false },
	{ "((",		MDPAREN, false },
	{ "|&",		COPROC,	false },
	/* and some special cases... */
	{ "newline",	'\n',	false },
	{ NULL,		0,	false }
};
832 
833 void
initkeywords(void)834 initkeywords(void)
835 {
836 	struct tokeninfo const *tt;
837 	struct tbl *p;
838 
839 	ktinit(APERM, &keywords,
840 	    /* currently 28 keywords: 75% of 64 = 2^6 */
841 	    6);
842 	for (tt = tokentab; tt->name; tt++) {
843 		if (tt->reserved) {
844 			p = ktenter(&keywords, tt->name, hash(tt->name));
845 			p->flag |= DEFINED|ISSET;
846 			p->type = CKEYWD;
847 			p->val.i = tt->val;
848 		}
849 	}
850 }
851 
852 static void
syntaxerr(const char * what)853 syntaxerr(const char *what)
854 {
855 	/* 23<<- is the longest redirection, I think */
856 	char redir[8];
857 	const char *s;
858 	struct tokeninfo const *tt;
859 	int c;
860 
861 	if (!what)
862 		what = "unexpected";
863 	REJECT;
864 	c = token(0);
865  Again:
866 	switch (c) {
867 	case 0:
868 		if (nesting.start_token) {
869 			c = nesting.start_token;
870 			source->errline = nesting.start_line;
871 			what = "unmatched";
872 			goto Again;
873 		}
874 		/* don't quote the EOF */
875 		yyerror("%s: %s %s\n", Tsynerr, "unexpected", "EOF");
876 		/* NOTREACHED */
877 
878 	case LWORD:
879 		s = snptreef(NULL, 32, "%S", yylval.cp);
880 		break;
881 
882 	case REDIR:
883 		s = snptreef(redir, sizeof(redir), "%R", yylval.iop);
884 		break;
885 
886 	default:
887 		for (tt = tokentab; tt->name; tt++)
888 			if (tt->val == c)
889 			    break;
890 		if (tt->name)
891 			s = tt->name;
892 		else {
893 			if (c > 0 && c < 256) {
894 				redir[0] = c;
895 				redir[1] = '\0';
896 			} else
897 				shf_snprintf(redir, sizeof(redir),
898 					"?%d", c);
899 			s = redir;
900 		}
901 	}
902 	yyerror("%s: '%s' %s\n", Tsynerr, s, what);
903 }
904 
905 static void
nesting_push(struct nesting_state * save,int tok)906 nesting_push(struct nesting_state *save, int tok)
907 {
908 	*save = nesting;
909 	nesting.start_token = tok;
910 	nesting.start_line = source->line;
911 }
912 
913 static void
nesting_pop(struct nesting_state * saved)914 nesting_pop(struct nesting_state *saved)
915 {
916 	nesting = *saved;
917 }
918 
919 static struct op *
newtp(int type)920 newtp(int type)
921 {
922 	struct op *t;
923 
924 	t = alloc(sizeof(struct op), ATEMP);
925 	t->type = type;
926 	t->u.evalflags = 0;
927 	t->args = NULL;
928 	t->vars = NULL;
929 	t->ioact = NULL;
930 	t->left = t->right = NULL;
931 	t->str = NULL;
932 	return (t);
933 }
934 
935 struct op *
compile(Source * s,bool skiputf8bom)936 compile(Source *s, bool skiputf8bom)
937 {
938 	nesting.start_token = 0;
939 	nesting.start_line = 0;
940 	herep = heres;
941 	source = s;
942 	if (skiputf8bom)
943 		yyskiputf8bom();
944 	yyparse();
945 	return (outtree);
946 }
947 
948 /*-
949  * This kludge exists to take care of sh/AT&T ksh oddity in which
950  * the arguments of alias/export/readonly/typeset have no field
951  * splitting, file globbing, or (normal) tilde expansion done.
952  * AT&T ksh seems to do something similar to this since
953  *	$ touch a=a; typeset a=[ab]; echo "$a"
954  *	a=[ab]
955  *	$ x=typeset; $x a=[ab]; echo "$a"
956  *	a=a
957  *	$
958  */
959 int
assign_command(const char * s,bool docommand)960 assign_command(const char *s, bool docommand)
961 {
962 	if (!*s)
963 		return (0);
964 	return ((strcmp(s, Talias) == 0) ||
965 	    (strcmp(s, Texport) == 0) ||
966 	    (strcmp(s, Treadonly) == 0) ||
967 	    (docommand && (strcmp(s, Tcommand) == 0)) ||
968 	    (strcmp(s, Ttypeset) == 0));
969 }
970 
971 /* Check if we are in the middle of reading an alias */
972 static int
inalias(struct source * s)973 inalias(struct source *s)
974 {
975 	for (; s && s->type == SALIAS; s = s->next)
976 		if (!(s->flags & SF_ALIASEND))
977 			return (1);
978 	return (0);
979 }
980 
981 
/*
 * Order important - indexed by Test_meta values
 * Note that ||, &&, ( and ) can't appear as unquoted strings
 * in normal shell input, so these can be interpreted unambiguously
 * in the evaluation pass.
 */
static const char dbtest_or[] = { CHAR, '|', CHAR, '|', EOS };
static const char dbtest_and[] = { CHAR, '&', CHAR, '&', EOS };
static const char dbtest_not[] = { CHAR, '!', EOS };
static const char dbtest_oparen[] = { CHAR, '(', EOS };
static const char dbtest_cparen[] = { CHAR, ')', EOS };
const char * const dbtest_tokens[] = {
	dbtest_or, dbtest_and, dbtest_not,
	dbtest_oparen, dbtest_cparen
};
/* further words dbtestp_isa() compares against or stores */
static const char db_close[] = { CHAR, ']', CHAR, ']', EOS };
static const char db_lthan[] = { CHAR, '<', EOS };
static const char db_gthan[] = { CHAR, '>', EOS };
1000 
1001 /*
1002  * Test if the current token is a whatever. Accepts the current token if
1003  * it is. Returns 0 if it is not, non-zero if it is (in the case of
1004  * TM_UNOP and TM_BINOP, the returned value is a Test_op).
1005  */
/*
 * isa callback for parsing [[ ... ]]: peek at the next token and check
 * whether it is of kind meta. On a match the token is consumed and,
 * where there is one, its word is appended to the vector at
 * te->pos.av. Returns TO_NONOP if it does not match.
 */
static Test_op
dbtestp_isa(Test_env *te, Test_meta meta)
{
	int c = tpeek(CMDASN | (meta == TM_BINOP ? 0 : CONTIN));
	bool uqword;
	char *save = NULL;
	Test_op ret = TO_NONOP;

	/* unquoted word? */
	uqword = c == LWORD && *ident;

	if (meta == TM_OR)
		ret = c == LOGOR ? TO_NONNULL : TO_NONOP;
	else if (meta == TM_AND)
		ret = c == LOGAND ? TO_NONNULL : TO_NONOP;
	else if (meta == TM_NOT)
		ret = (uqword && !strcmp(yylval.cp,
		    dbtest_tokens[(int)TM_NOT])) ? TO_NONNULL : TO_NONOP;
	else if (meta == TM_OPAREN)
		ret = c == '(' /*)*/ ? TO_NONNULL : TO_NONOP;
	else if (meta == TM_CPAREN)
		ret = c == /*(*/ ')' ? TO_NONNULL : TO_NONOP;
	else if (meta == TM_UNOP || meta == TM_BINOP) {
		if (meta == TM_BINOP && c == REDIR &&
		    (yylval.iop->ioflag == IOREAD ||
		    yylval.iop->ioflag == IOWRITE)) {
			/* plain < or > lexed as a redirection: store a word */
			ret = TO_NONNULL;
			save = wdcopy(yylval.iop->ioflag == IOREAD ?
			    db_lthan : db_gthan, ATEMP);
		} else if (uqword && (ret = test_isop(meta, ident)))
			save = yylval.cp;
	} else
		/* meta == TM_END */
		ret = (uqword && !strcmp(yylval.cp,
		    db_close)) ? TO_NONNULL : TO_NONOP;
	if (ret != TO_NONOP) {
		ACCEPT;
		/* metas with a dbtest_tokens entry store that canonical word */
		if ((unsigned int)meta < NELEM(dbtest_tokens))
			save = wdcopy(dbtest_tokens[(int)meta], ATEMP);
		if (save)
			XPput(*te->pos.av, save);
	}
	return (ret);
}
1050 
1051 static const char *
dbtestp_getopnd(Test_env * te,Test_op op MKSH_A_UNUSED,bool do_eval MKSH_A_UNUSED)1052 dbtestp_getopnd(Test_env *te, Test_op op MKSH_A_UNUSED,
1053     bool do_eval MKSH_A_UNUSED)
1054 {
1055 	int c = tpeek(CMDASN);
1056 
1057 	if (c != LWORD)
1058 		return (NULL);
1059 
1060 	ACCEPT;
1061 	XPput(*te->pos.av, yylval.cp);
1062 
1063 	return (null);
1064 }
1065 
/*
 * eval callback for parsing [[ ... ]]: ignores all of its arguments
 * and always returns 1.
 */
static int
dbtestp_eval(Test_env *te MKSH_A_UNUSED, Test_op op MKSH_A_UNUSED,
    const char *opnd1 MKSH_A_UNUSED, const char *opnd2 MKSH_A_UNUSED,
    bool do_eval MKSH_A_UNUSED)
{
	return (1);
}
1073 
1074 static void
dbtestp_error(Test_env * te,int offset,const char * msg)1075 dbtestp_error(Test_env *te, int offset, const char *msg)
1076 {
1077 	te->flags |= TEF_ERROR;
1078 
1079 	if (offset < 0) {
1080 		REJECT;
1081 		/* Kludgy to say the least... */
1082 		symbol = LWORD;
1083 		yylval.cp = *(XPptrv(*te->pos.av) + XPsize(*te->pos.av) +
1084 		    offset);
1085 	}
1086 	syntaxerr(msg);
1087 }
1088 
1089 #if HAVE_SELECT
1090 
1091 #ifndef EOVERFLOW
1092 #ifdef ERANGE
1093 #define EOVERFLOW	ERANGE
1094 #else
1095 #define EOVERFLOW	EINVAL
1096 #endif
1097 #endif
1098 
/*
 * Parse a decimal number of seconds with an optional fraction
 * ("digits[.digits]") from s into *tv. At most six fractional digits
 * are used; further digits are skipped.
 *
 * Returns false on success, true on error with errno set to EOVERFLOW
 * (integral part too large) or EINVAL (unexpected characters).
 */
bool
parse_usec(const char *s, struct timeval *tv)
{
	struct timeval next;
	int scale;

	/* parse integral part */
	tv->tv_sec = 0;
	for (; ksh_isdigit(*s); ++s) {
		next.tv_sec = tv->tv_sec * 10 + ksh_numdig(*s);
		/*XXX this overflow check may be UB */
		if (next.tv_sec / 10 != tv->tv_sec) {
			errno = EOVERFLOW;
			return (true);
		}
		tv->tv_sec = next.tv_sec;
	}

	tv->tv_usec = 0;
	if (*s == '\0')
		/* no decimal fraction */
		return (false);
	if (*s != '.') {
		/* junk after integral part */
		errno = EINVAL;
		return (true);
	}
	++s;

	/* parse decimal fraction, one place value per digit */
	for (scale = 100000; scale > 0 && ksh_isdigit(*s); scale /= 10) {
		tv->tv_usec += scale * ksh_numdig(*s);
		++s;
	}
	/* digits beyond microsecond resolution are skipped */
	while (ksh_isdigit(*s))
		++s;
	/* check for junk after fractional part */
	if (*s != '\0') {
		errno = EINVAL;
		return (true);
	}

	/* end of input string reached, no errors */
	return (false);
}
1146 #endif
1147 
1148 /*
1149  * Helper function called from within lex.c:yylex() to parse
1150  * a COMSUB recursively using the main shell parser and lexer
1151  */
1152 char *
yyrecursive(int subtype MKSH_A_UNUSED)1153 yyrecursive(int subtype MKSH_A_UNUSED)
1154 {
1155 	struct op *t;
1156 	char *cp;
1157 	struct yyrecursive_state *ys;
1158 	int stok, etok;
1159 
1160 	if (subtype != COMSUB) {
1161 		stok = '{';
1162 		etok = '}';
1163 	} else {
1164 		stok = '(';
1165 		etok = ')';
1166 	}
1167 
1168 	ys = alloc(sizeof(struct yyrecursive_state), ATEMP);
1169 
1170 	/* tell the lexer to accept a closing parenthesis as EOD */
1171 	ys->old_nesting_type = subshell_nesting_type;
1172 	subshell_nesting_type = etok;
1173 
1174 	/* push reject state, parse recursively, pop reject state */
1175 	ys->old_reject = reject;
1176 	ys->old_symbol = symbol;
1177 	ACCEPT;
1178 	ys->old_herep = herep;
1179 	ys->old_salias = sALIAS;
1180 	sALIAS = 0;
1181 	ys->next = e->yyrecursive_statep;
1182 	e->yyrecursive_statep = ys;
1183 	/* we use TPAREN as a helper container here */
1184 	t = nested(TPAREN, stok, etok);
1185 	yyrecursive_pop(false);
1186 
1187 	/* t->left because nested(TPAREN, ...) hides our goodies there */
1188 	cp = snptreef(NULL, 0, "%T", t->left);
1189 	tfree(t, ATEMP);
1190 
1191 	return (cp);
1192 }
1193 
1194 void
yyrecursive_pop(bool popall)1195 yyrecursive_pop(bool popall)
1196 {
1197 	struct yyrecursive_state *ys;
1198 
1199  popnext:
1200 	if (!(ys = e->yyrecursive_statep))
1201 		return;
1202 	e->yyrecursive_statep = ys->next;
1203 
1204 	sALIAS = ys->old_salias;
1205 	herep = ys->old_herep;
1206 	reject = ys->old_reject;
1207 	symbol = ys->old_symbol;
1208 
1209 	subshell_nesting_type = ys->old_nesting_type;
1210 
1211 	afree(ys, ATEMP);
1212 	if (popall)
1213 		goto popnext;
1214 }
1215