1 /*	$OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $	*/
2 
3 /*-
4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5  *		 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
6  *	mirabilos <m@mirbsd.org>
7  *
8  * Provided that these terms and disclaimer and all copyright notices
9  * are retained or reproduced in an accompanying document, permission
10  * is granted to deal in this work without restriction, including un-
11  * limited rights to use, publicly perform, distribute, sell, modify,
12  * merge, give away, or sublicence.
13  *
14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15  * the utmost extent permitted by applicable law, neither express nor
16  * implied; without malicious intent or gross negligence. In no event
17  * may a licensor, author or contributor be held liable for indirect,
18  * direct, other damage, loss, or other issues arising in any way out
19  * of dealing in the work, even if advised of the possibility of such
20  * damage or existence of a defect, except proven that it results out
21  * of said person's immediate fault when using the work as intended.
22  */
23 
24 #include "sh.h"
25 
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.250 2018/10/20 18:34:14 tg Exp $");
27 
28 /*
29  * states while lexing word
 *
 * One of these values is kept in the type member of every Lex_state
 * on the state stack and mirrored in the local variable "state" of
 * yylex(); the type member is an uint8_t, so all values fit a byte.
30  */
31 #define SBASE		0	/* outside any lexical constructs */
32 #define SWORD		1	/* implicit quoting for substitute() */
33 #define SLETPAREN	2	/* inside (( )), implicit quoting */
34 #define SSQUOTE		3	/* inside '' */
35 #define SDQUOTE		4	/* inside "" */
36 #define SEQUOTE		5	/* inside $'' */
37 #define SBRACE		6	/* inside ${} */
38 #define SQBRACE		7	/* inside "${}" */
39 #define SBQUOTE		8	/* inside `` */
40 #define SASPAREN	9	/* inside $(( )) */
41 #define SHEREDELIM	10	/* parsing << or <<- delimiter */
42 #define SHEREDQUOTE	11	/* parsing " in << or <<- delimiter */
43 #define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
44 #define SADELIM		13	/* like SBASE, looking for delimiter */
45 #define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
46 #define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
47 #define SINVALID	255	/* invalid state (yylex() puts it into states[0]) */
48 
/*
 * Node of the retrace list headed by retrace_info. While nodes are
 * linked, the o_getsc_r() helper appends every character it hands out
 * to the buffer of each node; PUSH_SRETRACE() allocates and links a
 * node, POP_SRETRACE() terminates the buffer, unlinks and frees it.
 */
49 struct sretrace_info {
	/* next node in the list, NULL terminates */
50 	struct sretrace_info *next;
	/* growable buffer collecting the characters read */
51 	XString xs;
	/* current write position inside xs */
52 	char *xp;
53 };
54 
55 /*
56  * Structure to keep track of the lexing state and the various pieces of info
57  * needed for each particular state.
 *
 * The members of the union share storage; which one is meaningful
 * depends on the state type, as noted on each member below.
58  */
59 typedef struct lex_state {
60 	union {
61 		/* point to the next state block */
62 		struct lex_state *base;
63 		/* marks start of state output in output string */
64 		size_t start;
65 		/* SBQUOTE: true if in double quotes: "`...`" */
66 		/* SEQUOTE: got NUL, ignore rest of string */
67 		bool abool;
68 		/* SADELIM information */
69 		struct {
70 			/* character to search for */
71 			unsigned char delimiter;
72 			/* max. number of delimiters */
73 			unsigned char num;
74 		} adelim;
75 	} u;
76 	/* count open parentheses */
77 	short nparen;
78 	/* type of this state */
79 	uint8_t type;
80 	/* extra flags */
81 	uint8_t ls_flags;
82 } Lex_state;
/* shorthands for the members of the union above */
83 #define ls_base		u.base
84 #define ls_start	u.start
85 #define ls_bool		u.abool
86 #define ls_adelim	u.adelim
87 
88 /* ls_flags */
/*
 * LS_HEREDOC is set on the initial state when yylex() is called with
 * HEREDOC in cf and is copied to each pushed state by PUSH_STATE();
 * several places in yylex() clear it again for ${...} substitutions.
 */
89 #define LS_HEREDOC	BIT(0)
90 
/*
 * Bounds of the state block currently in use: base is its first entry,
 * end is one past its last entry (base + STATE_BSIZE in yylex()).
 * PUSH_STATE() and POP_STATE() compare statep against these to decide
 * when to call push_state_i() or pop_state_i().
 */
91 typedef struct {
92 	Lex_state *base;
93 	Lex_state *end;
94 } State_info;
95 
/* forward declarations of the file-local helpers */
96 static void readhere(struct ioword *);
97 static void ungetsc(int);
98 static void ungetsc_i(int);
99 static int getsc_uu(void);
100 static void getsc_line(Source *);
101 static int getsc_bn(void);
102 static int getsc_i(void);
103 static char *get_brace_var(XString *, char *);
104 static bool arraysub(char **);
105 static void gethere(void);
106 static Lex_state *push_state_i(State_info *, Lex_state *);
107 static Lex_state *pop_state_i(State_info *, Lex_state *);
108 
/*
 * Reset to 0 at the start of yylex(); while nonzero, o_getsc() does
 * not use its fast path and calls getsc_bn() instead.
 * NOTE(review): presumably maintained by getsc_bn()/ungetsc_i(), which
 * are defined outside this part of the file; confirm there.
 */
109 static int backslash_skip;
/*
 * Nesting counter, reset to 0 at the start of yylex() and raised while
 * skipping a comment, inside '...' and $'...' and while reading a here
 * document without IOEVAL.
 * NOTE(review): presumably consulted by getsc_bn() to keep backslash-
 * newline pairs intact; confirm there.
 */
110 static int ignore_backslash_newline;
111 
112 /* optimised getsc_bn() */
/*
 * Takes the next byte straight from source->str unless it is NUL or a
 * backslash, or backslash_skip is set; those cases go to getsc_bn().
 */
113 #define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
114 			    !backslash_skip ? *source->str++ : getsc_bn())
115 /* optimised getsc_uu() */
/* same idea, but only a NUL byte diverts to getsc_uu() */
116 #define	o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())
117 
118 /* retrace helper */
/*
 * Expands to local declarations, a loop and a return statement, so it
 * must be the whole body of a function returning int. carg is evaluated
 * exactly once; its value is appended to the buffer of every node on
 * the retrace_info list (growing the buffer as needed) and returned.
 */
119 #define o_getsc_r(carg)					\
120 	int cev = (carg);				\
121 	struct sretrace_info *rp = retrace_info;	\
122 							\
123 	while (rp) {					\
124 		Xcheck(rp->xs, rp->xp);			\
125 		*rp->xp++ = cev;			\
126 		rp = rp->next;				\
127 	}						\
128 							\
129 	return (cev);
130 
131 /* callback */
132 static int
getsc_i(void)133 getsc_i(void)
134 {
135 	o_getsc_r((unsigned int)(unsigned char)o_getsc());
136 }
137 
138 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
139 #define getsc()		getsc_i()
140 #else
141 static int getsc_r(int);
142 
143 static int
getsc_r(int c)144 getsc_r(int c)
145 {
146 	o_getsc_r(c);
147 }
148 
149 #define getsc()		getsc_r((unsigned int)(unsigned char)o_getsc())
150 #endif
151 
/* number of Lex_state entries per state block */
#define STATE_BSIZE	8

/*
 * The macros below operate on these locals of the using function:
 * statep, state, state_info and, for the SRETRACE pair, ws, wp, sp, dp.
 */

/* enter state s; the new entry inherits the flags of the current one */
#define PUSH_STATE(s)	do {					\
	const uint8_t inherited = statep->ls_flags;		\
								\
	++statep;						\
	if (statep == state_info.end)				\
		statep = push_state_i(&state_info, statep);	\
	statep->type = (s);					\
	state = statep->type;					\
	statep->ls_flags = inherited;				\
} while (/* CONSTCOND */ 0)

/* leave the current state and reload state from the one below it */
#define POP_STATE()	do {					\
	--statep;						\
	if (statep == state_info.base)				\
		statep = pop_state_i(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)

/*
 * enter state s, remember the current output position in ls_start and
 * begin recording all characters read into a new retrace buffer
 */
#define PUSH_SRETRACE(s) do {					\
	struct sretrace_info *newri;				\
								\
	PUSH_STATE(s);						\
	statep->ls_start = Xsavepos(ws, wp);			\
	newri = alloc(sizeof(*newri), ATEMP);			\
	Xinit(newri->xs, newri->xp, 64, ATEMP);			\
	newri->next = retrace_info;				\
	retrace_info = newri;					\
} while (/* CONSTCOND */ 0)

/*
 * undo PUSH_SRETRACE: rewind wp to the saved position, leave the
 * recorded, NUL-terminated text in sp (to be freed by the user of
 * the macro), release the list node (dp is used as scratch) and
 * leave the state
 */
#define POP_SRETRACE()	do {					\
	struct sretrace_info *doneri = retrace_info;		\
								\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*doneri->xp = '\0';					\
	sp = Xstring(doneri->xs, doneri->xp);			\
	retrace_info = doneri->next;				\
	dp = (void *)doneri;					\
	afree(dp, ATEMP);					\
	POP_STATE();						\
} while (/* CONSTCOND */ 0)
188 
189 /**
190  * Lexical analyser
191  *
192  * tokens are not regular expressions, they are LL(1).
193  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
194  * hence the state stack. Note "$(...)" are now parsed recursively.
 *
 * Reads one token from the current input source. The argument cf is
 * a bit mask; the flags tested in this function are:
 *	ONEWORD		start in state SWORD, the result is one LWORD
 *	LETEXPR		start in state SLETPAREN, i.e. inside (( ))
 *	HEREDELIM	start in state SHEREDELIM (<< or <<- delimiter)
 *	HEREDOC		set LS_HEREDOC on the initial state
 *	CMDASN		a '[' following a variable name is handed to
 *			arraysub() to collect an array subscript
 *	CMDWORD, ALIAS	govern alias expansion of the collected word
 *	KEYWORD, ESACONLY	govern keyword recognition
 *	CONTIN		after a newline, read the pending here
 *			documents and lex the next token right away
 *	LQCHAR		copy a QCHAR byte and its successor verbatim
 *
 * Returns REDIR (with yylval.iop filled in), LWORD (with the word in
 * yylval.cp and its unquoted form, if any, in ident), the value of a
 * keyword table entry, one of BREAK, LOGOR, LOGAND, MDPAREN, COPROC,
 * BRKEV, BRKFT, or the terminating character itself, which is 0 at
 * the end of the input.
195  */
196 
197 int
yylex(int cf)198 yylex(int cf)
199 {
200 	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
201 	State_info state_info;
202 	int c, c2, state;
203 	size_t cz;
204 	XString ws;		/* expandable output word */
205 	char *wp;		/* output word pointer */
206 	char *sp, *dp;
207 
208  Again:
	/*
	 * Start of a fresh token; also reached again after pending here
	 * documents were read (CONTIN) and after an alias expansion was
	 * pushed as new input source. states[0] only is a sentinel, the
	 * initial state lives in states[1].
	 */
209 	states[0].type = SINVALID;
210 	states[0].ls_base = NULL;
211 	statep = &states[1];
212 	state_info.base = states;
213 	state_info.end = &state_info.base[STATE_BSIZE];
214 
215 	Xinit(ws, wp, 64, ATEMP);
216 
217 	backslash_skip = 0;
218 	ignore_backslash_newline = 0;
219 
220 	if (cf & ONEWORD)
221 		state = SWORD;
222 	else if (cf & LETEXPR) {
223 		/* enclose arguments in (double) quotes */
224 		*wp++ = OQUOTE;
225 		state = SLETPAREN;
226 		statep->nparen = 0;
227 	} else {
228 		/* normal lexing */
229 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
		/* skip leading blanks and, if present, a comment */
230 		do {
231 			c = getsc();
232 		} while (ctype(c, C_BLANK));
233 		if (c == '#') {
234 			ignore_backslash_newline++;
235 			do {
236 				c = getsc();
237 			} while (!ctype(c, C_NUL | C_LF));
238 			ignore_backslash_newline--;
239 		}
240 		ungetsc(c);
241 	}
242 	if (source->flags & SF_ALIAS) {
243 		/* trailing ' ' in alias definition */
244 		source->flags &= ~SF_ALIAS;
245 		/* POSIX: trailing space only counts if parsing simple cmd */
246 		if (!Flag(FPOSIX) || (cf & CMDWORD))
247 			cf |= ALIAS;
248 	}
249 
250 	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
251 	statep->type = state;
252 	statep->ls_flags = (cf & HEREDOC) ? LS_HEREDOC : 0;
253 
254 	/* collect non-special or quoted characters to form word */
255 	while (!((c = getsc()) == 0 ||
256 	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
257 		if (state == SBASE &&
258 		    subshell_nesting_type == ORD(/*{*/ '}') &&
259 		    (unsigned int)c == ORD(/*{*/ '}'))
260 			/* possibly end ${ :;} */
261 			break;
262 		Xcheck(ws, wp);
263 		switch (state) {
264 		case SADELIM:
265 			if ((unsigned int)c == ORD('('))
266 				statep->nparen++;
267 			else if ((unsigned int)c == ORD(')'))
268 				statep->nparen--;
269 			else if (statep->nparen == 0 &&
270 			    ((unsigned int)c == ORD(/*{*/ '}') ||
271 			    c == (int)statep->ls_adelim.delimiter)) {
272 				*wp++ = ADELIM;
273 				*wp++ = c;
274 				if ((unsigned int)c == ORD(/*{*/ '}') ||
275 				    --statep->ls_adelim.num == 0)
276 					POP_STATE();
				/* a closing brace also ends the state below */
277 				if ((unsigned int)c == ORD(/*{*/ '}'))
278 					POP_STATE();
279 				break;
280 			}
281 			/* FALLTHROUGH */
282 		case SBASE:
283 			if ((unsigned int)c == ORD('[') && (cf & CMDASN)) {
284 				/* temporary */
285 				*wp = EOS;
286 				if (is_wdvarname(Xstring(ws, wp), false)) {
287 					char *p, *tmp;
288 
289 					if (arraysub(&tmp)) {
290 						*wp++ = CHAR;
291 						*wp++ = c;
292 						for (p = tmp; *p; ) {
293 							Xcheck(ws, wp);
294 							*wp++ = CHAR;
295 							*wp++ = *p++;
296 						}
297 						afree(tmp, ATEMP);
298 						break;
299 					}
300 				}
301 				*wp++ = CHAR;
302 				*wp++ = c;
303 				break;
304 			}
305 			/* FALLTHROUGH */
306  Sbase1:		/* includes *(...|...) pattern (*+?@!) */
307 			if (ctype(c, C_PATMO)) {
308 				c2 = getsc();
309 				if ((unsigned int)c2 == ORD('(' /*)*/)) {
310 					*wp++ = OPAT;
311 					*wp++ = c;
312 					PUSH_STATE(SPATTERN);
313 					break;
314 				}
315 				ungetsc(c2);
316 			}
317 			/* FALLTHROUGH */
318  Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
319 			switch (c) {
320 			case ORD('\\'):
321  getsc_qchar:
322 				if ((c = getsc())) {
323 					/* trailing \ is lost */
324 					*wp++ = QCHAR;
325 					*wp++ = c;
326 				}
327 				break;
328 			case ORD('\''):
329  open_ssquote_unless_heredoc:
330 				if ((statep->ls_flags & LS_HEREDOC))
331 					goto store_char;
332 				*wp++ = OQUOTE;
333 				ignore_backslash_newline++;
334 				PUSH_STATE(SSQUOTE);
335 				break;
336 			case ORD('"'):
337  open_sdquote:
338 				*wp++ = OQUOTE;
339 				PUSH_STATE(SDQUOTE);
340 				break;
341 			case ORD('$'):
342 				/*
343 				 * processing of dollar sign belongs into
344 				 * Subst, except for those which can open
345 				 * a string: $'…' and $"…"
346 				 */
347  subst_dollar_ex:
348 				c = getsc();
349 				switch (c) {
350 				case ORD('"'):
351 					goto open_sdquote;
352 				case ORD('\''):
353 					goto open_sequote;
354 				default:
355 					goto SubstS;
356 				}
357 			default:
358 				goto Subst;
359 			}
360 			break;
361 
362  Subst:
363 			switch (c) {
364 			case ORD('\\'):
365 				c = getsc();
366 				switch (c) {
367 				case ORD('"'):
368 					if ((statep->ls_flags & LS_HEREDOC))
369 						goto heredocquote;
370 					/* FALLTHROUGH */
371 				case ORD('\\'):
372 				case ORD('$'):
373 				case ORD('`'):
374  store_qchar:
375 					*wp++ = QCHAR;
376 					*wp++ = c;
377 					break;
378 				default:
379  heredocquote:
380 					Xcheck(ws, wp);
381 					if (c) {
382 						/* trailing \ is lost */
383 						*wp++ = CHAR;
384 						*wp++ = '\\';
385 						*wp++ = CHAR;
386 						*wp++ = c;
387 					}
388 					break;
389 				}
390 				break;
391 			case ORD('$'):
392 				c = getsc();
393  SubstS:
394 				if ((unsigned int)c == ORD('(' /*)*/)) {
395 					c = getsc();
396 					if ((unsigned int)c == ORD('(' /*)*/)) {
397 						*wp++ = EXPRSUB;
398 						PUSH_SRETRACE(SASPAREN);
399 						/* unneeded? */
400 						/*statep->ls_flags &= ~LS_HEREDOC;*/
401 						statep->nparen = 2;
402 						*retrace_info->xp++ = '(';
403 					} else {
404 						ungetsc(c);
405  subst_command:
406 						c = COMSUB;
407  subst_command2:
						/* c is the marker byte, sp the parsed text */
408 						sp = yyrecursive(c);
409 						cz = strlen(sp) + 1;
410 						XcheckN(ws, wp, cz);
411 						*wp++ = c;
412 						memcpy(wp, sp, cz);
413 						wp += cz;
414 					}
415 				} else if ((unsigned int)c == ORD('{' /*}*/)) {
416 					if ((unsigned int)(c = getsc()) == ORD('|')) {
417 						/*
418 						 * non-subenvironment
419 						 * value substitution
420 						 */
421 						c = VALSUB;
422 						goto subst_command2;
423 					} else if (ctype(c, C_IFSWS)) {
424 						/*
425 						 * non-subenvironment
426 						 * "command" substitution
427 						 */
428 						c = FUNSUB;
429 						goto subst_command2;
430 					}
431 					ungetsc(c);
432 					*wp++ = OSUBST;
433 					*wp++ = '{' /*}*/;
434 					wp = get_brace_var(&ws, wp);
435 					c = getsc();
436 					/* allow :# and :% (ksh88 compat) */
437 					if ((unsigned int)c == ORD(':')) {
438 						*wp++ = CHAR;
439 						*wp++ = c;
440 						c = getsc();
441 						if ((unsigned int)c == ORD(':')) {
442 							*wp++ = CHAR;
443 							*wp++ = '0';
444 							*wp++ = ADELIM;
445 							*wp++ = ':';
446 							PUSH_STATE(SBRACE);
447 							/* perhaps unneeded? */
448 							statep->ls_flags &= ~LS_HEREDOC;
449 							PUSH_STATE(SADELIM);
450 							statep->ls_adelim.delimiter = ':';
451 							statep->ls_adelim.num = 1;
452 							statep->nparen = 0;
453 							break;
454 						} else if (ctype(c, C_DIGIT | C_DOLAR | C_SPC) ||
455 						    /*XXX what else? */
456 						    c == '(' /*)*/) {
457 							/* substring subst. */
458 							if (c != ' ') {
459 								*wp++ = CHAR;
460 								*wp++ = ' ';
461 							}
462 							ungetsc(c);
463 							PUSH_STATE(SBRACE);
464 							/* perhaps unneeded? */
465 							statep->ls_flags &= ~LS_HEREDOC;
466 							PUSH_STATE(SADELIM);
467 							statep->ls_adelim.delimiter = ':';
468 							statep->ls_adelim.num = 2;
469 							statep->nparen = 0;
470 							break;
471 						}
472 					} else if (c == '/') {
473 						c2 = ADELIM;
474  parse_adelim_slash:
475 						*wp++ = CHAR;
476 						*wp++ = c;
477 						if ((unsigned int)(c = getsc()) == ORD('/')) {
478 							*wp++ = c2;
479 							*wp++ = c;
480 						} else
481 							ungetsc(c);
482 						PUSH_STATE(SBRACE);
483 						/* perhaps unneeded? */
484 						statep->ls_flags &= ~LS_HEREDOC;
485 						PUSH_STATE(SADELIM);
486 						statep->ls_adelim.delimiter = '/';
487 						statep->ls_adelim.num = 1;
488 						statep->nparen = 0;
489 						break;
490 					} else if (c == '@') {
						/* peek only: c2 is put back at once */
491 						c2 = getsc();
492 						ungetsc(c2);
493 						if ((unsigned int)c2 == ORD('/')) {
494 							c2 = CHAR;
495 							goto parse_adelim_slash;
496 						}
497 					}
498 					/*
499 					 * If this is a trim operation,
500 					 * treat (,|,) specially in STBRACE.
501 					 */
502 					if (ctype(c, C_SUB2)) {
503 						ungetsc(c);
504 						if (Flag(FSH))
505 							PUSH_STATE(STBRACEBOURNE);
506 						else
507 							PUSH_STATE(STBRACEKORN);
508 						/* single-quotes-in-heredoc-trim */
509 						statep->ls_flags &= ~LS_HEREDOC;
510 					} else {
511 						ungetsc(c);
512 						if (state == SDQUOTE ||
513 						    state == SQBRACE)
514 							PUSH_STATE(SQBRACE);
515 						else
516 							PUSH_STATE(SBRACE);
517 						/* here no LS_HEREDOC removal */
518 						/* single-quotes-in-heredoc-braces */
519 					}
520 				} else if (ctype(c, C_ALPHX)) {
521 					*wp++ = OSUBST;
522 					*wp++ = 'X';
523 					do {
524 						Xcheck(ws, wp);
525 						*wp++ = c;
526 						c = getsc();
527 					} while (ctype(c, C_ALNUX));
528 					*wp++ = '\0';
529 					*wp++ = CSUBST;
530 					*wp++ = 'X';
531 					ungetsc(c);
532 				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
533 					Xcheck(ws, wp);
534 					*wp++ = OSUBST;
535 					*wp++ = 'X';
536 					*wp++ = c;
537 					*wp++ = '\0';
538 					*wp++ = CSUBST;
539 					*wp++ = 'X';
540 				} else {
541 					*wp++ = CHAR;
542 					*wp++ = '$';
543 					ungetsc(c);
544 				}
545 				break;
546 			case ORD('`'):
547  subst_gravis:
548 				PUSH_STATE(SBQUOTE);
549 				*wp++ = COMASUB;
550 				/*
551 				 * We need to know whether we are within double
552 				 * quotes in order to translate \" to " within
553 				 * "…`…\"…`…" because, unlike for COMSUBs, the
554 				 * outer double quoting changes the backslash
555 				 * meaning for the inside. For more details:
556 				 * http://austingroupbugs.net/view.php?id=1015
557 				 */
558 				statep->ls_bool = false;
559 				s2 = statep;
560 				base = state_info.base;
				/* walk down the stack, block by block */
561 				while (/* CONSTCOND */ 1) {
562 					for (; s2 != base; s2--) {
563 						if (s2->type == SDQUOTE) {
564 							statep->ls_bool = true;
565 							break;
566 						}
567 					}
568 					if (s2 != base)
569 						break;
570 					if (!(s2 = s2->ls_base))
571 						break;
572 					base = s2-- - STATE_BSIZE;
573 				}
574 				break;
575 			case QCHAR:
576 				if (cf & LQCHAR) {
577 					*wp++ = QCHAR;
578 					*wp++ = getsc();
579 					break;
580 				}
581 				/* FALLTHROUGH */
582 			default:
583  store_char:
584 				*wp++ = CHAR;
585 				*wp++ = c;
586 			}
587 			break;
588 
589 		case SEQUOTE:
590 			if ((unsigned int)c == ORD('\'')) {
591 				POP_STATE();
592 				*wp++ = CQUOTE;
593 				ignore_backslash_newline--;
594 			} else if ((unsigned int)c == ORD('\\')) {
595 				if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
596 					c2 = getsc();
597 				if (c2 == 0)
598 					statep->ls_bool = true;
599 				if (!statep->ls_bool) {
600 					char ts[4];
601 
602 					if ((unsigned int)c2 < 0x100) {
603 						*wp++ = QCHAR;
604 						*wp++ = c2;
605 					} else {
						/* values from 0x100 up are encoded via utf_wctomb() */
606 						cz = utf_wctomb(ts, c2 - 0x100);
607 						ts[cz] = 0;
608 						cz = 0;
609 						do {
610 							*wp++ = QCHAR;
611 							*wp++ = ts[cz];
612 						} while (ts[++cz]);
613 					}
614 				}
615 			} else if (!statep->ls_bool) {
616 				*wp++ = QCHAR;
617 				*wp++ = c;
618 			}
619 			break;
620 
621 		case SSQUOTE:
622 			if ((unsigned int)c == ORD('\'')) {
623 				POP_STATE();
624 				if ((statep->ls_flags & LS_HEREDOC) ||
625 				    state == SQBRACE)
626 					goto store_char;
627 				*wp++ = CQUOTE;
628 				ignore_backslash_newline--;
629 			} else {
630 				*wp++ = QCHAR;
631 				*wp++ = c;
632 			}
633 			break;
634 
635 		case SDQUOTE:
636 			if ((unsigned int)c == ORD('"')) {
637 				POP_STATE();
638 				*wp++ = CQUOTE;
639 			} else
640 				goto Subst;
641 			break;
642 
643 		/* $(( ... )) */
644 		case SASPAREN:
645 			if ((unsigned int)c == ORD('('))
646 				statep->nparen++;
647 			else if ((unsigned int)c == ORD(')')) {
648 				statep->nparen--;
649 				if (statep->nparen == 1) {
650 					/* end of EXPRSUB */
651 					POP_SRETRACE();
652 
653 					if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
						/* copy sp without its first and last character */
654 						cz = strlen(sp) - 2;
655 						XcheckN(ws, wp, cz);
656 						memcpy(wp, sp + 1, cz);
657 						wp += cz;
658 						afree(sp, ATEMP);
659 						*wp++ = '\0';
660 						break;
661 					} else {
662 						Source *s;
663 
664 						ungetsc(c2);
665 						/*
666 						 * mismatched parenthesis -
667 						 * assume we were really
668 						 * parsing a $(...) expression
669 						 */
670 						--wp;
671 						s = pushs(SREREAD,
672 						    source->areap);
673 						s->start = s->str =
674 						    s->u.freeme = sp;
675 						s->next = source;
676 						source = s;
677 						goto subst_command;
678 					}
679 				}
680 			}
681 			/* reuse existing state machine */
682 			goto Sbase2;
683 
684 		case SQBRACE:
685 			if ((unsigned int)c == ORD('\\')) {
686 				/*
687 				 * perform POSIX "quote removal" if the back-
688 				 * slash is "special", i.e. same cases as the
689 				 * {case '\\':} in Subst: plus closing brace;
690 				 * in mksh code "quote removal" on '\c' means
691 				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
692 				 * emitted (in heredocquote:)
693 				 */
694 				if ((unsigned int)(c = getsc()) == ORD('"') ||
695 				    (unsigned int)c == ORD('\\') ||
696 				    ctype(c, C_DOLAR | C_GRAVE) ||
697 				    (unsigned int)c == ORD(/*{*/ '}'))
698 					goto store_qchar;
699 				goto heredocquote;
700 			}
701 			goto common_SQBRACE;
702 
703 		case SBRACE:
704 			if ((unsigned int)c == ORD('\''))
705 				goto open_ssquote_unless_heredoc;
706 			else if ((unsigned int)c == ORD('\\'))
707 				goto getsc_qchar;
708  common_SQBRACE:
709 			if ((unsigned int)c == ORD('"'))
710 				goto open_sdquote;
711 			else if ((unsigned int)c == ORD('$'))
712 				goto subst_dollar_ex;
713 			else if ((unsigned int)c == ORD('`'))
714 				goto subst_gravis;
715 			else if ((unsigned int)c != ORD(/*{*/ '}'))
716 				goto store_char;
717 			POP_STATE();
718 			*wp++ = CSUBST;
719 			*wp++ = /*{*/ '}';
720 			break;
721 
722 		/* Same as SBASE, except (,|,) treated specially */
723 		case STBRACEKORN:
724 			if ((unsigned int)c == ORD('|'))
725 				*wp++ = SPAT;
726 			else if ((unsigned int)c == ORD('(')) {
727 				*wp++ = OPAT;
728 				/* simile for @ */
729 				*wp++ = ' ';
730 				PUSH_STATE(SPATTERN);
731 			} else /* FALLTHROUGH */
732 		case STBRACEBOURNE:
733 			  if ((unsigned int)c == ORD(/*{*/ '}')) {
734 				POP_STATE();
735 				*wp++ = CSUBST;
736 				*wp++ = /*{*/ '}';
737 			} else
738 				goto Sbase1;
739 			break;
740 
741 		case SBQUOTE:
742 			if ((unsigned int)c == ORD('`')) {
743 				*wp++ = 0;
744 				POP_STATE();
745 			} else if ((unsigned int)c == ORD('\\')) {
746 				switch (c = getsc()) {
747 				case 0:
748 					/* trailing \ is lost */
749 					break;
750 				case ORD('$'):
751 				case ORD('`'):
752 				case ORD('\\'):
753 					*wp++ = c;
754 					break;
755 				case ORD('"'):
756 					if (statep->ls_bool) {
757 						*wp++ = c;
758 						break;
759 					}
760 					/* FALLTHROUGH */
761 				default:
762 					*wp++ = '\\';
763 					*wp++ = c;
764 					break;
765 				}
766 			} else
767 				*wp++ = c;
768 			break;
769 
770 		/* ONEWORD */
771 		case SWORD:
772 			goto Subst;
773 
774 		/* LETEXPR: (( ... )) */
775 		case SLETPAREN:
776 			if ((unsigned int)c == ORD(/*(*/ ')')) {
777 				if (statep->nparen > 0)
778 					--statep->nparen;
779 				else if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
780 					c = 0;
781 					*wp++ = CQUOTE;
782 					goto Done;
783 				} else {
784 					Source *s;
785 
786 					ungetsc(c2);
787 					ungetsc(c);
788 					/*
789 					 * mismatched parenthesis -
790 					 * assume we were really
791 					 * parsing a (...) expression
792 					 */
793 					*wp = EOS;
794 					sp = Xstring(ws, wp);
795 					dp = wdstrip(sp + 1, WDS_TPUTS);
796 					s = pushs(SREREAD, source->areap);
797 					s->start = s->str = s->u.freeme = dp;
798 					s->next = source;
799 					source = s;
800 					ungetsc('(' /*)*/);
801 					return (ORD('(' /*)*/));
802 				}
803 			} else if ((unsigned int)c == ORD('('))
804 				/*
805 				 * parentheses inside quotes and
806 				 * backslashes are lost, but AT&T ksh
807 				 * doesn't count them either
808 				 */
809 				++statep->nparen;
810 			goto Sbase2;
811 
812 		/* << or <<- delimiter */
813 		case SHEREDELIM:
814 			/*
815 			 * here delimiters need a special case since
816 			 * $ and `...` are not to be treated specially
817 			 */
818 			switch (c) {
819 			case ORD('\\'):
820 				if ((c = getsc())) {
821 					/* trailing \ is lost */
822 					*wp++ = QCHAR;
823 					*wp++ = c;
824 				}
825 				break;
826 			case ORD('\''):
827 				goto open_ssquote_unless_heredoc;
828 			case ORD('$'):
829 				if ((unsigned int)(c2 = getsc()) == ORD('\'')) {
830  open_sequote:
831 					*wp++ = OQUOTE;
832 					ignore_backslash_newline++;
833 					PUSH_STATE(SEQUOTE);
834 					statep->ls_bool = false;
835 					break;
836 				} else if ((unsigned int)c2 == ORD('"')) {
837 					/* FALLTHROUGH */
838 			case ORD('"'):
839 					PUSH_SRETRACE(SHEREDQUOTE);
840 					break;
841 				}
842 				ungetsc(c2);
843 				/* FALLTHROUGH */
844 			default:
845 				*wp++ = CHAR;
846 				*wp++ = c;
847 			}
848 			break;
849 
850 		/* " in << or <<- delimiter */
851 		case SHEREDQUOTE:
852 			if ((unsigned int)c != ORD('"'))
853 				goto Subst;
854 			POP_SRETRACE();
855 			dp = strnul(sp) - 1;
856 			/* remove the trailing double quote */
857 			*dp = '\0';
858 			/* store the quoted string */
859 			*wp++ = OQUOTE;
860 			XcheckN(ws, wp, (dp - sp) * 2);
861 			dp = sp;
862 			while ((c = *dp++)) {
863 				if (c == '\\') {
					/* \ is dropped before \ " $ ` and kept otherwise */
864 					switch ((c = *dp++)) {
865 					case ORD('\\'):
866 					case ORD('"'):
867 					case ORD('$'):
868 					case ORD('`'):
869 						break;
870 					default:
871 						*wp++ = CHAR;
872 						*wp++ = '\\';
873 						break;
874 					}
875 				}
876 				*wp++ = CHAR;
877 				*wp++ = c;
878 			}
879 			afree(sp, ATEMP);
880 			*wp++ = CQUOTE;
881 			state = statep->type = SHEREDELIM;
882 			break;
883 
884 		/* in *(...|...) pattern (*+?@!) */
885 		case SPATTERN:
886 			if ((unsigned int)c == ORD(/*(*/ ')')) {
887 				*wp++ = CPAT;
888 				POP_STATE();
889 			} else if ((unsigned int)c == ORD('|')) {
890 				*wp++ = SPAT;
891 			} else if ((unsigned int)c == ORD('(')) {
892 				*wp++ = OPAT;
893 				/* simile for @ */
894 				*wp++ = ' ';
895 				PUSH_STATE(SPATTERN);
896 			} else
897 				goto Sbase1;
898 			break;
899 		}
900 	}
901  Done:
902 	Xcheck(ws, wp);
	/* anything still above the initial state was left unterminated */
903 	if (statep != &states[1])
904 		/* XXX figure out what is missing */
905 		yyerror("no closing quote");
906 
907 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
908 	if (state == SHEREDELIM)
909 		state = SBASE;
910 
	/*
	 * Redirection check: the terminator is an angle bracket, or '&'
	 * outside of sh and POSIX mode (accepted below only when a '>'
	 * follows), and the word collected so far is either empty or a
	 * single unquoted digit, which then is the descriptor number.
	 */
911 	dp = Xstring(ws, wp);
912 	if (state == SBASE && (
913 	    (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
914 	    ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 ||
915 	    (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) {
916 		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
917 
918 		iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;
919 
920 		if (c == '&') {
921 			if ((unsigned int)(c2 = getsc()) != ORD('>')) {
922 				ungetsc(c2);
923 				goto no_iop;
924 			}
925 			c = c2;
926 			iop->ioflag = IOBASH;
927 		} else
928 			iop->ioflag = 0;
929 
930 		c2 = getsc();
931 		/* <<, >>, <> are ok, >< is not */
932 		if (c == c2 || ((unsigned int)c == ORD('<') &&
933 		    (unsigned int)c2 == ORD('>'))) {
934 			iop->ioflag |= c == c2 ?
935 			    ((unsigned int)c == ORD('>') ? IOCAT : IOHERE) : IORDWR;
936 			if (iop->ioflag == IOHERE) {
937 				if ((unsigned int)(c2 = getsc()) == ORD('-'))
938 					iop->ioflag |= IOSKIP;
939 				else if ((unsigned int)c2 == ORD('<'))
940 					iop->ioflag |= IOHERESTR;
941 				else
942 					ungetsc(c2);
943 			}
944 		} else if ((unsigned int)c2 == ORD('&'))
945 			iop->ioflag |= IODUP | ((unsigned int)c == ORD('<') ? IORDUP : 0);
946 		else {
947 			iop->ioflag |= (unsigned int)c == ORD('>') ? IOWRITE : IOREAD;
948 			if ((unsigned int)c == ORD('>') && (unsigned int)c2 == ORD('|'))
949 				iop->ioflag |= IOCLOB;
950 			else
951 				ungetsc(c2);
952 		}
953 
954 		iop->ioname = NULL;
955 		iop->delim = NULL;
956 		iop->heredoc = NULL;
957 		/* free word */
958 		Xfree(ws, wp);
959 		yylval.iop = iop;
960 		return (REDIR);
961  no_iop:
962 		afree(iop, ATEMP);
963 	}
964 
965 	if (wp == dp && state == SBASE) {
966 		/* free word */
967 		Xfree(ws, wp);
968 		/* no word, process LEX1 character */
969 		if (((unsigned int)c == ORD('|')) ||
970 		    ((unsigned int)c == ORD('&')) ||
971 		    ((unsigned int)c == ORD(';')) ||
972 		    ((unsigned int)c == ORD('(' /*)*/))) {
			/* look for the two-character operators */
973 			if ((c2 = getsc()) == c)
974 				c = ((unsigned int)c == ORD(';')) ? BREAK :
975 				    ((unsigned int)c == ORD('|')) ? LOGOR :
976 				    ((unsigned int)c == ORD('&')) ? LOGAND :
977 				    /* (unsigned int)c == ORD('(' )) */ MDPAREN;
978 			else if ((unsigned int)c == ORD('|') && (unsigned int)c2 == ORD('&'))
979 				c = COPROC;
980 			else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('|'))
981 				c = BRKEV;
982 			else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('&'))
983 				c = BRKFT;
984 			else
985 				ungetsc(c2);
986 #ifndef MKSH_SMALL
			/* ;;& is accepted as another spelling of ;| */
987 			if (c == BREAK) {
988 				if ((unsigned int)(c2 = getsc()) == ORD('&'))
989 					c = BRKEV;
990 				else
991 					ungetsc(c2);
992 			}
993 #endif
994 		} else if ((unsigned int)c == ORD('\n')) {
995 			if (cf & HEREDELIM)
996 				ungetsc(c);
997 			else {
998 				gethere();
999 				if (cf & CONTIN)
1000 					goto Again;
1001 			}
1002 		} else if (c == '\0' && !(cf & HEREDELIM)) {
1003 			struct ioword **p = heres;
1004 
1005 			while (p < herep)
1006 				if ((*p)->ioflag & IOHERESTR)
1007 					++p;
1008 				else
1009 					/* ksh -c 'cat <<EOF' can cause this */
1010 					yyerror(Tf_heredoc,
1011 					    evalstr((*p)->delim, 0));
1012 		}
1013 		return (c);
1014 	}
1015 
1016 	/* terminate word */
1017 	*wp++ = EOS;
1018 	yylval.cp = Xclose(ws, wp);
1019 	if (state == SWORD || state == SLETPAREN
1020 	    /* XXX ONEWORD? */)
1021 		return (LWORD);
1022 
1023 	/* unget terminator */
1024 	ungetsc(c);
1025 
1026 	/*
1027 	 * note: the alias-vs-function code below depends on several
1028 	 * interna: starting from here, source->str is not modified;
1029 	 * the way getsc() and ungetsc() operate; etc.
1030 	 */
1031 
1032 	/* copy word to unprefixed string ident */
1033 	sp = yylval.cp;
1034 	dp = ident;
1035 	while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1036 		*dp++ = *sp++;
1037 	if (c != EOS)
1038 		/* word is not unquoted, or space ran out */
1039 		dp = ident;
1040 	/* make sure the ident array stays NUL padded */
1041 	memset(dp, 0, (ident + IDENT) - dp + 1);
1042 
1043 	if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1044 		struct tbl *p;
1045 		uint32_t h = hash(ident);
1046 
1047 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1048 		    (!(cf & ESACONLY) || p->val.i == ESAC ||
1049 		    (unsigned int)p->val.i == ORD(/*{*/ '}'))) {
1050 			afree(yylval.cp, ATEMP);
1051 			return (p->val.i);
1052 		}
1053 		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1054 		    (p->flag & ISSET)) {
1055 			/*
1056 			 * this still points to the same character as the
1057 			 * ungetsc'd terminator from above
1058 			 */
1059 			const char *cp = source->str;
1060 
1061 			/* prefer POSIX but not Korn functions over aliases */
1062 			while (ctype(*cp, C_BLANK))
1063 				/*
1064 				 * this is like getsc() without skipping
1065 				 * over Source boundaries (including not
1066 				 * parsing ungetsc'd characters that got
1067 				 * pushed into an SREREAD) which is what
1068 				 * we want here anyway: find out whether
1069 				 * the alias name is followed by a POSIX
1070 				 * function definition
1071 				 */
1072 				++cp;
1073 			/* prefer functions over aliases */
1074 			if (cp[0] != '(' || cp[1] != ')') {
1075 				Source *s = source;
1076 
				/* do not expand an alias that is being expanded */
1077 				while (s && (s->flags & SF_HASALIAS))
1078 					if (s->u.tblp == p)
1079 						return (LWORD);
1080 					else
1081 						s = s->next;
1082 				/* push alias expansion */
1083 				s = pushs(SALIAS, source->areap);
1084 				s->start = s->str = p->val.s;
1085 				s->u.tblp = p;
1086 				s->flags |= SF_HASALIAS;
1087 				s->line = source->line;
1088 				s->next = source;
1089 				if (source->type == SEOF) {
1090 					/* prevent infinite recursion at EOS */
1091 					source->u.tblp = p;
1092 					source->flags |= SF_HASALIAS;
1093 				}
1094 				source = s;
1095 				afree(yylval.cp, ATEMP);
1096 				goto Again;
1097 			}
1098 		}
1099 	} else if (*ident == '\0') {
1100 		/* retain typeset et al. even when quoted */
1101 		struct tbl *tt = get_builtin((dp = wdstrip(yylval.cp, 0)));
1102 		uint32_t flag = tt ? tt->flag : 0;
1103 
1104 		if (flag & (DECL_UTIL | DECL_FWDR))
1105 			strlcpy(ident, dp, sizeof(ident));
1106 		afree(dp, ATEMP);
1107 	}
1108 
1109 	return (LWORD);
1110 }
1111 
1112 static void
gethere(void)1113 gethere(void)
1114 {
1115 	struct ioword **p;
1116 
1117 	for (p = heres; p < herep; p++)
1118 		if (!((*p)->ioflag & IOHERESTR))
1119 			readhere(*p);
1120 	herep = heres;
1121 }
1122 
1123 /*
1124  * read "<<word" text into temp file
1125  */
1126 
/*
 * Collect the body of the here document described by iop.
 *
 * Input is read with getsc() and appended to an XString. At the start of
 * every line the characters are compared against the delimiter (the
 * result of evalstr(iop->delim, 0)). Once the whole delimiter has matched
 * and is followed by a newline, by end of input, or - while
 * subshell_nesting_type is set - by a closing parenthesis, the text
 * collected before that line is NUL-terminated and stored in
 * iop->heredoc.
 *
 * If IOSKIP is set, leading tabs of every line are skipped and not
 * stored. Unless IOEVAL is set, ignore_backslash_newline is incremented
 * while reading and decremented again on the normal exit path.
 * Reaching end of input on a line that does not match the delimiter is
 * reported through yyerror(Tf_heredoc, eof).
 *
 * NOTE(review): on the yyerror() path the ignore_backslash_newline
 * counter is not decremented in this function; presumably it is reset
 * elsewhere - confirm.
 */
1127 static void
readhere(struct ioword * iop)1128 readhere(struct ioword *iop)
1129 {
1130 	int c;
1131 	const char *eof, *eofp;
1132 	XString xs;
1133 	char *xp;
1134 	size_t xpos;
1135 
	/* eof is the delimiter text every line is compared against */
1136 	eof = evalstr(iop->delim, 0);
1137 
1138 	if (!(iop->ioflag & IOEVAL))
1139 		ignore_backslash_newline++;
1140 
1141 	Xinit(xs, xp, 256, ATEMP);
1142 
1143  heredoc_read_line:
1144 	/* beginning of line */
1145 	eofp = eof;
	/* remember where this line starts so a terminator line can be dropped */
1146 	xpos = Xsavepos(xs, xp);
1147 	if (iop->ioflag & IOSKIP) {
1148 		/* skip over leading tabs */
1149 		while ((c = getsc()) == '\t')
1150 			;	/* nothing */
		/* c already holds the first non-tab character */
1151 		goto heredoc_parse_char;
1152 	}
1153  heredoc_read_char:
1154 	c = getsc();
1155  heredoc_parse_char:
1156 	/* compare with here document marker */
1157 	if (!*eofp) {
1158 		/* end of here document marker, what to do? */
1159 		switch (c) {
1160 		case ORD(/*(*/ ')'):
1161 			if (!subshell_nesting_type)
1162 				/*-
1163 				 * not allowed outside $(...) or (...)
1164 				 * => mismatch
1165 				 */
1166 				break;
1167 			/* allow $(...) or (...) to close here */
1168 			ungetsc(/*(*/ ')');
1169 			/* FALLTHROUGH */
1170 		case 0:
1171 			/*
1172 			 * Allow EOF here to commands without trailing
1173 			 * newlines (mksh -c '...') will work as well.
1174 			 */
1175 		case ORD('\n'):
1176 			/* Newline terminates here document marker */
1177 			goto heredoc_found_terminator;
1178 		}
		/* any other character after the marker: fall out to the mismatch path */
	/*
	 * eofp is advanced even when the comparison fails; that is harmless
	 * because it is reset to eof at the start of the next line
	 */
1179 	} else if ((unsigned int)c == ord(*eofp++))
1180 		/* store; then read and compare next character */
1181 		goto heredoc_store_and_loop;
1182 	/* nope, mismatch; read until end of line */
1183 	while (c != '\n') {
1184 		if (!c)
1185 			/* oops, reached EOF */
1186 			yyerror(Tf_heredoc, eof);
1187 		/* store character */
1188 		Xcheck(xs, xp);
1189 		Xput(xs, xp, c);
1190 		/* read next character */
1191 		c = getsc();
1192 	}
1193 	/* we read a newline as last character */
1194  heredoc_store_and_loop:
1195 	/* store character */
1196 	Xcheck(xs, xp);
1197 	Xput(xs, xp, c);
	/* a newline starts a fresh comparison; anything else continues this line */
1198 	if (c == '\n')
1199 		goto heredoc_read_line;
1200 	goto heredoc_read_char;
1201 
1202  heredoc_found_terminator:
1203 	/* jump back to saved beginning of line */
1204 	xp = Xrestpos(xs, xp, xpos);
1205 	/* terminate, close and store */
1206 	Xput(xs, xp, '\0');
1207 	iop->heredoc = Xclose(xs, xp);
1208 
1209 	if (!(iop->ioflag & IOEVAL))
1210 		ignore_backslash_newline--;
1211 }
1212 
1213 void
yyerror(const char * fmt,...)1214 yyerror(const char *fmt, ...)
1215 {
1216 	va_list va;
1217 
1218 	/* pop aliases and re-reads */
1219 	while (source->type == SALIAS || source->type == SREREAD)
1220 		source = source->next;
1221 	/* zap pending input */
1222 	source->str = null;
1223 
1224 	error_prefix(true);
1225 	va_start(va, fmt);
1226 	shf_vfprintf(shl_out, fmt, va);
1227 	shf_putc('\n', shl_out);
1228 	va_end(va);
1229 	errorfz();
1230 }
1231 
1232 /*
1233  * input for yylex with alias expansion
1234  */
1235 
1236 Source *
pushs(int type,Area * areap)1237 pushs(int type, Area *areap)
1238 {
1239 	Source *s;
1240 
1241 	s = alloc(sizeof(Source), areap);
1242 	memset(s, 0, sizeof(Source));
1243 	s->type = type;
1244 	s->str = null;
1245 	s->areap = areap;
1246 	if (type == SFILE || type == SSTDIN)
1247 		XinitN(s->xs, 256, s->areap);
1248 	return (s);
1249 }
1250 
/*
 * Return the next character of the current source, or 0 at end of input.
 *
 * As long as the current source string yields a NUL, the source is
 * refilled or replaced depending on its type:
 *  - SEOF: the string is reset to null and 0 is returned
 *  - SSTDIN, SFILE: a new line is read with getsc_line()
 *  - SWSTR, SSTRING, SSTRINGCMDLINE: nothing is refilled, so the
 *    source turns into SEOF below
 *  - SWORDS, SWORDSEP: alternate between the next word of u.strv and
 *    a separator (T1space between words, "\n" after the last one)
 *  - SALIAS: the alias source is popped, see the comments in that case
 *  - SREREAD: the pushed-back text is released and the source popped
 * If the new source has SF_ECHO set, the freshly loaded string is
 * written to shl_out.
 */
1251 static int
getsc_uu(void)1252 getsc_uu(void)
1253 {
1254 	Source *s = source;
1255 	int c;
1256 
1257 	while ((c = ord(*s->str++)) == 0) {
1258 		/* return 0 for EOF by default */
1259 		s->str = NULL;
1260 		switch (s->type) {
1261 		case SEOF:
1262 			s->str = null;
1263 			return (0);
1264 
1265 		case SSTDIN:
1266 		case SFILE:
1267 			getsc_line(s);
1268 			break;
1269 
1270 		case SWSTR:
1271 			break;
1272 
1273 		case SSTRING:
1274 		case SSTRINGCMDLINE:
1275 			break;
1276 
1277 		case SWORDS:
			/* deliver the next word, then a separator */
1278 			s->start = s->str = *s->u.strv++;
1279 			s->type = SWORDSEP;
1280 			break;
1281 
1282 		case SWORDSEP:
1283 			if (*s->u.strv == NULL) {
				/* last word done: finish with a newline, then EOF */
1284 				s->start = s->str = "\n";
1285 				s->type = SEOF;
1286 			} else {
1287 				s->start = s->str = T1space;
1288 				s->type = SWORDS;
1289 			}
1290 			break;
1291 
1292 		case SALIAS:
1293 			if (s->flags & SF_ALIASEND) {
1294 				/* pass on an unused SF_ALIAS flag */
1295 				source = s->next;
1296 				source->flags |= s->flags & SF_ALIAS;
1297 				s = source;
1298 			} else if (*s->u.tblp->val.s &&
1299 			    ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
1300 				/* pop source stack */
1301 				source = s = s->next;
1302 				/*
1303 				 * Note that this alias ended with a
1304 				 * space, enabling alias expansion on
1305 				 * the following word.
1306 				 */
1307 				s->flags |= SF_ALIAS;
1308 			} else {
1309 				/*
1310 				 * At this point, we need to keep the current
1311 				 * alias in the source list so recursive
1312 				 * aliases can be detected and we also need to
1313 				 * return the next character. Do this by
1314 				 * temporarily popping the alias to get the
1315 				 * next character and then put it back in the
1316 				 * source list with the SF_ALIASEND flag set.
1317 				 */
1318 				/* pop source stack */
1319 				source = s->next;
1320 				source->flags |= s->flags & SF_ALIAS;
				/* recursive call reads from the source below the alias */
1321 				c = getsc_uu();
1322 				if (c) {
					/* re-push the alias holding just that one character */
1323 					s->flags |= SF_ALIASEND;
1324 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1325 					s->start = s->str = s->ugbuf;
1326 					s->next = source;
1327 					source = s;
1328 				} else {
1329 					s = source;
1330 					/* avoid reading EOF twice */
1331 					s->str = NULL;
					/*
					 * leaves the switch only; the NULL check
					 * after the switch then reports EOF
					 */
1332 					break;
1333 				}
1334 			}
			/* retry the loop condition with the (possibly new) source */
1335 			continue;
1336 
1337 		case SREREAD:
1338 			if (s->start != s->ugbuf)
1339 				/* yuck */
1340 				afree(s->u.freeme, ATEMP);
1341 			source = s = s->next;
1342 			continue;
1343 		}
		/* nothing was refilled: mark the source as finished */
1344 		if (s->str == NULL) {
1345 			s->type = SEOF;
1346 			s->start = s->str = null;
1347 			return ('\0');
1348 		}
1349 		if (s->flags & SF_ECHO) {
1350 			shf_puts(s->str, shl_out);
1351 			shf_flush(shl_out);
1352 		}
1353 	}
1354 	return (c);
1355 }
1356 
/*
 * Read the next line of input for source s into its buffer s->xs and
 * point s->start and s->str at it; on end of file s->str is set to NULL
 * (and an SFILE source has its shf closed with shf_fdclose()).
 *
 * "interactive" means FTALKING is set and s is an SSTDIN source;
 * "have_tty" additionally requires SF_TTY on the source and tty_hasstate.
 *  - with have_tty and a nonzero ksh_tmout, an alarm is armed while
 *    reading and cancelled afterwards
 *  - with have_tty and an editing mode flag set (and command line
 *    editing compiled in), the line comes from x_read()
 *  - otherwise the prompt is printed (interactive) or s->line is
 *    incremented (non-interactive) and the line is read with
 *    shf_getse(), growing the buffer until a newline or EOF is seen
 * Interactive lines are passed to histsave(); afterwards the prompt is
 * switched to PS2.
 */
1357 static void
getsc_line(Source * s)1358 getsc_line(Source *s)
1359 {
1360 	char *xp = Xstring(s->xs, xp), *cp;
1361 	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1362 	bool have_tty = interactive && (s->flags & SF_TTY) && tty_hasstate;
1363 
1364 	/* Done here to ensure nothing odd happens when a timeout occurs */
1365 	XcheckN(s->xs, xp, LINE);
1366 	*xp = '\0';
1367 	s->start = s->str = xp;
1368 
1369 	if (have_tty && ksh_tmout) {
1370 		ksh_tmout_state = TMOUT_READING;
1371 		alarm(ksh_tmout);
1372 	}
1373 	if (interactive) {
1374 		if (cur_prompt == PS1)
1375 			histsave(&s->line, NULL, HIST_FLUSH, true);
1376 		change_winsz();
1377 	}
1378 #ifndef MKSH_NO_CMDLINE_EDITING
1379 	if (have_tty && (
1380 #if !MKSH_S_NOVI
1381 	    Flag(FVI) ||
1382 #endif
1383 	    Flag(FEMACS) || Flag(FGMACS))) {
1384 		int nread;
1385 
1386 		nread = x_read(xp);
1387 		if (nread < 0)
1388 			/* read error */
1389 			nread = 0;
1390 		xp[nread] = '\0';
1391 		xp += nread;
1392 	} else
1393 #endif
1394 	  {
1395 		if (interactive)
1396 			pprompt(prompt, 0);
1397 		else
1398 			s->line++;
1399 
1400 		while (/* CONSTCOND */ 1) {
1401 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1402 
			/* an interrupted read is retried after running pending traps */
1403 			if (!p && shf_error(s->u.shf) &&
1404 			    shf_errno(s->u.shf) == EINTR) {
1405 				shf_clearerr(s->u.shf);
1406 				if (trap)
1407 					runtraps(0);
1408 				continue;
1409 			}
			/*
			 * stop when nothing was returned or the data read
			 * ends in a newline; otherwise xp now points at the
			 * end of the partial line and the buffer is grown
			 */
1410 			if (!p || (xp = p, xp[-1] == '\n'))
1411 				break;
1412 			/* double buffer size */
1413 			/* move past NUL so doubling works... */
1414 			xp++;
1415 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
1416 			/* ...and move back again */
1417 			xp--;
1418 		}
1419 		/*
1420 		 * flush any unwanted input so other programs/builtins
1421 		 * can read it. Not very optimal, but less error prone
1422 		 * than flushing else where, dealing with redirections,
1423 		 * etc.
1424 		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
1425 		 */
1426 		if (s->type == SSTDIN)
1427 			shf_flush(s->u.shf);
1428 	}
1429 	/*
1430 	 * XXX: temporary kludge to restore source after a
1431 	 * trap may have been executed.
1432 	 */
1433 	source = s;
1434 	if (have_tty && ksh_tmout) {
1435 		ksh_tmout_state = TMOUT_EXECUTING;
1436 		alarm(0);
1437 	}
	/* the buffer may have been reallocated while growing: re-fetch its start */
1438 	cp = Xstring(s->xs, xp);
1439 	rndpush(cp);
1440 	s->start = s->str = cp;
1441 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1442 	/* Note: if input is all nulls, this is not eof */
1443 	if (Xlength(s->xs, xp) == 0) {
1444 		/* EOF */
1445 		if (s->type == SFILE)
1446 			shf_fdclose(s->u.shf);
1447 		s->str = NULL;
1448 	} else if (interactive && *s->str) {
		/*
		 * continuation lines are appended to the history entry; a
		 * new command is queued unless it starts with IFS whitespace
		 */
1449 		if (cur_prompt != PS1)
1450 			histsave(&s->line, s->str, HIST_APPEND, true);
1451 		else if (!ctype(*s->str, C_IFS | C_IFSWS))
1452 			histsave(&s->line, s->str, HIST_QUEUE, true);
1453 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1454 		else
1455 			goto check_for_sole_return;
1456 	} else if (interactive && cur_prompt == PS1) {
1457  check_for_sole_return:
		/* a PS1 line of only IFS whitespace flushes and syncs the history */
1458 		cp = Xstring(s->xs, xp);
1459 		while (ctype(*cp, C_IFSWS))
1460 			++cp;
1461 		if (!*cp) {
1462 			histsave(&s->line, NULL, HIST_FLUSH, true);
1463 			histsync();
1464 		}
1465 #endif
1466 	}
1467 	if (interactive)
1468 		set_prompt(PS2, NULL);
1469 }
1470 
/*
 * Select which prompt is current: records `to` in cur_prompt and, for
 * PS1 and PS2, sets the global `prompt` string. Any other value of `to`
 * only updates cur_prompt.
 *
 * PS1: the value of the PS1 parameter is copied with "!!" replaced by a
 * single "!" and every other "!" replaced by s->line + 1 (0 when s is
 * NULL); the result is then run through substitute() and duplicated
 * into the area that was ATEMP on entry. If the substitution jumps back
 * through e->jbuf, prompt falls back to safe_prompt.
 * PS2: the value of the PS2 parameter is used as is.
 *
 * s may be NULL; it only supplies the line number.
 */
1471 void
set_prompt(int to,Source * s)1472 set_prompt(int to, Source *s)
1473 {
1474 	cur_prompt = (uint8_t)to;
1475 
1476 	switch (to) {
1477 	/* command */
1478 	case PS1:
1479 		/*
1480 		 * Substitute ! and !! here, before substitutions are done
1481 		 * so ! in expanded variables are not expanded.
1482 		 * NOTE: this is not what AT&T ksh does (it does it after
1483 		 * substitutions); POSIX doesn't say which is to be done.
1484 		 */
1485 		{
1486 			struct shf *shf;
1487 			char * volatile ps1;
1488 			Area *saved_atemp;
1489 			int saved_lineno;
1490 
1491 			ps1 = str_val(global("PS1"));
1492 			shf = shf_sopen(NULL, strlen(ps1) * 2,
1493 			    SHF_WR | SHF_DYNAMIC, NULL);
			/*
			 * copy ps1: ordinary characters and the second "!"
			 * of "!!" are copied; a lone "!" becomes the number
			 */
1494 			while (*ps1)
1495 				if (*ps1 != '!' || *++ps1 == '!')
1496 					shf_putchar(*ps1++, shf);
1497 				else
1498 					shf_fprintf(shf, Tf_lu, s ?
1499 					    (unsigned long)s->line + 1 : 0UL);
1500 			ps1 = shf_sclose(shf);
			/* current_lineno is changed only for the substitution below */
1501 			saved_lineno = current_lineno;
1502 			if (s)
1503 				current_lineno = s->line + 1;
			/* remembered so the result can be allocated outside the new env */
1504 			saved_atemp = ATEMP;
1505 			newenv(E_ERRH);
1506 			if (kshsetjmp(e->jbuf)) {
1507 				prompt = safe_prompt;
1508 				/*
1509 				 * Don't print an error - assume it has already
1510 				 * been printed. Reason is we may have forked
1511 				 * to run a command and the child may be
1512 				 * unwinding its stack through this code as it
1513 				 * exits.
1514 				 */
1515 			} else {
1516 				char *cp = substitute(ps1, 0);
1517 				strdupx(prompt, cp, saved_atemp);
1518 			}
1519 			current_lineno = saved_lineno;
1520 			quitenv(NULL);
1521 		}
1522 		break;
1523 	/* command continuation */
1524 	case PS2:
1525 		prompt = str_val(global("PS2"));
1526 		break;
1527 	}
1528 }
1529 
1530 int
pprompt(const char * cp,int ntruncate)1531 pprompt(const char *cp, int ntruncate)
1532 {
1533 	char delimiter = 0;
1534 	bool doprint = (ntruncate != -1);
1535 	bool indelimit = false;
1536 	int columns = 0, lines = 0;
1537 
1538 	/*
1539 	 * Undocumented AT&T ksh feature:
1540 	 * If the second char in the prompt string is \r then the first
1541 	 * char is taken to be a non-printing delimiter and any chars
1542 	 * between two instances of the delimiter are not considered to
1543 	 * be part of the prompt length
1544 	 */
1545 	if (*cp && cp[1] == '\r') {
1546 		delimiter = *cp;
1547 		cp += 2;
1548 	}
1549 	for (; *cp; cp++) {
1550 		if (indelimit && *cp != delimiter)
1551 			;
1552 		else if (ctype(*cp, C_CR | C_LF)) {
1553 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1554 			columns = 0;
1555 		} else if (*cp == '\t') {
1556 			columns = (columns | 7) + 1;
1557 		} else if (*cp == '\b') {
1558 			if (columns > 0)
1559 				columns--;
1560 		} else if (*cp == delimiter)
1561 			indelimit = !indelimit;
1562 		else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
1563 			const char *cp2;
1564 			columns += utf_widthadj(cp, &cp2);
1565 			if (doprint && (indelimit ||
1566 			    (ntruncate < (x_cols * lines + columns))))
1567 				shf_write(cp, cp2 - cp, shl_out);
1568 			cp = cp2 - /* loop increment */ 1;
1569 			continue;
1570 		} else
1571 			columns++;
1572 		if (doprint && (*cp != delimiter) &&
1573 		    (indelimit || (ntruncate < (x_cols * lines + columns))))
1574 			shf_putc(*cp, shl_out);
1575 	}
1576 	if (doprint)
1577 		shf_flush(shl_out);
1578 	return (x_cols * lines + columns);
1579 }
1580 
1581 /*
1582  * Read the variable part of a ${...} expression (i.e. up to but not
1583  * including the :[-+?=#%] or close-brace).
1584  */
/*
 * Characters are read with getsc() and appended at wp, growing *wsp as
 * needed. A small state machine decides where the variable part ends:
 * an optional leading %, # or !, then either an identifier (optionally
 * followed by an array subscript collected by arraysub()), a run of
 * digits, or a single C_VAR1 character. The stored text is
 * NUL-terminated and the character that ended the scan is pushed back
 * with ungetsc(). Returns wp, pointing just past the stored NUL.
 */
1585 static char *
get_brace_var(XString * wsp,char * wp)1586 get_brace_var(XString *wsp, char *wp)
1587 {
1588 	char c;
1589 	enum parse_state {
1590 		PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG,
1591 		PS_IDENT, PS_NUMBER, PS_VAR1
1592 	} state = PS_INITIAL;
1593 
1594 	while (/* CONSTCOND */ 1) {
1595 		c = getsc();
1596 		/* State machine to figure out where the variable part ends. */
1597 		switch (state) {
1598 		case PS_SAW_HASH:
1599 			if (ctype(c, C_VAR1)) {
1600 				char c2;
1601 
				/* peek at the character after c without consuming it */
1602 				c2 = getsc();
1603 				ungetsc(c2);
1604 				if (ord(c2) != ORD(/*{*/ '}')) {
					/*
					 * NOTE(review): ungetsc(c) runs here and
					 * again at out:, so the input is moved back
					 * by one more character than was read in
					 * this iteration - confirm this is intended
					 */
1605 					ungetsc(c);
1606 					goto out;
1607 				}
1608 			}
1609 			goto ps_common;
1610 		case PS_SAW_BANG:
1611 			switch (ord(c)) {
1612 			case ORD('@'):
1613 			case ORD('#'):
1614 			case ORD('-'):
1615 			case ORD('?'):
1616 				goto out;
1617 			}
1618 			goto ps_common;
1619 		case PS_INITIAL:
			/* a leading %, # or ! is stored and selects a prefix state */
1620 			switch (ord(c)) {
1621 			case ORD('%'):
1622 				state = PS_SAW_PERCENT;
1623 				goto next;
1624 			case ORD('#'):
1625 				state = PS_SAW_HASH;
1626 				goto next;
1627 			case ORD('!'):
1628 				state = PS_SAW_BANG;
1629 				goto next;
1630 			}
1631 			/* FALLTHROUGH */
1632 		case PS_SAW_PERCENT:
1633  ps_common:
			/* classify the first character of the name proper */
1634 			if (ctype(c, C_ALPHX))
1635 				state = PS_IDENT;
1636 			else if (ctype(c, C_DIGIT))
1637 				state = PS_NUMBER;
1638 			else if (ctype(c, C_VAR1))
1639 				state = PS_VAR1;
1640 			else
1641 				goto out;
1642 			break;
1643 		case PS_IDENT:
1644 			if (!ctype(c, C_ALNUX)) {
1645 				if (ord(c) == ORD('[')) {
1646 					char *tmp, *p;
1647 
1648 					if (!arraysub(&tmp))
1649 						yyerror("missing ]");
1650 					*wp++ = c;
					/*
					 * tmp holds the subscript as stored by
					 * arraysub(), including the closing ]
					 */
1651 					p = tmp;
1652 					while (*p) {
1653 						Xcheck(*wsp, wp);
1654 						*wp++ = *p++;
1655 					}
1656 					afree(tmp, ATEMP);
1657 					/* the ] */
					/* c is pushed back again at out: */
1658 					c = getsc();
1659 				}
1660 				goto out;
1661 			}
1662  next:
1663 			break;
1664 		case PS_NUMBER:
1665 			if (!ctype(c, C_DIGIT))
1666 				goto out;
1667 			break;
1668 		case PS_VAR1:
			/* a C_VAR1 name is a single character, so it ends here */
1669 			goto out;
1670 		}
		/* c belongs to the variable part: store it */
1671 		Xcheck(*wsp, wp);
1672 		*wp++ = c;
1673 	}
1674  out:
1675 	/* end of variable part */
1676 	*wp++ = '\0';
1677 	ungetsc(c);
1678 	return (wp);
1679 }
1680 
1681 /*
1682  * Save an array subscript - returns true if matching bracket found, false
1683  * if eof or newline was found.
1684  * (Returned string double null terminated)
1685  */
1686 static bool
arraysub(char ** strp)1687 arraysub(char **strp)
1688 {
1689 	XString ws;
1690 	char *wp, c;
1691 	/* we are just past the initial [ */
1692 	unsigned int depth = 1;
1693 
1694 	Xinit(ws, wp, 32, ATEMP);
1695 
1696 	do {
1697 		c = getsc();
1698 		Xcheck(ws, wp);
1699 		*wp++ = c;
1700 		if (ord(c) == ORD('['))
1701 			depth++;
1702 		else if (ord(c) == ORD(']'))
1703 			depth--;
1704 	} while (depth > 0 && c && c != '\n');
1705 
1706 	*wp++ = '\0';
1707 	*strp = Xclose(ws, wp);
1708 
1709 	return (tobool(depth == 0));
1710 }
1711 
1712 /* Unget a char: handles case when we are already at the start of the buffer */
1713 static void
ungetsc(int c)1714 ungetsc(int c)
1715 {
1716 	struct sretrace_info *rp = retrace_info;
1717 
1718 	if (backslash_skip)
1719 		backslash_skip--;
1720 	/* Don't unget EOF... */
1721 	if (source->str == null && c == '\0')
1722 		return;
1723 	while (rp) {
1724 		if (Xlength(rp->xs, rp->xp))
1725 			rp->xp--;
1726 		rp = rp->next;
1727 	}
1728 	ungetsc_i(c);
1729 }
1730 static void
ungetsc_i(int c)1731 ungetsc_i(int c)
1732 {
1733 	if (source->str > source->start)
1734 		source->str--;
1735 	else {
1736 		Source *s;
1737 
1738 		s = pushs(SREREAD, source->areap);
1739 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1740 		s->start = s->str = s->ugbuf;
1741 		s->next = source;
1742 		source = s;
1743 	}
1744 }
1745 
1746 
1747 /* Called to get a char that isn't a \newline sequence. */
1748 static int
getsc_bn(void)1749 getsc_bn(void)
1750 {
1751 	int c, c2;
1752 
1753 	if (ignore_backslash_newline)
1754 		return (o_getsc_u());
1755 
1756 	if (backslash_skip == 1) {
1757 		backslash_skip = 2;
1758 		return (o_getsc_u());
1759 	}
1760 
1761 	backslash_skip = 0;
1762 
1763 	while (/* CONSTCOND */ 1) {
1764 		c = o_getsc_u();
1765 		if (c == '\\') {
1766 			if ((c2 = o_getsc_u()) == '\n')
1767 				/* ignore the \newline; get the next char... */
1768 				continue;
1769 			ungetsc_i(c2);
1770 			backslash_skip = 1;
1771 		}
1772 		return (c);
1773 	}
1774 }
1775 
1776 void
yyskiputf8bom(void)1777 yyskiputf8bom(void)
1778 {
1779 	int c;
1780 
1781 	if (rtt2asc((c = o_getsc_u())) != 0xEF) {
1782 		ungetsc_i(c);
1783 		return;
1784 	}
1785 	if (rtt2asc((c = o_getsc_u())) != 0xBB) {
1786 		ungetsc_i(c);
1787 		ungetsc_i(asc2rtt(0xEF));
1788 		return;
1789 	}
1790 	if (rtt2asc((c = o_getsc_u())) != 0xBF) {
1791 		ungetsc_i(c);
1792 		ungetsc_i(asc2rtt(0xBB));
1793 		ungetsc_i(asc2rtt(0xEF));
1794 		return;
1795 	}
1796 	UTFMODE |= 8;
1797 }
1798 
1799 static Lex_state *
push_state_i(State_info * si,Lex_state * old_end)1800 push_state_i(State_info *si, Lex_state *old_end)
1801 {
1802 	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1803 
1804 	news[0].ls_base = old_end;
1805 	si->base = &news[0];
1806 	si->end = &news[STATE_BSIZE];
1807 	return (&news[1]);
1808 }
1809 
1810 static Lex_state *
pop_state_i(State_info * si,Lex_state * old_end)1811 pop_state_i(State_info *si, Lex_state *old_end)
1812 {
1813 	Lex_state *old_base = si->base;
1814 
1815 	si->base = old_end->ls_base - STATE_BSIZE;
1816 	si->end = old_end->ls_base;
1817 
1818 	afree(old_base, ATEMP);
1819 
1820 	return (si->base + STATE_BSIZE - 1);
1821 }
1822