1 /*	$OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $	*/
2 
3 /*-
4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5  *		 2011, 2012, 2013, 2014, 2015, 2016
6  *	mirabilos <m@mirbsd.org>
7  *
8  * Provided that these terms and disclaimer and all copyright notices
9  * are retained or reproduced in an accompanying document, permission
10  * is granted to deal in this work without restriction, including un-
11  * limited rights to use, publicly perform, distribute, sell, modify,
12  * merge, give away, or sublicence.
13  *
14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15  * the utmost extent permitted by applicable law, neither express nor
16  * implied; without malicious intent or gross negligence. In no event
17  * may a licensor, author or contributor be held liable for indirect,
18  * direct, other damage, loss, or other issues arising in any way out
19  * of dealing in the work, even if advised of the possibility of such
20  * damage or existence of a defect, except proven that it results out
21  * of said person's immediate fault when using the work as intended.
22  */
23 
24 #include "sh.h"
25 
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.228 2016/08/01 21:38:03 tg Exp $");
27 
/*
 * states while lexing word
 *
 * One of these values is kept in the "type" member of every Lex_state
 * entry; yylex() dispatches on the value of the current entry.
 */
#define SBASE		0	/* outside any lexical constructs */
#define SWORD		1	/* implicit quoting for substitute() */
#define SLETPAREN	2	/* inside (( )), implicit quoting */
#define SSQUOTE		3	/* inside '' */
#define SDQUOTE		4	/* inside "" */
#define SEQUOTE		5	/* inside $'' */
#define SBRACE		6	/* inside ${} */
#define SQBRACE		7	/* inside "${}" */
#define SBQUOTE		8	/* inside `` */
#define SASPAREN	9	/* inside $(( )) */
#define SHEREDELIM	10	/* parsing << or <<- delimiter */
#define SHEREDQUOTE	11	/* parsing " in << or <<- delimiter */
#define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
#define SADELIM		13	/* like SBASE, looking for delimiter */
#define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
#define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
/* stored in the sentinel entry states[0] by yylex() */
#define SINVALID	255	/* invalid state */
48 
/*
 * One entry of the retrace list. While an entry is linked into
 * retrace_info, every character handed out through o_getsc_r() is
 * also appended to its buffer, so the raw input can be recovered.
 */
struct sretrace_info {
	/* next entry of the list (the one pushed before this one) */
	struct sretrace_info *next;
	/* growable string collecting the characters read */
	XString xs;
	/* current write position inside xs */
	char *xp;
};
54 
/*
 * Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * The members of the union are alternatives; which one is meaningful
 * depends on the state type, as noted on each member.
 */
typedef struct lex_state {
	union {
		/* point to the next state block */
		struct lex_state *base;
		/* marks start of state output in output string */
		size_t start;
		/* SBQUOTE: true if in double quotes: "`...`" */
		/* SEQUOTE: got NUL, ignore rest of string */
		bool abool;
		/* SADELIM information */
		struct {
			/* character to search for */
			unsigned char delimiter;
			/* max. number of delimiters */
			unsigned char num;
		} adelim;
	} u;
	/* count open parentheses */
	short nparen;
	/* type of this state */
	uint8_t type;
} Lex_state;
/* shorthand names for the union members above */
#define ls_base		u.base
#define ls_start	u.start
#define ls_bool		u.abool
#define ls_adelim	u.adelim
85 
/*
 * Bounds of the state block currently in use by PUSH_STATE/POP_STATE.
 */
typedef struct {
	/* first entry of the block; yylex() starts with its local array */
	Lex_state *base;
	/* one past the last entry of the block (base + STATE_BSIZE) */
	Lex_state *end;
} State_info;
90 
/* forward declarations of the file-local helpers */
static void readhere(struct ioword *);
static void ungetsc(int);
static void ungetsc_i(int);
static int getsc_uu(void);
static void getsc_line(Source *);
static int getsc_bn(void);
static int getsc_i(void);
static char *get_brace_var(XString *, char *);
static bool arraysub(char **);
static void gethere(void);
static Lex_state *push_state_i(State_info *, Lex_state *);
static Lex_state *pop_state_i(State_info *, Lex_state *);

/*
 * Both are reset to 0 at the start of every yylex() run.
 * backslash_skip is tested by o_getsc(): while it is non-zero the fast
 * path is bypassed and getsc_bn() is called.
 * ignore_backslash_newline is used as a nesting counter: it is raised
 * while reading comments, '' and $'' strings and here documents without
 * IOEVAL, and lowered again afterwards.
 * NOTE(review): presumably getsc_bn() (not shown here) consults it to
 * decide whether backslash-newline is removed - confirm there.
 */
static int backslash_skip;
static int ignore_backslash_newline;
106 
/*
 * optimised getsc_bn(): when the next input byte is neither NUL nor a
 * backslash and backslash_skip is zero, take it straight from
 * source->str; otherwise let getsc_bn() handle it
 */
#define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
			    !backslash_skip ? *source->str++ : getsc_bn())
/*
 * optimised getsc_uu(): take the next byte from source->str unless it
 * is NUL, in which case getsc_uu() is called
 */
#define	o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())

/*
 * retrace helper: expands to the complete body of a function returning
 * int. The character carg is evaluated once, appended to the buffer of
 * every entry on the retrace_info list and then returned.
 */
#define o_getsc_r(carg)					\
	int cev = (carg);				\
	struct sretrace_info *rp = retrace_info;	\
							\
	while (rp) {					\
		Xcheck(rp->xs, rp->xp);			\
		*rp->xp++ = cev;			\
		rp = rp->next;				\
	}						\
							\
	return (cev);
125 
126 /* callback */
127 static int
getsc_i(void)128 getsc_i(void)
129 {
130 	o_getsc_r(o_getsc());
131 }
132 
133 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
134 #define getsc getsc_i
135 #else
136 static int getsc_r(int);
137 
138 static int
getsc_r(int c)139 getsc_r(int c)
140 {
141 	o_getsc_r(c);
142 }
143 
144 #define getsc()		getsc_r(o_getsc())
145 #endif
146 
/* number of Lex_state entries in one state block */
#define STATE_BSIZE	8

/*
 * The four macros below are meant for use inside yylex() only: they
 * operate on its locals statep, state_info and state, and the SRETRACE
 * variants additionally on ws, wp, sp and dp.
 */

/*
 * Enter lexing state s: advance statep, asking push_state_i() for a
 * new position when the end of the current block is reached, and
 * store s both in the new entry and in the local state variable.
 */
#define PUSH_STATE(s)	do {					\
	if (++statep == state_info.end)				\
		statep = push_state_i(&state_info, statep);	\
	state = statep->type = (s);				\
} while (/* CONSTCOND */ 0)

/*
 * Leave the current lexing state: step statep back, asking
 * pop_state_i() for the position when the start of the current block
 * is reached, and reload the local state variable from the entry.
 */
#define POP_STATE()	do {					\
	if (--statep == state_info.base)			\
		statep = pop_state_i(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)

/*
 * Enter state s and start retracing: remember the current position of
 * the output word in ls_start and put a fresh sretrace_info entry at
 * the head of the retrace_info list.
 */
#define PUSH_SRETRACE(s) do {					\
	struct sretrace_info *ri;				\
								\
	PUSH_STATE(s);						\
	statep->ls_start = Xsavepos(ws, wp);			\
	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
	ri->next = retrace_info;				\
	retrace_info = ri;					\
} while (/* CONSTCOND */ 0)

/*
 * Stop retracing and leave the state: wp is moved back to the position
 * saved in ls_start, the retraced text is NUL-terminated and left in
 * sp (the callers in yylex() afree() it or hand it to a Source), the
 * list entry is unlinked and freed with dp as scratch pointer.
 */
#define POP_SRETRACE()	do {					\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*retrace_info->xp = '\0';				\
	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
	dp = (void *)retrace_info;				\
	retrace_info = retrace_info->next;			\
	afree(dp, ATEMP);					\
	POP_STATE();						\
} while (/* CONSTCOND */ 0)
181 
/**
 * Lexical analyser
 *
 * tokens are not regular expressions, they are LL(1).
 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
 * hence the state stack. Note "$(...)" are now parsed recursively.
 *
 * cf is a bit set of lexer flags; the ones tested here are ONEWORD,
 * LETEXPR, HEREDELIM, HEREDOC, CMDASN, CMDWORD, LQCHAR, CONTIN,
 * KEYWORD, ESACONLY and ALIAS.
 *
 * Return values, as produced below:
 * - REDIR, with yylval.iop pointing to a new struct ioword
 * - LWORD, with yylval.cp holding the encoded word (CHAR, QCHAR,
 *   OQUOTE, ... markers followed by EOS)
 * - the token value stored for a reserved word (p->val.i)
 * - an operator token (BREAK, LOGOR, LOGAND, MDPAREN, COPROC, BRKEV,
 *   BRKFT) or the plain terminating character, including '\n' and 0
 */

int
yylex(int cf)
{
	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
	State_info state_info;
	int c, c2, state;
	size_t cz;
	XString ws;		/* expandable output word */
	char *wp;		/* output word pointer */
	char *sp, *dp;

	/*
	 * (re)start point: also reached after here documents were read
	 * with CONTIN set and after an alias expansion was pushed
	 */
 Again:
	/* states[0] is a sentinel, the first real state is states[1] */
	states[0].type = SINVALID;
	states[0].ls_base = NULL;
	statep = &states[1];
	state_info.base = states;
	state_info.end = &state_info.base[STATE_BSIZE];

	Xinit(ws, wp, 64, ATEMP);

	backslash_skip = 0;
	ignore_backslash_newline = 0;

	if (cf & ONEWORD)
		state = SWORD;
	else if (cf & LETEXPR) {
		/* enclose arguments in (double) quotes */
		*wp++ = OQUOTE;
		state = SLETPAREN;
		statep->nparen = 0;
	} else {
		/* normal lexing */
		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
		/* skip leading blanks */
		while ((c = getsc()) == ' ' || c == '\t')
			;
		if (c == '#') {
			/* comment: skip up to, not including, newline or EOF */
			ignore_backslash_newline++;
			while ((c = getsc()) != '\0' && c != '\n')
				;
			ignore_backslash_newline--;
		}
		ungetsc(c);
	}
	if (source->flags & SF_ALIAS) {
		/* trailing ' ' in alias definition */
		source->flags &= ~SF_ALIAS;
		/* POSIX: trailing space only counts if parsing simple cmd */
		if (!Flag(FPOSIX) || (cf & CMDWORD))
			cf |= ALIAS;
	}

	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
	statep->type = state;

	/* collect non-special or quoted characters to form word */
	while (!((c = getsc()) == 0 ||
	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
		if (state == SBASE &&
		    subshell_nesting_type == /*{*/ '}' &&
		    c == /*{*/ '}')
			/* possibly end ${ :;} */
			break;
		Xcheck(ws, wp);
		switch (state) {
		case SADELIM:
			if (c == '(')
				statep->nparen++;
			else if (c == ')')
				statep->nparen--;
			else if (statep->nparen == 0 && (c == /*{*/ '}' ||
			    c == (int)statep->ls_adelim.delimiter)) {
				*wp++ = ADELIM;
				*wp++ = c;
				/* leave SADELIM; a closing brace also leaves the state below it */
				if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
					POP_STATE();
				if (c == /*{*/ '}')
					POP_STATE();
				break;
			}
			/* FALLTHROUGH */
		case SBASE:
			if (c == '[' && (cf & CMDASN)) {
				/* temporary */
				*wp = EOS;
				if (is_wdvarname(Xstring(ws, wp), false)) {
					char *p, *tmp;

					if (arraysub(&tmp)) {
						*wp++ = CHAR;
						*wp++ = c;
						for (p = tmp; *p; ) {
							Xcheck(ws, wp);
							*wp++ = CHAR;
							*wp++ = *p++;
						}
						afree(tmp, ATEMP);
						break;
					} else {
						Source *s;

						/* read the text in tmp again as input */
						s = pushs(SREREAD,
						    source->areap);
						s->start = s->str =
						    s->u.freeme = tmp;
						s->next = source;
						source = s;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
				break;
			}
			/* FALLTHROUGH */
 Sbase1:		/* includes *(...|...) pattern (*+?@!) */
			if (c == '*' || c == '@' || c == '+' || c == '?' ||
			    c == '!') {
				c2 = getsc();
				if (c2 == '(' /*)*/ ) {
					*wp++ = OPAT;
					*wp++ = c;
					PUSH_STATE(SPATTERN);
					break;
				}
				ungetsc(c2);
			}
			/* FALLTHROUGH */
 Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
			switch (c) {
			case '\\':
 getsc_qchar:
				if ((c = getsc())) {
					/* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
				break;
			case '\'':
 open_ssquote_unless_heredoc:
				if ((cf & HEREDOC))
					goto store_char;
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
				PUSH_STATE(SSQUOTE);
				break;
			case '"':
 open_sdquote:
				*wp++ = OQUOTE;
				PUSH_STATE(SDQUOTE);
				break;
			case '$':
				/*
				 * processing of dollar sign belongs into
				 * Subst, except for those which can open
				 * a string: $'…' and $"…"
				 */
 subst_dollar_ex:
				c = getsc();
				switch (c) {
				case '"':
					goto open_sdquote;
				case '\'':
					goto open_sequote;
				default:
					goto SubstS;
				}
			default:
				goto Subst;
			}
			break;

 Subst:
			switch (c) {
			case '\\':
				c = getsc();
				switch (c) {
				case '"':
					if ((cf & HEREDOC))
						goto heredocquote;
					/* FALLTHROUGH */
				case '\\':
				case '$': case '`':
 store_qchar:
					*wp++ = QCHAR;
					*wp++ = c;
					break;
				default:
 heredocquote:
					Xcheck(ws, wp);
					if (c) {
						/* trailing \ is lost */
						*wp++ = CHAR;
						*wp++ = '\\';
						*wp++ = CHAR;
						*wp++ = c;
					}
					break;
				}
				break;
			case '$':
				c = getsc();
 SubstS:
				if (c == '(') /*)*/ {
					c = getsc();
					if (c == '(') /*)*/ {
						*wp++ = EXPRSUB;
						PUSH_SRETRACE(SASPAREN);
						statep->nparen = 2;
						*retrace_info->xp++ = '(';
					} else {
						ungetsc(c);
 subst_command:
						c = COMSUB;
 subst_command2:
						/* c is the marker: COMSUB, VALSUB or FUNSUB */
						sp = yyrecursive(c);
						cz = strlen(sp) + 1;
						XcheckN(ws, wp, cz);
						*wp++ = c;
						memcpy(wp, sp, cz);
						wp += cz;
					}
				} else if (c == '{') /*}*/ {
					if ((c = getsc()) == '|') {
						/*
						 * non-subenvironment
						 * value substitution
						 */
						c = VALSUB;
						goto subst_command2;
					} else if (ctype(c, C_IFSWS)) {
						/*
						 * non-subenvironment
						 * "command" substitution
						 */
						c = FUNSUB;
						goto subst_command2;
					}
					ungetsc(c);
					*wp++ = OSUBST;
					*wp++ = '{'; /*}*/
					wp = get_brace_var(&ws, wp);
					c = getsc();
					/* allow :# and :% (ksh88 compat) */
					if (c == ':') {
						*wp++ = CHAR;
						*wp++ = c;
						c = getsc();
						if (c == ':') {
							*wp++ = CHAR;
							*wp++ = '0';
							*wp++ = ADELIM;
							*wp++ = ':';
							PUSH_STATE(SBRACE);
							PUSH_STATE(SADELIM);
							statep->ls_adelim.delimiter = ':';
							statep->ls_adelim.num = 1;
							statep->nparen = 0;
							break;
						} else if (ksh_isdigit(c) ||
						    c == '('/*)*/ || c == ' ' ||
						    /*XXX what else? */
						    c == '$') {
							/* substring subst. */
							if (c != ' ') {
								*wp++ = CHAR;
								*wp++ = ' ';
							}
							ungetsc(c);
							PUSH_STATE(SBRACE);
							PUSH_STATE(SADELIM);
							statep->ls_adelim.delimiter = ':';
							statep->ls_adelim.num = 2;
							statep->nparen = 0;
							break;
						}
					} else if (c == '/') {
						c2 = ADELIM;
 parse_adelim_slash:
						*wp++ = CHAR;
						*wp++ = c;
						if ((c = getsc()) == '/') {
							*wp++ = c2;
							*wp++ = c;
						} else
							ungetsc(c);
						PUSH_STATE(SBRACE);
						PUSH_STATE(SADELIM);
						statep->ls_adelim.delimiter = '/';
						statep->ls_adelim.num = 1;
						statep->nparen = 0;
						break;
					} else if (c == '@') {
						/* peek at the character after the @ */
						c2 = getsc();
						ungetsc(c2);
						if (c2 == '/') {
							c2 = CHAR;
							goto parse_adelim_slash;
						}
					}
					/*
					 * If this is a trim operation,
					 * treat (,|,) specially in STBRACE.
					 */
					if (ctype(c, C_SUBOP2)) {
						ungetsc(c);
						if (Flag(FSH))
							PUSH_STATE(STBRACEBOURNE);
						else
							PUSH_STATE(STBRACEKORN);
					} else {
						ungetsc(c);
						if (state == SDQUOTE ||
						    state == SQBRACE)
							PUSH_STATE(SQBRACE);
						else
							PUSH_STATE(SBRACE);
					}
				} else if (ksh_isalphx(c)) {
					*wp++ = OSUBST;
					*wp++ = 'X';
					do {
						Xcheck(ws, wp);
						*wp++ = c;
						c = getsc();
					} while (ksh_isalnux(c));
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
					ungetsc(c);
				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
					Xcheck(ws, wp);
					*wp++ = OSUBST;
					*wp++ = 'X';
					*wp++ = c;
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
				} else {
					/* a dollar sign that starts no substitution */
					*wp++ = CHAR;
					*wp++ = '$';
					ungetsc(c);
				}
				break;
			case '`':
 subst_gravis:
				PUSH_STATE(SBQUOTE);
				*wp++ = COMSUB;
				/*
				 * We need to know whether we are within double
				 * quotes in order to translate \" to " within
				 * "…`…\"…`…" because, unlike for COMSUBs, the
				 * outer double quoteing changes the backslash
				 * meaning for the inside. For more details:
				 * http://austingroupbugs.net/view.php?id=1015
				 */
				statep->ls_bool = false;
				s2 = statep;
				base = state_info.base;
				/* search the state stack, block by block, for an SDQUOTE */
				while (/* CONSTCOND */ 1) {
					for (; s2 != base; s2--) {
						if (s2->type == SDQUOTE) {
							statep->ls_bool = true;
							break;
						}
					}
					if (s2 != base)
						break;
					if (!(s2 = s2->ls_base))
						break;
					base = s2-- - STATE_BSIZE;
				}
				break;
			case QCHAR:
				if (cf & LQCHAR) {
					*wp++ = QCHAR;
					*wp++ = getsc();
					break;
				}
				/* FALLTHROUGH */
			default:
 store_char:
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		case SEQUOTE:
			if (c == '\'') {
				POP_STATE();
				*wp++ = CQUOTE;
				ignore_backslash_newline--;
			} else if (c == '\\') {
				if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
					c2 = getsc();
				if (c2 == 0)
					statep->ls_bool = true;
				if (!statep->ls_bool) {
					char ts[4];

					if ((unsigned int)c2 < 0x100) {
						*wp++ = QCHAR;
						*wp++ = c2;
					} else {
						/* emit the bytes of c2 - 0x100 as converted by utf_wctomb() */
						cz = utf_wctomb(ts, c2 - 0x100);
						ts[cz] = 0;
						cz = 0;
						do {
							*wp++ = QCHAR;
							*wp++ = ts[cz];
						} while (ts[++cz]);
					}
				}
			} else if (!statep->ls_bool) {
				*wp++ = QCHAR;
				*wp++ = c;
			}
			break;

		case SSQUOTE:
			if (c == '\'') {
				POP_STATE();
				if ((cf & HEREDOC) || state == SQBRACE)
					goto store_char;
				*wp++ = CQUOTE;
				ignore_backslash_newline--;
			} else {
				*wp++ = QCHAR;
				*wp++ = c;
			}
			break;

		case SDQUOTE:
			if (c == '"') {
				POP_STATE();
				*wp++ = CQUOTE;
			} else
				goto Subst;
			break;

		/* $(( ... )) */
		case SASPAREN:
			if (c == '(')
				statep->nparen++;
			else if (c == ')') {
				statep->nparen--;
				if (statep->nparen == 1) {
					/* end of EXPRSUB */
					POP_SRETRACE();

					if ((c2 = getsc()) == /*(*/ ')') {
						/* copy the retraced text without its first and last character */
						cz = strlen(sp) - 2;
						XcheckN(ws, wp, cz);
						memcpy(wp, sp + 1, cz);
						wp += cz;
						afree(sp, ATEMP);
						*wp++ = '\0';
						break;
					} else {
						Source *s;

						ungetsc(c2);
						/*
						 * mismatched parenthesis -
						 * assume we were really
						 * parsing a $(...) expression
						 */
						--wp;
						s = pushs(SREREAD,
						    source->areap);
						s->start = s->str =
						    s->u.freeme = sp;
						s->next = source;
						source = s;
						goto subst_command;
					}
				}
			}
			/* reuse existing state machine */
			goto Sbase2;

		case SQBRACE:
			if (c == '\\') {
				/*
				 * perform POSIX "quote removal" if the back-
				 * slash is "special", i.e. same cases as the
				 * {case '\\':} in Subst: plus closing brace;
				 * in mksh code "quote removal" on '\c' means
				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
				 * emitted (in heredocquote:)
				 */
				if ((c = getsc()) == '"' || c == '\\' ||
				    c == '$' || c == '`' || c == /*{*/'}')
					goto store_qchar;
				goto heredocquote;
			}
			goto common_SQBRACE;

		case SBRACE:
			if (c == '\'')
				goto open_ssquote_unless_heredoc;
			else if (c == '\\')
				goto getsc_qchar;
 common_SQBRACE:
			if (c == '"')
				goto open_sdquote;
			else if (c == '$')
				goto subst_dollar_ex;
			else if (c == '`')
				goto subst_gravis;
			else if (c != /*{*/ '}')
				goto store_char;
			POP_STATE();
			*wp++ = CSUBST;
			*wp++ = /*{*/ '}';
			break;

		/* Same as SBASE, except (,|,) treated specially */
		case STBRACEKORN:
			if (c == '|')
				*wp++ = SPAT;
			else if (c == '(') {
				*wp++ = OPAT;
				/* simile for @ */
				*wp++ = ' ';
				PUSH_STATE(SPATTERN);
			} else /* FALLTHROUGH */
		case STBRACEBOURNE:
			  if (c == /*{*/ '}') {
				POP_STATE();
				*wp++ = CSUBST;
				*wp++ = /*{*/ '}';
			} else
				goto Sbase1;
			break;

		case SBQUOTE:
			if (c == '`') {
				*wp++ = 0;
				POP_STATE();
			} else if (c == '\\') {
				switch (c = getsc()) {
				case 0:
					/* trailing \ is lost */
					break;
				case '$':
				case '`':
				case '\\':
					*wp++ = c;
					break;
				case '"':
					/* ls_bool was set at subst_gravis when within "" */
					if (statep->ls_bool) {
						*wp++ = c;
						break;
					}
					/* FALLTHROUGH */
				default:
					*wp++ = '\\';
					*wp++ = c;
					break;
				}
			} else
				*wp++ = c;
			break;

		/* ONEWORD */
		case SWORD:
			goto Subst;

		/* LETEXPR: (( ... )) */
		case SLETPAREN:
			if (c == /*(*/ ')') {
				if (statep->nparen > 0)
					--statep->nparen;
				else if ((c2 = getsc()) == /*(*/ ')') {
					c = 0;
					*wp++ = CQUOTE;
					goto Done;
				} else {
					Source *s;

					ungetsc(c2);
					ungetsc(c);
					/*
					 * mismatched parenthesis -
					 * assume we were really
					 * parsing a (...) expression
					 */
					*wp = EOS;
					sp = Xstring(ws, wp);
					dp = wdstrip(sp + 1, WDS_TPUTS);
					s = pushs(SREREAD, source->areap);
					s->start = s->str = s->u.freeme = dp;
					s->next = source;
					source = s;
					ungetsc('('/*)*/);
					return ('('/*)*/);
				}
			} else if (c == '(')
				/*
				 * parentheses inside quotes and
				 * backslashes are lost, but AT&T ksh
				 * doesn't count them either
				 */
				++statep->nparen;
			goto Sbase2;

		/* << or <<- delimiter */
		case SHEREDELIM:
			/*
			 * here delimiters need a special case since
			 * $ and `...` are not to be treated specially
			 */
			switch (c) {
			case '\\':
				if ((c = getsc())) {
					/* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
				break;
			case '\'':
				goto open_ssquote_unless_heredoc;
			case '$':
				if ((c2 = getsc()) == '\'') {
 open_sequote:
					*wp++ = OQUOTE;
					ignore_backslash_newline++;
					PUSH_STATE(SEQUOTE);
					statep->ls_bool = false;
					break;
				} else if (c2 == '"') {
					/* FALLTHROUGH */
			case '"':
					PUSH_SRETRACE(SHEREDQUOTE);
					break;
				}
				ungetsc(c2);
				/* FALLTHROUGH */
			default:
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		/* " in << or <<- delimiter */
		case SHEREDQUOTE:
			if (c != '"')
				goto Subst;
			POP_SRETRACE();
			dp = strnul(sp) - 1;
			/* remove the trailing double quote */
			*dp = '\0';
			/* store the quoted string */
			*wp++ = OQUOTE;
			XcheckN(ws, wp, (dp - sp) * 2);
			dp = sp;
			while ((c = *dp++)) {
				if (c == '\\') {
					/* drop the backslash only before \ " $ ` */
					switch ((c = *dp++)) {
					case '\\':
					case '"':
					case '$':
					case '`':
						break;
					default:
						*wp++ = CHAR;
						*wp++ = '\\';
						break;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
			}
			afree(sp, ATEMP);
			*wp++ = CQUOTE;
			state = statep->type = SHEREDELIM;
			break;

		/* in *(...|...) pattern (*+?@!) */
		case SPATTERN:
			if (c == /*(*/ ')') {
				*wp++ = CPAT;
				POP_STATE();
			} else if (c == '|') {
				*wp++ = SPAT;
			} else if (c == '(') {
				*wp++ = OPAT;
				/* simile for @ */
				*wp++ = ' ';
				PUSH_STATE(SPATTERN);
			} else
				goto Sbase1;
			break;
		}
	}
 Done:
	Xcheck(ws, wp);
	/* anything left on the state stack means a construct was not closed */
	if (statep != &states[1])
		/* XXX figure out what is missing */
		yyerror("no closing quote\n");

	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
	if (state == SHEREDELIM)
		state = SBASE;

	dp = Xstring(ws, wp);
	/*
	 * redirection: the terminator is < or > (or &, unless disabled
	 * below) and the word so far is empty or one unquoted digit
	 */
	if (state == SBASE && (
#ifndef MKSH_LEGACY_MODE
	    (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
#endif
	    c == '<' || c == '>') && ((c2 = Xlength(ws, wp)) == 0 ||
	    (c2 == 2 && dp[0] == CHAR && ksh_isdigit(dp[1])))) {
		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);

		/* explicit digit, else 0 for < and 1 otherwise */
		iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;

		if (c == '&') {
			/* only &> is a redirection */
			if ((c2 = getsc()) != '>') {
				ungetsc(c2);
				goto no_iop;
			}
			c = c2;
			iop->ioflag = IOBASH;
		} else
			iop->ioflag = 0;

		c2 = getsc();
		/* <<, >>, <> are ok, >< is not */
		if (c == c2 || (c == '<' && c2 == '>')) {
			iop->ioflag |= c == c2 ?
			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
			if (iop->ioflag == IOHERE) {
				if ((c2 = getsc()) == '-')
					iop->ioflag |= IOSKIP;
				else if (c2 == '<')
					iop->ioflag |= IOHERESTR;
				else
					ungetsc(c2);
			}
		} else if (c2 == '&')
			iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0);
		else {
			iop->ioflag |= c == '>' ? IOWRITE : IOREAD;
			if (c == '>' && c2 == '|')
				iop->ioflag |= IOCLOB;
			else
				ungetsc(c2);
		}

		iop->ioname = NULL;
		iop->delim = NULL;
		iop->heredoc = NULL;
		/* free word */
		Xfree(ws, wp);
		yylval.iop = iop;
		return (REDIR);
 no_iop:
		afree(iop, ATEMP);
	}

	if (wp == dp && state == SBASE) {
		/* free word */
		Xfree(ws, wp);
		/* no word, process LEX1 character */
		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
			if ((c2 = getsc()) == c)
				c = (c == ';') ? BREAK :
				    (c == '|') ? LOGOR :
				    (c == '&') ? LOGAND :
				    /* c == '(' ) */ MDPAREN;
			else if (c == '|' && c2 == '&')
				c = COPROC;
			else if (c == ';' && c2 == '|')
				c = BRKEV;
			else if (c == ';' && c2 == '&')
				c = BRKFT;
			else
				ungetsc(c2);
#ifndef MKSH_SMALL
			/* ;;& is accepted as well and yields BRKEV */
			if (c == BREAK) {
				if ((c2 = getsc()) == '&')
					c = BRKEV;
				else
					ungetsc(c2);
			}
#endif
		} else if (c == '\n') {
			if (cf & HEREDELIM)
				ungetsc(c);
			else {
				/* read the bodies of pending here documents */
				gethere();
				if (cf & CONTIN)
					goto Again;
			}
		} else if (c == '\0' && !(cf & HEREDELIM)) {
			struct ioword **p = heres;

			/* at EOF only here strings may still be pending */
			while (p < herep)
				if ((*p)->ioflag & IOHERESTR)
					++p;
				else
					/* ksh -c 'cat <<EOF' can cause this */
					yyerror(Tf_heredoc,
					    evalstr((*p)->delim, 0));
		}
		return (c);
	}

	/* terminate word */
	*wp++ = EOS;
	yylval.cp = Xclose(ws, wp);
	if (state == SWORD || state == SLETPAREN
	    /* XXX ONEWORD? */)
		return (LWORD);

	/* unget terminator */
	ungetsc(c);

	/*
	 * note: the alias-vs-function code below depends on several
	 * interna: starting from here, source->str is not modified;
	 * the way getsc() and ungetsc() operate; etc.
	 */

	/* copy word to unprefixed string ident */
	sp = yylval.cp;
	dp = ident;
	while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
		*dp++ = *sp++;
	if (c != EOS)
		/* word is not unquoted */
		dp = ident;
	/* make sure the ident array stays NUL padded */
	memset(dp, 0, (ident + IDENT) - dp + 1);

	if (!(cf & (KEYWORD | ALIAS)))
		return (LWORD);

	if (*ident != '\0') {
		struct tbl *p;
		uint32_t h = hash(ident);

		/* reserved word; with ESACONLY only esac and } count */
		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
		    (!(cf & ESACONLY) || p->val.i == ESAC ||
		    p->val.i == /*{*/ '}')) {
			afree(yylval.cp, ATEMP);
			return (p->val.i);
		}
		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
		    (p->flag & ISSET)) {
			/*
			 * this still points to the same character as the
			 * ungetsc'd terminator from above
			 */
			const char *cp = source->str;

			/* prefer POSIX but not Korn functions over aliases */
			while (*cp == ' ' || *cp == '\t')
				/*
				 * this is like getsc() without skipping
				 * over Source boundaries (including not
				 * parsing ungetsc'd characters that got
				 * pushed into an SREREAD) which is what
				 * we want here anyway: find out whether
				 * the alias name is followed by a POSIX
				 * function definition
				 */
				++cp;
			/* prefer functions over aliases */
			if (cp[0] != '(' || cp[1] != ')') {
				Source *s = source;

				/* do not expand an alias that is already being expanded */
				while (s && (s->flags & SF_HASALIAS))
					if (s->u.tblp == p)
						return (LWORD);
					else
						s = s->next;
				/* push alias expansion */
				s = pushs(SALIAS, source->areap);
				s->start = s->str = p->val.s;
				s->u.tblp = p;
				s->flags |= SF_HASALIAS;
				s->next = source;
				if (source->type == SEOF) {
					/* prevent infinite recursion at EOS */
					source->u.tblp = p;
					source->flags |= SF_HASALIAS;
				}
				source = s;
				afree(yylval.cp, ATEMP);
				goto Again;
			}
		}
	} else if (cf & ALIAS) {
		/* retain typeset et al. even when quoted */
		if (assign_command((dp = wdstrip(yylval.cp, 0)), true))
			strlcpy(ident, dp, sizeof(ident));
		afree(dp, ATEMP);
	}

	return (LWORD);
}
1091 
1092 static void
gethere(void)1093 gethere(void)
1094 {
1095 	struct ioword **p;
1096 
1097 	for (p = heres; p < herep; p++)
1098 		if (!((*p)->ioflag & IOHERESTR))
1099 			readhere(*p);
1100 	herep = heres;
1101 }
1102 
1103 /*
1104  * read "<<word" text into temp file
1105  */
1106 
1107 static void
readhere(struct ioword * iop)1108 readhere(struct ioword *iop)
1109 {
1110 	int c;
1111 	const char *eof, *eofp;
1112 	XString xs;
1113 	char *xp;
1114 	size_t xpos;
1115 
1116 	eof = evalstr(iop->delim, 0);
1117 
1118 	if (!(iop->ioflag & IOEVAL))
1119 		ignore_backslash_newline++;
1120 
1121 	Xinit(xs, xp, 256, ATEMP);
1122 
1123  heredoc_read_line:
1124 	/* beginning of line */
1125 	eofp = eof;
1126 	xpos = Xsavepos(xs, xp);
1127 	if (iop->ioflag & IOSKIP) {
1128 		/* skip over leading tabs */
1129 		while ((c = getsc()) == '\t')
1130 			;	/* nothing */
1131 		goto heredoc_parse_char;
1132 	}
1133  heredoc_read_char:
1134 	c = getsc();
1135  heredoc_parse_char:
1136 	/* compare with here document marker */
1137 	if (!*eofp) {
1138 		/* end of here document marker, what to do? */
1139 		switch (c) {
1140 		case /*(*/ ')':
1141 			if (!subshell_nesting_type)
1142 				/*-
1143 				 * not allowed outside $(...) or (...)
1144 				 * => mismatch
1145 				 */
1146 				break;
1147 			/* allow $(...) or (...) to close here */
1148 			ungetsc(/*(*/ ')');
1149 			/* FALLTHROUGH */
1150 		case 0:
1151 			/*
1152 			 * Allow EOF here to commands without trailing
1153 			 * newlines (mksh -c '...') will work as well.
1154 			 */
1155 		case '\n':
1156 			/* Newline terminates here document marker */
1157 			goto heredoc_found_terminator;
1158 		}
1159 	} else if (c == *eofp++)
1160 		/* store; then read and compare next character */
1161 		goto heredoc_store_and_loop;
1162 	/* nope, mismatch; read until end of line */
1163 	while (c != '\n') {
1164 		if (!c)
1165 			/* oops, reached EOF */
1166 			yyerror(Tf_heredoc, eof);
1167 		/* store character */
1168 		Xcheck(xs, xp);
1169 		Xput(xs, xp, c);
1170 		/* read next character */
1171 		c = getsc();
1172 	}
1173 	/* we read a newline as last character */
1174  heredoc_store_and_loop:
1175 	/* store character */
1176 	Xcheck(xs, xp);
1177 	Xput(xs, xp, c);
1178 	if (c == '\n')
1179 		goto heredoc_read_line;
1180 	goto heredoc_read_char;
1181 
1182  heredoc_found_terminator:
1183 	/* jump back to saved beginning of line */
1184 	xp = Xrestpos(xs, xp, xpos);
1185 	/* terminate, close and store */
1186 	Xput(xs, xp, '\0');
1187 	iop->heredoc = Xclose(xs, xp);
1188 
1189 	if (!(iop->ioflag & IOEVAL))
1190 		ignore_backslash_newline--;
1191 }
1192 
1193 void
yyerror(const char * fmt,...)1194 yyerror(const char *fmt, ...)
1195 {
1196 	va_list va;
1197 
1198 	/* pop aliases and re-reads */
1199 	while (source->type == SALIAS || source->type == SREREAD)
1200 		source = source->next;
1201 	/* zap pending input */
1202 	source->str = null;
1203 
1204 	error_prefix(true);
1205 	va_start(va, fmt);
1206 	shf_vfprintf(shl_out, fmt, va);
1207 	va_end(va);
1208 	errorfz();
1209 }
1210 
1211 /*
1212  * input for yylex with alias expansion
1213  */
1214 
1215 Source *
pushs(int type,Area * areap)1216 pushs(int type, Area *areap)
1217 {
1218 	Source *s;
1219 
1220 	s = alloc(sizeof(Source), areap);
1221 	memset(s, 0, sizeof(Source));
1222 	s->type = type;
1223 	s->str = null;
1224 	s->areap = areap;
1225 	if (type == SFILE || type == SSTDIN)
1226 		XinitN(s->xs, 256, s->areap);
1227 	return (s);
1228 }
1229 
1230 static int
getsc_uu(void)1231 getsc_uu(void)
1232 {
1233 	Source *s = source;
1234 	int c;
1235 
1236 	while ((c = *s->str++) == 0) {
1237 		/* return 0 for EOF by default */
1238 		s->str = NULL;
1239 		switch (s->type) {
1240 		case SEOF:
1241 			s->str = null;
1242 			return (0);
1243 
1244 		case SSTDIN:
1245 		case SFILE:
1246 			getsc_line(s);
1247 			break;
1248 
1249 		case SWSTR:
1250 			break;
1251 
1252 		case SSTRING:
1253 		case SSTRINGCMDLINE:
1254 			break;
1255 
1256 		case SWORDS:
1257 			s->start = s->str = *s->u.strv++;
1258 			s->type = SWORDSEP;
1259 			break;
1260 
1261 		case SWORDSEP:
1262 			if (*s->u.strv == NULL) {
1263 				s->start = s->str = "\n";
1264 				s->type = SEOF;
1265 			} else {
1266 				s->start = s->str = T1space;
1267 				s->type = SWORDS;
1268 			}
1269 			break;
1270 
1271 		case SALIAS:
1272 			if (s->flags & SF_ALIASEND) {
1273 				/* pass on an unused SF_ALIAS flag */
1274 				source = s->next;
1275 				source->flags |= s->flags & SF_ALIAS;
1276 				s = source;
1277 			} else if (*s->u.tblp->val.s &&
1278 			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1279 				/* pop source stack */
1280 				source = s = s->next;
1281 				/*
1282 				 * Note that this alias ended with a
1283 				 * space, enabling alias expansion on
1284 				 * the following word.
1285 				 */
1286 				s->flags |= SF_ALIAS;
1287 			} else {
1288 				/*
1289 				 * At this point, we need to keep the current
1290 				 * alias in the source list so recursive
1291 				 * aliases can be detected and we also need to
1292 				 * return the next character. Do this by
1293 				 * temporarily popping the alias to get the
1294 				 * next character and then put it back in the
1295 				 * source list with the SF_ALIASEND flag set.
1296 				 */
1297 				/* pop source stack */
1298 				source = s->next;
1299 				source->flags |= s->flags & SF_ALIAS;
1300 				c = getsc_uu();
1301 				if (c) {
1302 					s->flags |= SF_ALIASEND;
1303 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1304 					s->start = s->str = s->ugbuf;
1305 					s->next = source;
1306 					source = s;
1307 				} else {
1308 					s = source;
1309 					/* avoid reading EOF twice */
1310 					s->str = NULL;
1311 					break;
1312 				}
1313 			}
1314 			continue;
1315 
1316 		case SREREAD:
1317 			if (s->start != s->ugbuf)
1318 				/* yuck */
1319 				afree(s->u.freeme, ATEMP);
1320 			source = s = s->next;
1321 			continue;
1322 		}
1323 		if (s->str == NULL) {
1324 			s->type = SEOF;
1325 			s->start = s->str = null;
1326 			return ('\0');
1327 		}
1328 		if (s->flags & SF_ECHO) {
1329 			shf_puts(s->str, shl_out);
1330 			shf_flush(shl_out);
1331 		}
1332 	}
1333 	return (c);
1334 }
1335 
1336 static void
getsc_line(Source * s)1337 getsc_line(Source *s)
1338 {
1339 	char *xp = Xstring(s->xs, xp), *cp;
1340 	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1341 	bool have_tty = tobool(interactive && (s->flags & SF_TTY));
1342 
1343 	/* Done here to ensure nothing odd happens when a timeout occurs */
1344 	XcheckN(s->xs, xp, LINE);
1345 	*xp = '\0';
1346 	s->start = s->str = xp;
1347 
1348 	if (have_tty && ksh_tmout) {
1349 		ksh_tmout_state = TMOUT_READING;
1350 		alarm(ksh_tmout);
1351 	}
1352 	if (interactive) {
1353 		if (cur_prompt == PS1)
1354 			histsave(&s->line, NULL, HIST_FLUSH, true);
1355 		change_winsz();
1356 	}
1357 #ifndef MKSH_NO_CMDLINE_EDITING
1358 	if (have_tty && (
1359 #if !MKSH_S_NOVI
1360 	    Flag(FVI) ||
1361 #endif
1362 	    Flag(FEMACS) || Flag(FGMACS))) {
1363 		int nread;
1364 
1365 		nread = x_read(xp);
1366 		if (nread < 0)
1367 			/* read error */
1368 			nread = 0;
1369 		xp[nread] = '\0';
1370 		xp += nread;
1371 	} else
1372 #endif
1373 	  {
1374 		if (interactive)
1375 			pprompt(prompt, 0);
1376 		else
1377 			s->line++;
1378 
1379 		while (/* CONSTCOND */ 1) {
1380 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1381 
1382 			if (!p && shf_error(s->u.shf) &&
1383 			    shf_errno(s->u.shf) == EINTR) {
1384 				shf_clearerr(s->u.shf);
1385 				if (trap)
1386 					runtraps(0);
1387 				continue;
1388 			}
1389 			if (!p || (xp = p, xp[-1] == '\n'))
1390 				break;
1391 			/* double buffer size */
1392 			/* move past NUL so doubling works... */
1393 			xp++;
1394 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
1395 			/* ...and move back again */
1396 			xp--;
1397 		}
1398 		/*
1399 		 * flush any unwanted input so other programs/builtins
1400 		 * can read it. Not very optimal, but less error prone
1401 		 * than flushing else where, dealing with redirections,
1402 		 * etc.
1403 		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
1404 		 */
1405 		if (s->type == SSTDIN)
1406 			shf_flush(s->u.shf);
1407 	}
1408 	/*
1409 	 * XXX: temporary kludge to restore source after a
1410 	 * trap may have been executed.
1411 	 */
1412 	source = s;
1413 	if (have_tty && ksh_tmout) {
1414 		ksh_tmout_state = TMOUT_EXECUTING;
1415 		alarm(0);
1416 	}
1417 	cp = Xstring(s->xs, xp);
1418 	rndpush(cp);
1419 	s->start = s->str = cp;
1420 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1421 	/* Note: if input is all nulls, this is not eof */
1422 	if (Xlength(s->xs, xp) == 0) {
1423 		/* EOF */
1424 		if (s->type == SFILE)
1425 			shf_fdclose(s->u.shf);
1426 		s->str = NULL;
1427 	} else if (interactive && *s->str) {
1428 		if (cur_prompt != PS1)
1429 			histsave(&s->line, s->str, HIST_APPEND, true);
1430 		else if (!ctype(*s->str, C_IFS | C_IFSWS))
1431 			histsave(&s->line, s->str, HIST_QUEUE, true);
1432 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1433 		else
1434 			goto check_for_sole_return;
1435 	} else if (interactive && cur_prompt == PS1) {
1436  check_for_sole_return:
1437 		cp = Xstring(s->xs, xp);
1438 		while (*cp && ctype(*cp, C_IFSWS))
1439 			++cp;
1440 		if (!*cp) {
1441 			histsave(&s->line, NULL, HIST_FLUSH, true);
1442 			histsync();
1443 		}
1444 #endif
1445 	}
1446 	if (interactive)
1447 		set_prompt(PS2, NULL);
1448 }
1449 
1450 void
set_prompt(int to,Source * s)1451 set_prompt(int to, Source *s)
1452 {
1453 	cur_prompt = (uint8_t)to;
1454 
1455 	switch (to) {
1456 	/* command */
1457 	case PS1:
1458 		/*
1459 		 * Substitute ! and !! here, before substitutions are done
1460 		 * so ! in expanded variables are not expanded.
1461 		 * NOTE: this is not what AT&T ksh does (it does it after
1462 		 * substitutions, POSIX doesn't say which is to be done.
1463 		 */
1464 		{
1465 			struct shf *shf;
1466 			char * volatile ps1;
1467 			Area *saved_atemp;
1468 			int saved_lineno;
1469 
1470 			ps1 = str_val(global("PS1"));
1471 			shf = shf_sopen(NULL, strlen(ps1) * 2,
1472 			    SHF_WR | SHF_DYNAMIC, NULL);
1473 			while (*ps1)
1474 				if (*ps1 != '!' || *++ps1 == '!')
1475 					shf_putchar(*ps1++, shf);
1476 				else
1477 					shf_fprintf(shf, Tf_lu, s ?
1478 					    (unsigned long)s->line + 1 : 0UL);
1479 			ps1 = shf_sclose(shf);
1480 			saved_lineno = current_lineno;
1481 			if (s)
1482 				current_lineno = s->line + 1;
1483 			saved_atemp = ATEMP;
1484 			newenv(E_ERRH);
1485 			if (kshsetjmp(e->jbuf)) {
1486 				prompt = safe_prompt;
1487 				/*
1488 				 * Don't print an error - assume it has already
1489 				 * been printed. Reason is we may have forked
1490 				 * to run a command and the child may be
1491 				 * unwinding its stack through this code as it
1492 				 * exits.
1493 				 */
1494 			} else {
1495 				char *cp = substitute(ps1, 0);
1496 				strdupx(prompt, cp, saved_atemp);
1497 			}
1498 			current_lineno = saved_lineno;
1499 			quitenv(NULL);
1500 		}
1501 		break;
1502 	/* command continuation */
1503 	case PS2:
1504 		prompt = str_val(global("PS2"));
1505 		break;
1506 	}
1507 }
1508 
1509 int
pprompt(const char * cp,int ntruncate)1510 pprompt(const char *cp, int ntruncate)
1511 {
1512 	char delimiter = 0;
1513 	bool doprint = (ntruncate != -1);
1514 	bool indelimit = false;
1515 	int columns = 0, lines = 0;
1516 
1517 	/*
1518 	 * Undocumented AT&T ksh feature:
1519 	 * If the second char in the prompt string is \r then the first
1520 	 * char is taken to be a non-printing delimiter and any chars
1521 	 * between two instances of the delimiter are not considered to
1522 	 * be part of the prompt length
1523 	 */
1524 	if (*cp && cp[1] == '\r') {
1525 		delimiter = *cp;
1526 		cp += 2;
1527 	}
1528 	for (; *cp; cp++) {
1529 		if (indelimit && *cp != delimiter)
1530 			;
1531 		else if (*cp == '\n' || *cp == '\r') {
1532 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1533 			columns = 0;
1534 		} else if (*cp == '\t') {
1535 			columns = (columns | 7) + 1;
1536 		} else if (*cp == '\b') {
1537 			if (columns > 0)
1538 				columns--;
1539 		} else if (*cp == delimiter)
1540 			indelimit = !indelimit;
1541 		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1542 			const char *cp2;
1543 			columns += utf_widthadj(cp, &cp2);
1544 			if (doprint && (indelimit ||
1545 			    (ntruncate < (x_cols * lines + columns))))
1546 				shf_write(cp, cp2 - cp, shl_out);
1547 			cp = cp2 - /* loop increment */ 1;
1548 			continue;
1549 		} else
1550 			columns++;
1551 		if (doprint && (*cp != delimiter) &&
1552 		    (indelimit || (ntruncate < (x_cols * lines + columns))))
1553 			shf_putc(*cp, shl_out);
1554 	}
1555 	if (doprint)
1556 		shf_flush(shl_out);
1557 	return (x_cols * lines + columns);
1558 }
1559 
1560 /*
1561  * Read the variable part of a ${...} expression (i.e. up to but not
1562  * including the :[-+?=#%] or close-brace).
1563  */
1564 static char *
get_brace_var(XString * wsp,char * wp)1565 get_brace_var(XString *wsp, char *wp)
1566 {
1567 	char c;
1568 	enum parse_state {
1569 		PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG,
1570 		PS_IDENT, PS_NUMBER, PS_VAR1
1571 	} state = PS_INITIAL;
1572 
1573 	while (/* CONSTCOND */ 1) {
1574 		c = getsc();
1575 		/* State machine to figure out where the variable part ends. */
1576 		switch (state) {
1577 		case PS_SAW_HASH:
1578 			if (ctype(c, C_VAR1)) {
1579 				char c2;
1580 
1581 				c2 = getsc();
1582 				ungetsc(c2);
1583 				if (c2 != /*{*/ '}') {
1584 					ungetsc(c);
1585 					goto out;
1586 				}
1587 			}
1588 			goto ps_common;
1589 		case PS_SAW_BANG:
1590 			switch (c) {
1591 			case '@':
1592 			case '#':
1593 			case '-':
1594 			case '?':
1595 				goto out;
1596 			}
1597 			goto ps_common;
1598 		case PS_INITIAL:
1599 			switch (c) {
1600 			case '%':
1601 				state = PS_SAW_PERCENT;
1602 				goto next;
1603 			case '#':
1604 				state = PS_SAW_HASH;
1605 				goto next;
1606 			case '!':
1607 				state = PS_SAW_BANG;
1608 				goto next;
1609 			}
1610 			/* FALLTHROUGH */
1611 		case PS_SAW_PERCENT:
1612  ps_common:
1613 			if (ksh_isalphx(c))
1614 				state = PS_IDENT;
1615 			else if (ksh_isdigit(c))
1616 				state = PS_NUMBER;
1617 			else if (ctype(c, C_VAR1))
1618 				state = PS_VAR1;
1619 			else
1620 				goto out;
1621 			break;
1622 		case PS_IDENT:
1623 			if (!ksh_isalnux(c)) {
1624 				if (c == '[') {
1625 					char *tmp, *p;
1626 
1627 					if (!arraysub(&tmp))
1628 						yyerror("missing ]\n");
1629 					*wp++ = c;
1630 					for (p = tmp; *p; ) {
1631 						Xcheck(*wsp, wp);
1632 						*wp++ = *p++;
1633 					}
1634 					afree(tmp, ATEMP);
1635 					/* the ] */
1636 					c = getsc();
1637 				}
1638 				goto out;
1639 			}
1640  next:
1641 			break;
1642 		case PS_NUMBER:
1643 			if (!ksh_isdigit(c))
1644 				goto out;
1645 			break;
1646 		case PS_VAR1:
1647 			goto out;
1648 		}
1649 		Xcheck(*wsp, wp);
1650 		*wp++ = c;
1651 	}
1652  out:
1653 	/* end of variable part */
1654 	*wp++ = '\0';
1655 	ungetsc(c);
1656 	return (wp);
1657 }
1658 
1659 /*
1660  * Save an array subscript - returns true if matching bracket found, false
1661  * if eof or newline was found.
1662  * (Returned string double null terminated)
1663  */
1664 static bool
arraysub(char ** strp)1665 arraysub(char **strp)
1666 {
1667 	XString ws;
1668 	char *wp, c;
1669 	/* we are just past the initial [ */
1670 	unsigned int depth = 1;
1671 
1672 	Xinit(ws, wp, 32, ATEMP);
1673 
1674 	do {
1675 		c = getsc();
1676 		Xcheck(ws, wp);
1677 		*wp++ = c;
1678 		if (c == '[')
1679 			depth++;
1680 		else if (c == ']')
1681 			depth--;
1682 	} while (depth > 0 && c && c != '\n');
1683 
1684 	*wp++ = '\0';
1685 	*strp = Xclose(ws, wp);
1686 
1687 	return (tobool(depth == 0));
1688 }
1689 
1690 /* Unget a char: handles case when we are already at the start of the buffer */
1691 static void
ungetsc(int c)1692 ungetsc(int c)
1693 {
1694 	struct sretrace_info *rp = retrace_info;
1695 
1696 	if (backslash_skip)
1697 		backslash_skip--;
1698 	/* Don't unget EOF... */
1699 	if (source->str == null && c == '\0')
1700 		return;
1701 	while (rp) {
1702 		if (Xlength(rp->xs, rp->xp))
1703 			rp->xp--;
1704 		rp = rp->next;
1705 	}
1706 	ungetsc_i(c);
1707 }
1708 static void
ungetsc_i(int c)1709 ungetsc_i(int c)
1710 {
1711 	if (source->str > source->start)
1712 		source->str--;
1713 	else {
1714 		Source *s;
1715 
1716 		s = pushs(SREREAD, source->areap);
1717 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1718 		s->start = s->str = s->ugbuf;
1719 		s->next = source;
1720 		source = s;
1721 	}
1722 }
1723 
1724 
1725 /* Called to get a char that isn't a \newline sequence. */
1726 static int
getsc_bn(void)1727 getsc_bn(void)
1728 {
1729 	int c, c2;
1730 
1731 	if (ignore_backslash_newline)
1732 		return (o_getsc_u());
1733 
1734 	if (backslash_skip == 1) {
1735 		backslash_skip = 2;
1736 		return (o_getsc_u());
1737 	}
1738 
1739 	backslash_skip = 0;
1740 
1741 	while (/* CONSTCOND */ 1) {
1742 		c = o_getsc_u();
1743 		if (c == '\\') {
1744 			if ((c2 = o_getsc_u()) == '\n')
1745 				/* ignore the \newline; get the next char... */
1746 				continue;
1747 			ungetsc_i(c2);
1748 			backslash_skip = 1;
1749 		}
1750 		return (c);
1751 	}
1752 }
1753 
1754 void
yyskiputf8bom(void)1755 yyskiputf8bom(void)
1756 {
1757 	int c;
1758 
1759 	if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1760 		ungetsc_i(c);
1761 		return;
1762 	}
1763 	if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1764 		ungetsc_i(c);
1765 		ungetsc_i(0xEF);
1766 		return;
1767 	}
1768 	if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1769 		ungetsc_i(c);
1770 		ungetsc_i(0xBB);
1771 		ungetsc_i(0xEF);
1772 		return;
1773 	}
1774 	UTFMODE |= 8;
1775 }
1776 
1777 static Lex_state *
push_state_i(State_info * si,Lex_state * old_end)1778 push_state_i(State_info *si, Lex_state *old_end)
1779 {
1780 	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1781 
1782 	news[0].ls_base = old_end;
1783 	si->base = &news[0];
1784 	si->end = &news[STATE_BSIZE];
1785 	return (&news[1]);
1786 }
1787 
1788 static Lex_state *
pop_state_i(State_info * si,Lex_state * old_end)1789 pop_state_i(State_info *si, Lex_state *old_end)
1790 {
1791 	Lex_state *old_base = si->base;
1792 
1793 	si->base = old_end->ls_base - STATE_BSIZE;
1794 	si->end = old_end->ls_base;
1795 
1796 	afree(old_base, ATEMP);
1797 
1798 	return (si->base + STATE_BSIZE - 1);
1799 }
1800