1 /*	$OpenBSD: lex.c,v 1.49 2013/12/17 16:37:06 deraadt Exp $	*/
2 
3 /*-
4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5  *		 2011, 2012, 2013, 2014, 2015
6  *	Thorsten Glaser <tg@mirbsd.org>
7  *
8  * Provided that these terms and disclaimer and all copyright notices
9  * are retained or reproduced in an accompanying document, permission
10  * is granted to deal in this work without restriction, including un-
11  * limited rights to use, publicly perform, distribute, sell, modify,
12  * merge, give away, or sublicence.
13  *
14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15  * the utmost extent permitted by applicable law, neither express nor
16  * implied; without malicious intent or gross negligence. In no event
17  * may a licensor, author or contributor be held liable for indirect,
18  * direct, other damage, loss, or other issues arising in any way out
19  * of dealing in the work, even if advised of the possibility of such
20  * damage or existence of a defect, except proven that it results out
21  * of said person's immediate fault when using the work as intended.
22  */
23 
24 #include "sh.h"
25 
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.193.2.5 2015/04/19 19:18:19 tg Exp $");
27 
28 /*
29  * states while lexing word
 *
 * One of these values is kept in the uint8_t "type" member of every
 * Lex_state entry; yylex() mirrors the value of the current entry in
 * its local variable "state" and dispatches on it once per input
 * character. SINVALID is what yylex() stores in states[0], the entry
 * below the first state actually in use.
30  */
31 #define SBASE		0	/* outside any lexical constructs */
32 #define SWORD		1	/* implicit quoting for substitute() */
33 #define SLETPAREN	2	/* inside (( )), implicit quoting */
34 #define SSQUOTE		3	/* inside '' */
35 #define SDQUOTE		4	/* inside "" */
36 #define SEQUOTE		5	/* inside $'' */
37 #define SBRACE		6	/* inside ${} */
38 #define SQBRACE		7	/* inside "${}" */
39 #define SBQUOTE		8	/* inside `` */
40 #define SASPAREN	9	/* inside $(( )) */
41 #define SHEREDELIM	10	/* parsing <<,<<-,<<< delimiter */
42 #define SHEREDQUOTE	11	/* parsing " in <<,<<-,<<< delimiter */
43 #define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
44 #define SADELIM		13	/* like SBASE, looking for delimiter */
45 #define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
46 #define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
47 #define SINVALID	255	/* invalid state */
48 
/*
 * One entry of the list headed by retrace_info. While an entry is on
 * that list, o_getsc_r() appends every character it hands out to the
 * buffer of the entry; PUSH_SRETRACE() creates an entry and
 * POP_SRETRACE() removes it again.
 */
49 struct sretrace_info {
50 	struct sretrace_info *next;	/* entry pushed before this one */
51 	XString xs;			/* buffer receiving the characters */
52 	char *xp;			/* current write position in xs */
53 };
54 
55 /*
56  * Structure to keep track of the lexing state and the various pieces of info
57  * needed for each particular state.
 *
 * The union holds exactly one of its members at a time; which one is
 * meaningful follows from the state (see the comment on each member).
58  */
59 typedef struct lex_state {
60 	union {
61 		/* point to the next state block */
62 		struct lex_state *base;
63 		/* marks start of state output in output string */
		/* (written by PUSH_SRETRACE, read back by POP_SRETRACE) */
64 		int start;
65 		/* SBQUOTE: true if in double quotes: "`...`" */
66 		/* SEQUOTE: got NUL, ignore rest of string */
67 		bool abool;
68 		/* SADELIM information */
69 		struct {
70 			/* character to search for */
71 			unsigned char delimiter;
72 			/* max. number of delimiters */
73 			unsigned char num;
74 		} adelim;
75 	} u;
76 	/* count open parentheses */
77 	short nparen;
78 	/* type of this state */
79 	uint8_t type;
80 } Lex_state;
/* shorthands for the members of the union inside Lex_state */
81 #define ls_base		u.base
82 #define ls_start	u.start
83 #define ls_bool		u.abool
84 #define ls_adelim	u.adelim
85 
/*
 * Bounds of the block of Lex_state entries currently in use by yylex():
 * PUSH_STATE() calls push_state_i() once statep has advanced to "end",
 * POP_STATE() calls pop_state_i() once statep has dropped to "base".
 * yylex() initialises base to its states[] array and end to the
 * position STATE_BSIZE entries after it.
 */
86 typedef struct {
87 	Lex_state *base;
88 	Lex_state *end;
89 } State_info;
90 
/* functions private to the lexer */
91 static void readhere(struct ioword *);
92 static void ungetsc(int);
93 static void ungetsc_i(int);
94 static int getsc_uu(void);
95 static void getsc_line(Source *);
96 static int getsc_bn(void);
97 static int s_get(void);
98 static void s_put(int);
99 static char *get_brace_var(XString *, char *);
100 static bool arraysub(char **);
101 static void gethere(bool);
102 static Lex_state *push_state_i(State_info *, Lex_state *);
103 static Lex_state *pop_state_i(State_info *, Lex_state *);
104 
/*
 * while nonzero, o_getsc() does not take its fast path but always
 * calls getsc_bn(); yylex() resets it to 0 for every token
 */
105 static int backslash_skip;
/*
 * counter raised by yylex() while it skips a comment or is inside
 * '...' or $'...', and by readhere() for here documents lacking IOEVAL;
 * NOTE(review): presumably tells getsc_bn() to leave backslash-newline
 * pairs alone - confirm against getsc_bn(), which is defined elsewhere
 */
106 static int ignore_backslash_newline;
107 
108 /* optimised getsc_bn() */
/*
 * Takes the next byte directly from source->str unless that byte is
 * NUL or a backslash, or backslash_skip is set; in those cases the
 * work is left to getsc_bn().
 */
109 #define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
110 			    !backslash_skip ? *source->str++ : getsc_bn())
111 /* optimised getsc_uu() */
/* same idea: only a NUL byte at source->str makes it call getsc_uu() */
112 #define	o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())
113 
114 /* retrace helper */
/*
 * Expands to a complete brace-enclosed block which evaluates carg once,
 * appends the resulting character to the buffer of every entry on the
 * retrace_info list and then returns the character from the enclosing
 * function. It is therefore only usable as the body of a function
 * returning int (getsc() or getsc_r() below).
 */
115 #define o_getsc_r(carg)	{				\
116 	int cev = (carg);				\
117 	struct sretrace_info *rp = retrace_info;	\
118 							\
119 	while (rp) {					\
120 		Xcheck(rp->xs, rp->xp);			\
121 		*rp->xp++ = cev;			\
122 		rp = rp->next;				\
123 	}						\
124 							\
125 	return (cev);					\
126 }
127 
/*
 * getsc(): fetch the next input character through o_getsc() and record
 * it in all active retrace buffers (see o_getsc_r).
 *
 * With MKSH_SMALL defined and MKSH_SMALL_BUT_FAST not defined, getsc()
 * is a single function doing both steps. Otherwise getsc() is a macro:
 * o_getsc() is expanded at each place getsc() is used and only the
 * recording step, getsc_r(), is an actual function.
 */
128 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
129 static int getsc(void);
130 
131 static int
getsc(void)132 getsc(void)
133 {
134 	o_getsc_r(o_getsc());
135 }
136 #else
137 static int getsc_r(int);
138 
/* record the already fetched character c and return it unchanged */
139 static int
getsc_r(int c)140 getsc_r(int c)
141 {
142 	o_getsc_r(c);
143 }
144 
145 #define getsc()		getsc_r(o_getsc())
146 #endif
147 
/* number of Lex_state entries in one block (size of states[] in yylex()) */
148 #define STATE_BSIZE	8
149 
/*
 * The four macros below are meant for use inside yylex() only: they
 * operate directly on its local variables (statep, state_info, state,
 * ws, wp, sp, dp) and on the retrace_info list.
 */

/*
 * Enter state s: advance statep, letting push_state_i() supply the new
 * position when the end of the current block is reached, then store s
 * both in the new entry and in "state".
 */
150 #define PUSH_STATE(s)	do {					\
151 	if (++statep == state_info.end)				\
152 		statep = push_state_i(&state_info, statep);	\
153 	state = statep->type = (s);				\
154 } while (/* CONSTCOND */ 0)
155 
/*
 * Leave the current state: step statep back, letting pop_state_i()
 * supply the new position when the base of the current block is
 * reached, then reload "state" from the entry now on top.
 */
156 #define POP_STATE()	do {					\
157 	if (--statep == state_info.base)			\
158 		statep = pop_state_i(&state_info, statep);	\
159 	state = statep->type;					\
160 } while (/* CONSTCOND */ 0)
161 
/*
 * Enter state s and start recording the raw input: the current output
 * position is saved in ls_start of the new state and a fresh
 * sretrace_info entry (allocated from ATEMP, 64 bytes initial buffer)
 * is put at the head of the retrace_info list.
 */
162 #define PUSH_SRETRACE(s) do {					\
163 	struct sretrace_info *ri;				\
164 								\
165 	PUSH_STATE(s);						\
166 	statep->ls_start = Xsavepos(ws, wp);			\
167 	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
168 	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
169 	ri->next = retrace_info;				\
170 	retrace_info = ri;					\
171 } while (/* CONSTCOND */ 0)
172 
/*
 * Undo PUSH_SRETRACE(): wp is moved back to the saved output position,
 * the recorded text is NUL-terminated and left in sp, the head entry
 * of the retrace_info list is unlinked and freed, and the state is
 * popped. The string in sp is not freed here; the users in yylex()
 * either afree() it or hand it to a Source as u.freeme. dp is
 * clobbered.
 */
173 #define POP_SRETRACE()	do {					\
174 	wp = Xrestpos(ws, wp, statep->ls_start);		\
175 	*retrace_info->xp = '\0';				\
176 	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
177 	dp = (void *)retrace_info;				\
178 	retrace_info = retrace_info->next;			\
179 	afree(dp, ATEMP);					\
180 	POP_STATE();						\
181 } while (/* CONSTCOND */ 0)
182 
183 /**
184  * Lexical analyser
185  *
186  * tokens are not regular expressions, they are LL(1).
187  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
188  * hence the state stack. Note "$(...)" are now parsed recursively.
189  */
190 
/*
 * Read the next token from the current input source.
 *
 * cf is a set of flag bits steering the lexer; the ones tested in this
 * function are ONEWORD, LETEXPR, HEREDELIM, HEREDOC, VARASN, ARRAYVAR,
 * LQCHAR, CONTIN, KEYWORD, ALIAS and ESACONLY.
 *
 * Return value, as far as this function shows:
 * - REDIR for an I/O redirection operator, with yylval.iop pointing to
 *   a new struct ioword allocated from ATEMP;
 * - LWORD for a word, with yylval.cp holding the encoded word (pairs
 *   such as CHAR/QCHAR plus character, OQUOTE, CQUOTE, OSUBST, ...,
 *   terminated by EOS);
 * - the value stored for a keyword (p->val.i) if KEYWORD is set in cf
 *   and the unquoted word is found in the keywords table;
 * - LOGOR, LOGAND, BREAK, MDPAREN, COPROC, BRKEV or BRKFT for the
 *   multi-character operators, otherwise the single terminating
 *   character itself (including '\n' and 0);
 * - '(' when a (( ... )) expression turns out to have mismatched
 *   parentheses and its text is pushed back for re-reading.
 *
 * Side effects: the global ident receives the word if it consists of
 * unquoted characters only (otherwise it is cleared, except for
 * assignment commands when ALIAS is set); alias expansion pushes a
 * SALIAS source and restarts; pending here documents are read through
 * gethere() when a newline or the end of input is returned; an
 * unbalanced state stack at the end of a word ends in yyerror().
 */
191 int
yylex(int cf)192 yylex(int cf)
193 {
194 	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
195 	State_info state_info;
196 	int c, c2, state;
197 	size_t cz;
198 	XString ws;		/* expandable output word */
199 	char *wp;		/* output word pointer */
200 	char *sp, *dp;
201 
202  Again:
	/*
	 * (re)start with an empty state stack: states[0] only serves as
	 * the entry below the first real state, states[1]
	 */
203 	states[0].type = SINVALID;
204 	states[0].ls_base = NULL;
205 	statep = &states[1];
206 	state_info.base = states;
207 	state_info.end = &state_info.base[STATE_BSIZE];
208 
209 	Xinit(ws, wp, 64, ATEMP);
210 
211 	backslash_skip = 0;
212 	ignore_backslash_newline = 0;
213 
214 	if (cf & ONEWORD)
215 		state = SWORD;
216 	else if (cf & LETEXPR) {
217 		/* enclose arguments in (double) quotes */
218 		*wp++ = OQUOTE;
219 		state = SLETPAREN;
220 		statep->nparen = 0;
221 	} else {
222 		/* normal lexing */
223 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
224 		while ((c = getsc()) == ' ' || c == '\t')
225 			;
226 		if (c == '#') {
227 			ignore_backslash_newline++;
228 			while ((c = getsc()) != '\0' && c != '\n')
229 				;
230 			ignore_backslash_newline--;
231 		}
232 		ungetsc(c);
233 	}
234 	if (source->flags & SF_ALIAS) {
235 		/* trailing ' ' in alias definition */
236 		source->flags &= ~SF_ALIAS;
237 		cf |= ALIAS;
238 	}
239 
240 	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
241 	statep->type = state;
242 
243 	/* check for here string */
244 	if (state == SHEREDELIM) {
245 		c = getsc();
246 		if (c == '<') {
247 			state = SHEREDELIM;
248 			while ((c = getsc()) == ' ' || c == '\t')
249 				;
250 			ungetsc(c);
251 			c = '<';
252 			goto accept_nonword;
253 		}
254 		ungetsc(c);
255 	}
256 
257 	/* collect non-special or quoted characters to form word */
258 	while (!((c = getsc()) == 0 ||
259 	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
260 		if (state == SBASE &&
261 		    subshell_nesting_type == /*{*/ '}' &&
262 		    c == /*{*/ '}')
263 			/* possibly end ${ :;} */
264 			break;
265  accept_nonword:
266 		Xcheck(ws, wp);
267 		switch (state) {
268 		case SADELIM:
269 			if (c == '(')
270 				statep->nparen++;
271 			else if (c == ')')
272 				statep->nparen--;
273 			else if (statep->nparen == 0 && (c == /*{*/ '}' ||
274 			    c == (int)statep->ls_adelim.delimiter)) {
275 				*wp++ = ADELIM;
276 				*wp++ = c;
277 				if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
278 					POP_STATE();
279 				if (c == /*{*/ '}')
280 					POP_STATE();
281 				break;
282 			}
283 			/* FALLTHROUGH */
284 		case SBASE:
285 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
286 				/* temporary */
287 				*wp = EOS;
288 				if (is_wdvarname(Xstring(ws, wp), false)) {
289 					char *p, *tmp;
290 
291 					if (arraysub(&tmp)) {
292 						*wp++ = CHAR;
293 						*wp++ = c;
294 						for (p = tmp; *p; ) {
295 							Xcheck(ws, wp);
296 							*wp++ = CHAR;
297 							*wp++ = *p++;
298 						}
299 						afree(tmp, ATEMP);
300 						break;
301 					} else {
302 						Source *s;
303 
304 						s = pushs(SREREAD,
305 						    source->areap);
306 						s->start = s->str =
307 						    s->u.freeme = tmp;
308 						s->next = source;
309 						source = s;
310 					}
311 				}
312 				*wp++ = CHAR;
313 				*wp++ = c;
314 				break;
315 			}
316 			/* FALLTHROUGH */
317  Sbase1:		/* includes *(...|...) pattern (*+?@!) */
318 			if (c == '*' || c == '@' || c == '+' || c == '?' ||
319 			    c == '!') {
320 				c2 = getsc();
321 				if (c2 == '(' /*)*/ ) {
322 					*wp++ = OPAT;
323 					*wp++ = c;
324 					PUSH_STATE(SPATTERN);
325 					break;
326 				}
327 				ungetsc(c2);
328 			}
329 			/* FALLTHROUGH */
330  Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
331 			switch (c) {
332 			case '\\':
333  getsc_qchar:
334 				if ((c = getsc())) {
335 					/* trailing \ is lost */
336 					*wp++ = QCHAR;
337 					*wp++ = c;
338 				}
339 				break;
340 			case '\'':
341  open_ssquote_unless_heredoc:
342 				if ((cf & HEREDOC))
343 					goto store_char;
344 				*wp++ = OQUOTE;
345 				ignore_backslash_newline++;
346 				PUSH_STATE(SSQUOTE);
347 				break;
348 			case '"':
349  open_sdquote:
350 				*wp++ = OQUOTE;
351 				PUSH_STATE(SDQUOTE);
352 				break;
353 			case '$':
354 				/*
355 				 * processing of dollar sign belongs into
356 				 * Subst, except for those which can open
357 				 * a string: $'…' and $"…"
358 				 */
359  subst_dollar_ex:
360 				c = getsc();
361 				switch (c) {
362 				case '"':
363 					goto open_sdquote;
364 				case '\'':
365 					goto open_sequote;
366 				default:
367 					goto SubstS;
368 				}
369 			default:
370 				goto Subst;
371 			}
372 			break;
373 
374  Subst:
375 			switch (c) {
376 			case '\\':
377 				c = getsc();
378 				switch (c) {
379 				case '"':
380 					if ((cf & HEREDOC))
381 						goto heredocquote;
382 					/* FALLTHROUGH */
383 				case '\\':
384 				case '$': case '`':
385  store_qchar:
386 					*wp++ = QCHAR;
387 					*wp++ = c;
388 					break;
389 				default:
390  heredocquote:
391 					Xcheck(ws, wp);
392 					if (c) {
393 						/* trailing \ is lost */
394 						*wp++ = CHAR;
395 						*wp++ = '\\';
396 						*wp++ = CHAR;
397 						*wp++ = c;
398 					}
399 					break;
400 				}
401 				break;
402 			case '$':
403 				c = getsc();
404  SubstS:
405 				if (c == '(') /*)*/ {
406 					c = getsc();
407 					if (c == '(') /*)*/ {
408 						*wp++ = EXPRSUB;
409 						PUSH_SRETRACE(SASPAREN);
410 						statep->nparen = 2;
411 						*retrace_info->xp++ = '(';
412 					} else {
413 						ungetsc(c);
414  subst_command:
415 						c = COMSUB;
416  subst_command2:
417 						sp = yyrecursive(c);
418 						cz = strlen(sp) + 1;
419 						XcheckN(ws, wp, cz);
420 						*wp++ = c;
421 						memcpy(wp, sp, cz);
422 						wp += cz;
423 					}
424 				} else if (c == '{') /*}*/ {
425 					if ((c = getsc()) == '|') {
426 						/*
427 						 * non-subenvironment
428 						 * value substitution
429 						 */
430 						c = VALSUB;
431 						goto subst_command2;
432 					} else if (ctype(c, C_IFSWS)) {
433 						/*
434 						 * non-subenvironment
435 						 * "command" substitution
436 						 */
437 						c = FUNSUB;
438 						goto subst_command2;
439 					}
440 					ungetsc(c);
441 					*wp++ = OSUBST;
442 					*wp++ = '{'; /*}*/
443 					wp = get_brace_var(&ws, wp);
444 					c = getsc();
445 					/* allow :# and :% (ksh88 compat) */
446 					if (c == ':') {
447 						*wp++ = CHAR;
448 						*wp++ = c;
449 						c = getsc();
450 						if (c == ':') {
451 							*wp++ = CHAR;
452 							*wp++ = '0';
453 							*wp++ = ADELIM;
454 							*wp++ = ':';
455 							PUSH_STATE(SBRACE);
456 							PUSH_STATE(SADELIM);
457 							statep->ls_adelim.delimiter = ':';
458 							statep->ls_adelim.num = 1;
459 							statep->nparen = 0;
460 							break;
461 						} else if (ksh_isdigit(c) ||
462 						    c == '('/*)*/ || c == ' ' ||
463 						    /*XXX what else? */
464 						    c == '$') {
465 							/* substring subst. */
466 							if (c != ' ') {
467 								*wp++ = CHAR;
468 								*wp++ = ' ';
469 							}
470 							ungetsc(c);
471 							PUSH_STATE(SBRACE);
472 							PUSH_STATE(SADELIM);
473 							statep->ls_adelim.delimiter = ':';
474 							statep->ls_adelim.num = 2;
475 							statep->nparen = 0;
476 							break;
477 						}
478 					} else if (c == '/') {
479 						*wp++ = CHAR;
480 						*wp++ = c;
481 						if ((c = getsc()) == '/') {
482 							*wp++ = ADELIM;
483 							*wp++ = c;
484 						} else
485 							ungetsc(c);
486 						PUSH_STATE(SBRACE);
487 						PUSH_STATE(SADELIM);
488 						statep->ls_adelim.delimiter = '/';
489 						statep->ls_adelim.num = 1;
490 						statep->nparen = 0;
491 						break;
492 					}
493 					/*
494 					 * If this is a trim operation,
495 					 * treat (,|,) specially in STBRACE.
496 					 */
497 					if (ctype(c, C_SUBOP2)) {
498 						ungetsc(c);
499 						if (Flag(FSH))
500 							PUSH_STATE(STBRACEBOURNE);
501 						else
502 							PUSH_STATE(STBRACEKORN);
503 					} else {
504 						ungetsc(c);
505 						if (state == SDQUOTE ||
506 						    state == SQBRACE)
507 							PUSH_STATE(SQBRACE);
508 						else
509 							PUSH_STATE(SBRACE);
510 					}
511 				} else if (ksh_isalphx(c)) {
512 					*wp++ = OSUBST;
513 					*wp++ = 'X';
514 					do {
515 						Xcheck(ws, wp);
516 						*wp++ = c;
517 						c = getsc();
518 					} while (ksh_isalnux(c));
519 					*wp++ = '\0';
520 					*wp++ = CSUBST;
521 					*wp++ = 'X';
522 					ungetsc(c);
523 				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
524 					Xcheck(ws, wp);
525 					*wp++ = OSUBST;
526 					*wp++ = 'X';
527 					*wp++ = c;
528 					*wp++ = '\0';
529 					*wp++ = CSUBST;
530 					*wp++ = 'X';
531 				} else {
532 					*wp++ = CHAR;
533 					*wp++ = '$';
534 					ungetsc(c);
535 				}
536 				break;
537 			case '`':
538  subst_gravis:
539 				PUSH_STATE(SBQUOTE);
540 				*wp++ = COMSUB;
541 				/*
542 				 * Need to know if we are inside double quotes
543 				 * since sh/AT&T-ksh translate the \" to " in
544 				 * "`...\"...`".
545 				 * This is not done in POSIX mode (section
546 				 * 3.2.3, Double Quotes: "The backquote shall
547 				 * retain its special meaning introducing the
548 				 * other form of command substitution (see
549 				 * 3.6.3). The portion of the quoted string
550 				 * from the initial backquote and the
551 				 * characters up to the next backquote that
552 				 * is not preceded by a backslash (having
553 				 * escape characters removed) defines that
554 				 * command whose output replaces `...` when
555 				 * the word is expanded."
556 				 * Section 3.6.3, Command Substitution:
557 				 * "Within the backquoted style of command
558 				 * substitution, backslash shall retain its
559 				 * literal meaning, except when followed by
560 				 * $ ` \.").
561 				 */
562 				statep->ls_bool = false;
				/*
				 * look for an enclosing SDQUOTE, walking from
				 * the newest state down to the base of its
				 * block and then on through ls_base;
				 * NOTE(review): assumes ls_base of a block's
				 * first entry leads into the block used
				 * before it - confirm against push_state_i()
				 */
563 				s2 = statep;
564 				base = state_info.base;
565 				while (/* CONSTCOND */ 1) {
566 					for (; s2 != base; s2--) {
567 						if (s2->type == SDQUOTE) {
568 							statep->ls_bool = true;
569 							break;
570 						}
571 					}
572 					if (s2 != base)
573 						break;
574 					if (!(s2 = s2->ls_base))
575 						break;
576 					base = s2-- - STATE_BSIZE;
577 				}
578 				break;
579 			case QCHAR:
580 				if (cf & LQCHAR) {
581 					*wp++ = QCHAR;
582 					*wp++ = getsc();
583 					break;
584 				}
585 				/* FALLTHROUGH */
586 			default:
587  store_char:
588 				*wp++ = CHAR;
589 				*wp++ = c;
590 			}
591 			break;
592 
593 		case SEQUOTE:
594 			if (c == '\'') {
595 				POP_STATE();
596 				*wp++ = CQUOTE;
597 				ignore_backslash_newline--;
598 			} else if (c == '\\') {
599 				if ((c2 = unbksl(true, s_get, s_put)) == -1)
600 					c2 = s_get();
601 				if (c2 == 0)
602 					statep->ls_bool = true;
603 				if (!statep->ls_bool) {
604 					char ts[4];
605 
606 					if ((unsigned int)c2 < 0x100) {
607 						*wp++ = QCHAR;
608 						*wp++ = c2;
609 					} else {
610 						cz = utf_wctomb(ts, c2 - 0x100);
611 						ts[cz] = 0;
612 						for (cz = 0; ts[cz]; ++cz) {
613 							*wp++ = QCHAR;
614 							*wp++ = ts[cz];
615 						}
616 					}
617 				}
618 			} else if (!statep->ls_bool) {
619 				*wp++ = QCHAR;
620 				*wp++ = c;
621 			}
622 			break;
623 
624 		case SSQUOTE:
625 			if (c == '\'') {
626 				POP_STATE();
627 				if ((cf & HEREDOC) || state == SQBRACE)
628 					goto store_char;
629 				*wp++ = CQUOTE;
630 				ignore_backslash_newline--;
631 			} else {
632 				*wp++ = QCHAR;
633 				*wp++ = c;
634 			}
635 			break;
636 
637 		case SDQUOTE:
638 			if (c == '"') {
639 				POP_STATE();
640 				*wp++ = CQUOTE;
641 			} else
642 				goto Subst;
643 			break;
644 
645 		/* $(( ... )) */
646 		case SASPAREN:
647 			if (c == '(')
648 				statep->nparen++;
649 			else if (c == ')') {
650 				statep->nparen--;
651 				if (statep->nparen == 1) {
652 					/* end of EXPRSUB */
653 					POP_SRETRACE();
654 
655 					if ((c2 = getsc()) == /*(*/ ')') {
656 						cz = strlen(sp) - 2;
657 						XcheckN(ws, wp, cz);
658 						memcpy(wp, sp + 1, cz);
659 						wp += cz;
660 						afree(sp, ATEMP);
661 						*wp++ = '\0';
662 						break;
663 					} else {
664 						Source *s;
665 
666 						ungetsc(c2);
667 						/*
668 						 * mismatched parenthesis -
669 						 * assume we were really
670 						 * parsing a $(...) expression
671 						 */
672 						--wp;
673 						s = pushs(SREREAD,
674 						    source->areap);
675 						s->start = s->str =
676 						    s->u.freeme = sp;
677 						s->next = source;
678 						source = s;
679 						goto subst_command;
680 					}
681 				}
682 			}
683 			/* reuse existing state machine */
684 			goto Sbase2;
685 
686 		case SQBRACE:
687 			if (c == '\\') {
688 				/*
689 				 * perform POSIX "quote removal" if the back-
690 				 * slash is "special", i.e. same cases as the
691 				 * {case '\\':} in Subst: plus closing brace;
692 				 * in mksh code "quote removal" on '\c' means
693 				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
694 				 * emitted (in heredocquote:)
695 				 */
696 				if ((c = getsc()) == '"' || c == '\\' ||
697 				    c == '$' || c == '`' || c == /*{*/'}')
698 					goto store_qchar;
699 				goto heredocquote;
700 			}
701 			goto common_SQBRACE;
702 
703 		case SBRACE:
704 			if (c == '\'')
705 				goto open_ssquote_unless_heredoc;
706 			else if (c == '\\')
707 				goto getsc_qchar;
708  common_SQBRACE:
709 			if (c == '"')
710 				goto open_sdquote;
711 			else if (c == '$')
712 				goto subst_dollar_ex;
713 			else if (c == '`')
714 				goto subst_gravis;
715 			else if (c != /*{*/ '}')
716 				goto store_char;
717 			POP_STATE();
718 			*wp++ = CSUBST;
719 			*wp++ = /*{*/ '}';
720 			break;
721 
722 		/* Same as SBASE, except (,|,) treated specially */
723 		case STBRACEKORN:
724 			if (c == '|')
725 				*wp++ = SPAT;
726 			else if (c == '(') {
727 				*wp++ = OPAT;
728 				/* simile for @ */
729 				*wp++ = ' ';
730 				PUSH_STATE(SPATTERN);
731 			} else /* FALLTHROUGH */
732 		case STBRACEBOURNE:
733 			  if (c == /*{*/ '}') {
734 				POP_STATE();
735 				*wp++ = CSUBST;
736 				*wp++ = /*{*/ '}';
737 			} else
738 				goto Sbase1;
739 			break;
740 
741 		case SBQUOTE:
742 			if (c == '`') {
743 				*wp++ = 0;
744 				POP_STATE();
745 			} else if (c == '\\') {
746 				switch (c = getsc()) {
747 				case 0:
748 					/* trailing \ is lost */
749 					break;
750 				case '\\':
751 				case '$': case '`':
752 					*wp++ = c;
753 					break;
754 				case '"':
755 					if (statep->ls_bool) {
756 						*wp++ = c;
757 						break;
758 					}
759 					/* FALLTHROUGH */
760 				default:
761 					*wp++ = '\\';
762 					*wp++ = c;
763 					break;
764 				}
765 			} else
766 				*wp++ = c;
767 			break;
768 
769 		/* ONEWORD */
770 		case SWORD:
771 			goto Subst;
772 
773 		/* LETEXPR: (( ... )) */
774 		case SLETPAREN:
775 			if (c == /*(*/ ')') {
776 				if (statep->nparen > 0)
777 					--statep->nparen;
778 				else if ((c2 = getsc()) == /*(*/ ')') {
779 					c = 0;
780 					*wp++ = CQUOTE;
781 					goto Done;
782 				} else {
783 					Source *s;
784 
785 					ungetsc(c2);
786 					/*
787 					 * mismatched parenthesis -
788 					 * assume we were really
789 					 * parsing a (...) expression
790 					 */
791 					*wp = EOS;
792 					sp = Xstring(ws, wp);
793 					dp = wdstrip(sp, WDS_KEEPQ);
794 					s = pushs(SREREAD, source->areap);
795 					s->start = s->str = s->u.freeme = dp;
796 					s->next = source;
797 					source = s;
798 					return ('('/*)*/);
799 				}
800 			} else if (c == '(')
801 				/*
802 				 * parentheses inside quotes and
803 				 * backslashes are lost, but AT&T ksh
804 				 * doesn't count them either
805 				 */
806 				++statep->nparen;
807 			goto Sbase2;
808 
809 		/* <<, <<-, <<< delimiter */
810 		case SHEREDELIM:
811 			/*
812 			 * here delimiters need a special case since
813 			 * $ and `...` are not to be treated specially
814 			 */
815 			switch (c) {
816 			case '\\':
817 				if ((c = getsc())) {
818 					/* trailing \ is lost */
819 					*wp++ = QCHAR;
820 					*wp++ = c;
821 				}
822 				break;
823 			case '\'':
824 				goto open_ssquote_unless_heredoc;
825 			case '$':
826 				if ((c2 = getsc()) == '\'') {
827  open_sequote:
828 					*wp++ = OQUOTE;
829 					ignore_backslash_newline++;
830 					PUSH_STATE(SEQUOTE);
831 					statep->ls_bool = false;
832 					break;
833 				} else if (c2 == '"') {
834 					/* FALLTHROUGH */
835 			case '"':
836 					PUSH_SRETRACE(SHEREDQUOTE);
837 					break;
838 				}
839 				ungetsc(c2);
840 				/* FALLTHROUGH */
841 			default:
842 				*wp++ = CHAR;
843 				*wp++ = c;
844 			}
845 			break;
846 
847 		/* " in <<, <<-, <<< delimiter */
848 		case SHEREDQUOTE:
849 			if (c != '"')
850 				goto Subst;
851 			POP_SRETRACE();
852 			dp = strnul(sp) - 1;
853 			/* remove the trailing double quote */
854 			*dp = '\0';
855 			/* store the quoted string */
856 			*wp++ = OQUOTE;
857 			XcheckN(ws, wp, (dp - sp) * 2);
858 			dp = sp;
859 			while ((c = *dp++)) {
860 				if (c == '\\') {
861 					switch ((c = *dp++)) {
862 					case '\\':
863 					case '"':
864 					case '$':
865 					case '`':
866 						break;
867 					default:
868 						*wp++ = CHAR;
869 						*wp++ = '\\';
870 						break;
871 					}
872 				}
873 				*wp++ = CHAR;
874 				*wp++ = c;
875 			}
876 			afree(sp, ATEMP);
877 			*wp++ = CQUOTE;
878 			state = statep->type = SHEREDELIM;
879 			break;
880 
881 		/* in *(...|...) pattern (*+?@!) */
882 		case SPATTERN:
883 			if (c == /*(*/ ')') {
884 				*wp++ = CPAT;
885 				POP_STATE();
886 			} else if (c == '|') {
887 				*wp++ = SPAT;
888 			} else if (c == '(') {
889 				*wp++ = OPAT;
890 				/* simile for @ */
891 				*wp++ = ' ';
892 				PUSH_STATE(SPATTERN);
893 			} else
894 				goto Sbase1;
895 			break;
896 		}
897 	}
898  Done:
899 	Xcheck(ws, wp);
	/* any state other than the initial one left open means a construct was not closed */
900 	if (statep != &states[1])
901 		/* XXX figure out what is missing */
902 		yyerror("no closing quote\n");
903 
904 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
905 	if (state == SHEREDELIM)
906 		state = SBASE;
907 
908 	dp = Xstring(ws, wp);
	/*
	 * redirection operator, optionally preceded by a word made of
	 * unquoted digits only, which then gives the descriptor number
	 */
909 	if (state == SBASE && (
910 #ifndef MKSH_LEGACY_MODE
911 	    (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
912 #endif
913 	    c == '<' || c == '>')) {
914 		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
915 
916 		if (Xlength(ws, wp) == 0)
917 			iop->unit = c == '<' ? 0 : 1;
918 		else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
919 			if (dp[c2] != CHAR)
920 				goto no_iop;
921 			if (!ksh_isdigit(dp[c2 + 1]))
922 				goto no_iop;
923 			iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
924 			if (iop->unit >= FDBASE)
925 				goto no_iop;
926 		}
927 
928 		if (c == '&') {
929 			if ((c2 = getsc()) != '>') {
930 				ungetsc(c2);
931 				goto no_iop;
932 			}
933 			c = c2;
934 			iop->ioflag = IOBASH;
935 		} else
936 			iop->ioflag = 0;
937 
938 		c2 = getsc();
939 		/* <<, >>, <> are ok, >< is not */
940 		if (c == c2 || (c == '<' && c2 == '>')) {
941 			iop->ioflag |= c == c2 ?
942 			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
943 			if (iop->ioflag == IOHERE) {
944 				if ((c2 = getsc()) == '-') {
945 					iop->ioflag |= IOSKIP;
946 					c2 = getsc();
947 				} else if (c2 == '<')
948 					iop->ioflag |= IOHERESTR;
949 				ungetsc(c2);
950 				if (c2 == '\n')
951 					iop->ioflag |= IONDELIM;
952 			}
953 		} else if (c2 == '&')
954 			iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0);
955 		else {
956 			iop->ioflag |= c == '>' ? IOWRITE : IOREAD;
957 			if (c == '>' && c2 == '|')
958 				iop->ioflag |= IOCLOB;
959 			else
960 				ungetsc(c2);
961 		}
962 
963 		iop->name = NULL;
964 		iop->delim = NULL;
965 		iop->heredoc = NULL;
966 		/* free word */
967 		Xfree(ws, wp);
968 		yylval.iop = iop;
969 		return (REDIR);
970  no_iop:
971 		afree(iop, ATEMP);
972 	}
973 
974 	if (wp == dp && state == SBASE) {
975 		/* free word */
976 		Xfree(ws, wp);
977 		/* no word, process LEX1 character */
978 		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
979 			if ((c2 = getsc()) == c)
980 				c = (c == ';') ? BREAK :
981 				    (c == '|') ? LOGOR :
982 				    (c == '&') ? LOGAND :
983 				    /* c == '(' ) */ MDPAREN;
984 			else if (c == '|' && c2 == '&')
985 				c = COPROC;
986 			else if (c == ';' && c2 == '|')
987 				c = BRKEV;
988 			else if (c == ';' && c2 == '&')
989 				c = BRKFT;
990 			else
991 				ungetsc(c2);
992 #ifndef MKSH_SMALL
993 			if (c == BREAK) {
994 				if ((c2 = getsc()) == '&')
995 					c = BRKEV;
996 				else
997 					ungetsc(c2);
998 			}
999 #endif
1000 		} else if (c == '\n') {
1001 			gethere(false);
1002 			if (cf & CONTIN)
1003 				goto Again;
1004 		} else if (c == '\0')
1005 			/* need here strings at EOF */
1006 			gethere(true);
1007 		return (c);
1008 	}
1009 
1010 	/* terminate word */
1011 	*wp++ = EOS;
1012 	yylval.cp = Xclose(ws, wp);
1013 	if (state == SWORD || state == SLETPAREN
1014 	    /* XXX ONEWORD? */)
1015 		return (LWORD);
1016 
1017 	/* unget terminator */
1018 	ungetsc(c);
1019 
1020 	/*
1021 	 * note: the alias-vs-function code below depends on several
1022 	 * interna: starting from here, source->str is not modified;
1023 	 * the way getsc() and ungetsc() operate; etc.
1024 	 */
1025 
1026 	/* copy word to unprefixed string ident */
1027 	sp = yylval.cp;
1028 	dp = ident;
1029 	if ((cf & HEREDELIM) && (sp[1] == '<')) {
1030  herestringloop:
1031 		switch ((c = *sp++)) {
1032 		case CHAR:
1033 			++sp;
1034 			/* FALLTHROUGH */
1035 		case OQUOTE:
1036 		case CQUOTE:
1037 			goto herestringloop;
1038 		default:
1039 			break;
1040 		}
1041 		/* dummy value */
1042 		*dp++ = 'x';
1043 	} else
1044 		while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1045 			*dp++ = *sp++;
1046 	if (c != EOS)
1047 		/* word is not unquoted */
1048 		dp = ident;
1049 	/* make sure the ident array stays NUL padded */
1050 	memset(dp, 0, (ident + IDENT) - dp + 1);
1051 
1052 	if (!(cf & (KEYWORD | ALIAS)))
1053 		return (LWORD);
1054 
1055 	if (*ident != '\0') {
1056 		struct tbl *p;
1057 		uint32_t h = hash(ident);
1058 
1059 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1060 		    (!(cf & ESACONLY) || p->val.i == ESAC ||
1061 		    p->val.i == /*{*/ '}')) {
1062 			afree(yylval.cp, ATEMP);
1063 			return (p->val.i);
1064 		}
1065 		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1066 		    (p->flag & ISSET)) {
1067 			/*
1068 			 * this still points to the same character as the
1069 			 * ungetsc'd terminator from above
1070 			 */
1071 			const char *cp = source->str;
1072 
1073 			/* prefer POSIX but not Korn functions over aliases */
1074 			while (*cp == ' ' || *cp == '\t')
1075 				/*
1076 				 * this is like getsc() without skipping
1077 				 * over Source boundaries (including not
1078 				 * parsing ungetsc'd characters that got
1079 				 * pushed into an SREREAD) which is what
1080 				 * we want here anyway: find out whether
1081 				 * the alias name is followed by a POSIX
1082 				 * function definition
1083 				 */
1084 				++cp;
1085 			/* prefer functions over aliases */
1086 			if (cp[0] != '(' || cp[1] != ')') {
1087 				Source *s = source;
1088 
				/* an alias already being expanded is not expanded again */
1089 				while (s && (s->flags & SF_HASALIAS))
1090 					if (s->u.tblp == p)
1091 						return (LWORD);
1092 					else
1093 						s = s->next;
1094 				/* push alias expansion */
1095 				s = pushs(SALIAS, source->areap);
1096 				s->start = s->str = p->val.s;
1097 				s->u.tblp = p;
1098 				s->flags |= SF_HASALIAS;
1099 				s->next = source;
1100 				if (source->type == SEOF) {
1101 					/* prevent infinite recursion at EOS */
1102 					source->u.tblp = p;
1103 					source->flags |= SF_HASALIAS;
1104 				}
1105 				source = s;
1106 				afree(yylval.cp, ATEMP);
1107 				goto Again;
1108 			}
1109 		}
1110 	} else if (cf & ALIAS) {
1111 		/* retain typeset et al. even when quoted */
1112 		if (assign_command((dp = wdstrip(yylval.cp, 0))))
1113 			strlcpy(ident, dp, sizeof(ident));
1114 		afree(dp, ATEMP);
1115 	}
1116 
1117 	return (LWORD);
1118 }
1119 
1120 static void
gethere(bool iseof)1121 gethere(bool iseof)
1122 {
1123 	struct ioword **p;
1124 
1125 	for (p = heres; p < herep; p++)
1126 		if (iseof && !((*p)->ioflag & IOHERESTR))
1127 			/* only here strings at EOF */
1128 			return;
1129 		else
1130 			readhere(*p);
1131 	herep = heres;
1132 }
1133 
1134 /*
1135  * read "<<word" text into temp file
1136  */
1137 
1138 static void
readhere(struct ioword * iop)1139 readhere(struct ioword *iop)
1140 {
1141 	int c;
1142 	const char *eof, *eofp;
1143 	XString xs;
1144 	char *xp;
1145 	int xpos;
1146 
1147 	if (iop->ioflag & IOHERESTR) {
1148 		/* process the here string */
1149 		iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
1150 		xpos = strlen(xp) - 1;
1151 		memmove(xp, xp + 1, xpos);
1152 		xp[xpos] = '\n';
1153 		return;
1154 	}
1155 
1156 	eof = iop->ioflag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
1157 
1158 	if (!(iop->ioflag & IOEVAL))
1159 		ignore_backslash_newline++;
1160 
1161 	Xinit(xs, xp, 256, ATEMP);
1162 
1163  heredoc_read_line:
1164 	/* beginning of line */
1165 	eofp = eof;
1166 	xpos = Xsavepos(xs, xp);
1167 	if (iop->ioflag & IOSKIP) {
1168 		/* skip over leading tabs */
1169 		while ((c = getsc()) == '\t')
1170 			;	/* nothing */
1171 		goto heredoc_parse_char;
1172 	}
1173  heredoc_read_char:
1174 	c = getsc();
1175  heredoc_parse_char:
1176 	/* compare with here document marker */
1177 	if (!*eofp) {
1178 		/* end of here document marker, what to do? */
1179 		switch (c) {
1180 		case /*(*/ ')':
1181 			if (!subshell_nesting_type)
1182 				/*-
1183 				 * not allowed outside $(...) or (...)
1184 				 * => mismatch
1185 				 */
1186 				break;
1187 			/* allow $(...) or (...) to close here */
1188 			ungetsc(/*(*/ ')');
1189 			/* FALLTHROUGH */
1190 		case 0:
1191 			/*
1192 			 * Allow EOF here to commands without trailing
1193 			 * newlines (mksh -c '...') will work as well.
1194 			 */
1195 		case '\n':
1196 			/* Newline terminates here document marker */
1197 			goto heredoc_found_terminator;
1198 		}
1199 	} else if (c == *eofp++)
1200 		/* store; then read and compare next character */
1201 		goto heredoc_store_and_loop;
1202 	/* nope, mismatch; read until end of line */
1203 	while (c != '\n') {
1204 		if (!c)
1205 			/* oops, reached EOF */
1206 			yyerror("%s '%s' unclosed\n", "here document", eof);
1207 		/* store character */
1208 		Xcheck(xs, xp);
1209 		Xput(xs, xp, c);
1210 		/* read next character */
1211 		c = getsc();
1212 	}
1213 	/* we read a newline as last character */
1214  heredoc_store_and_loop:
1215 	/* store character */
1216 	Xcheck(xs, xp);
1217 	Xput(xs, xp, c);
1218 	if (c == '\n')
1219 		goto heredoc_read_line;
1220 	goto heredoc_read_char;
1221 
1222  heredoc_found_terminator:
1223 	/* jump back to saved beginning of line */
1224 	xp = Xrestpos(xs, xp, xpos);
1225 	/* terminate, close and store */
1226 	Xput(xs, xp, '\0');
1227 	iop->heredoc = Xclose(xs, xp);
1228 
1229 	if (!(iop->ioflag & IOEVAL))
1230 		ignore_backslash_newline--;
1231 }
1232 
1233 void
yyerror(const char * fmt,...)1234 yyerror(const char *fmt, ...)
1235 {
1236 	va_list va;
1237 
1238 	/* pop aliases and re-reads */
1239 	while (source->type == SALIAS || source->type == SREREAD)
1240 		source = source->next;
1241 	/* zap pending input */
1242 	source->str = null;
1243 
1244 	error_prefix(true);
1245 	va_start(va, fmt);
1246 	shf_vfprintf(shl_out, fmt, va);
1247 	va_end(va);
1248 	errorfz();
1249 }
1250 
1251 /*
1252  * input for yylex with alias expansion
1253  */
1254 
1255 Source *
pushs(int type,Area * areap)1256 pushs(int type, Area *areap)
1257 {
1258 	Source *s;
1259 
1260 	s = alloc(sizeof(Source), areap);
1261 	memset(s, 0, sizeof(Source));
1262 	s->type = type;
1263 	s->str = null;
1264 	s->areap = areap;
1265 	if (type == SFILE || type == SSTDIN)
1266 		XinitN(s->xs, 256, s->areap);
1267 	return (s);
1268 }
1269 
/*
 * Return the next input character, looking at the current source
 * first. Whenever the current buffer is used up (a NUL is read) the
 * source is refilled, switched to its next state or popped from the
 * source stack, depending on its type. Returns 0 once no more input
 * is available; the source is marked SEOF in that case.
 */
static int
getsc_uu(void)
{
	Source *s = source;
	int c;

	/* a NUL means: the buffer of the current source is exhausted */
	while ((c = *s->str++) == 0) {
		/* return 0 for EOF by default */
		s->str = NULL;
		switch (s->type) {
		case SEOF:
			/* stay at EOF; point at the empty string again */
			s->str = null;
			return (0);

		case SSTDIN:
		case SFILE:
			/* fetch another line; s->str stays NULL if there is none */
			getsc_line(s);
			break;

		case SWSTR:
			break;

		case SSTRING:
		case SSTRINGCMDLINE:
			break;

		case SWORDS:
			/* deliver the next word of the vector, then a separator */
			s->start = s->str = *s->u.strv++;
			s->type = SWORDSEP;
			break;

		case SWORDSEP:
			if (*s->u.strv == NULL) {
				/* no words left: finish with a newline, then EOF */
				s->start = s->str = "\n";
				s->type = SEOF;
			} else {
				/* a blank between two words */
				s->start = s->str = " ";
				s->type = SWORDS;
			}
			break;

		case SALIAS:
			if (s->flags & SF_ALIASEND) {
				/* pass on an unused SF_ALIAS flag */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				s = source;
			} else if (*s->u.tblp->val.s &&
			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
				/* pop source stack */
				source = s = s->next;
				/*
				 * Note that this alias ended with a
				 * space, enabling alias expansion on
				 * the following word.
				 */
				s->flags |= SF_ALIAS;
			} else {
				/*
				 * At this point, we need to keep the current
				 * alias in the source list so recursive
				 * aliases can be detected and we also need to
				 * return the next character. Do this by
				 * temporarily popping the alias to get the
				 * next character and then put it back in the
				 * source list with the SF_ALIASEND flag set.
				 */
				/* pop source stack */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				c = getsc_uu();
				if (c) {
					/* re-push the alias holding just that character */
					s->flags |= SF_ALIASEND;
					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
					s->start = s->str = s->ugbuf;
					s->next = source;
					source = s;
				} else {
					s = source;
					/* avoid reading EOF twice */
					s->str = NULL;
					break;
				}
			}
			/* read again, from whatever source is current now */
			continue;

		case SREREAD:
			/* pushed-back input is used up: drop this source */
			if (s->start != s->ugbuf)
				/* yuck */
				afree(s->u.freeme, ATEMP);
			source = s = s->next;
			continue;
		}
		/* nothing was supplied above: this source is at EOF */
		if (s->str == NULL) {
			s->type = SEOF;
			s->start = s->str = null;
			return ('\0');
		}
		/* SF_ECHO: write the new input to shl_out */
		if (s->flags & SF_ECHO) {
			shf_puts(s->str, shl_out);
			shf_flush(shl_out);
		}
	}
	return (c);
}
1375 
/*
 * Read the next line of input for source s into its buffer (s->xs)
 * and make s->start and s->str point at it; s->str is set to NULL
 * when nothing at all could be read. For an interactive source this
 * also prints the prompt (or runs the line editor), arms and disarms
 * the TMOUT alarm, saves the line in the history and finally switches
 * the prompt to PS2.
 */
static void
getsc_line(Source *s)
{
	char *xp = Xstring(s->xs, xp), *cp;
	/* reading commands from standard input of a talking shell */
	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
	/* ... and the source is flagged SF_TTY as well */
	bool have_tty = tobool(interactive && (s->flags & SF_TTY));

	/* Done here to ensure nothing odd happens when a timeout occurs */
	XcheckN(s->xs, xp, LINE);
	*xp = '\0';
	s->start = s->str = xp;

	/* start the idle timer while waiting for input */
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_READING;
		alarm(ksh_tmout);
	}
	if (interactive)
		change_winsz();
#ifndef MKSH_NO_CMDLINE_EDITING
	/* an editing mode is active: let x_read() obtain the line */
	if (have_tty && (
#if !MKSH_S_NOVI
	    Flag(FVI) ||
#endif
	    Flag(FEMACS) || Flag(FGMACS))) {
		int nread;

		nread = x_read(xp);
		if (nread < 0)
			/* read error */
			nread = 0;
		xp[nread] = '\0';
		xp += nread;
	} else
#endif
	  {
		/* plain read: prompt when interactive, else count the line */
		if (interactive)
			pprompt(prompt, 0);
		else
			s->line++;

		/* keep reading until a newline or EOF/error is hit */
		while (/* CONSTCOND */ 1) {
			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);

			/* interrupted: run pending traps, then retry */
			if (!p && shf_error(s->u.shf) &&
			    shf_errno(s->u.shf) == EINTR) {
				shf_clearerr(s->u.shf);
				if (trap)
					runtraps(0);
				continue;
			}
			/* done on EOF/error or once the line is complete */
			if (!p || (xp = p, xp[-1] == '\n'))
				break;
			/* double buffer size */
			/* move past NUL so doubling works... */
			xp++;
			XcheckN(s->xs, xp, Xlength(s->xs, xp));
			/* ...and move back again */
			xp--;
		}
		/*
		 * flush any unwanted input so other programs/builtins
		 * can read it. Not very optimal, but less error prone
		 * than flushing else where, dealing with redirections,
		 * etc.
		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
		 */
		if (s->type == SSTDIN)
			shf_flush(s->u.shf);
	}
	/*
	 * XXX: temporary kludge to restore source after a
	 * trap may have been executed.
	 */
	source = s;
	/* input has arrived: stop the idle timer */
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_EXECUTING;
		alarm(0);
	}
	/* the buffer may have been reallocated: refetch its start */
	cp = Xstring(s->xs, xp);
	rndpush(cp);
	s->start = s->str = cp;
	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
	/* Note: if input is all nulls, this is not eof */
	if (Xlength(s->xs, xp) == 0) {
		/* EOF */
		if (s->type == SFILE)
			shf_fdclose(s->u.shf);
		s->str = NULL;
	} else if (interactive && *s->str &&
	    (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
		/*
		 * save the line in the history, except for a line typed
		 * at PS1 whose first character is an IFS character
		 */
		histsave(&s->line, s->str, true, true);
#if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
	} else if (interactive && cur_prompt == PS1) {
		/* a line of only IFS whitespace at PS1 triggers histsync() */
		cp = Xstring(s->xs, xp);
		while (*cp && ctype(*cp, C_IFSWS))
			++cp;
		if (!*cp)
			histsync();
#endif
	}
	/* any further line of this command is a continuation */
	if (interactive)
		set_prompt(PS2, NULL);
}
1479 
1480 void
set_prompt(int to,Source * s)1481 set_prompt(int to, Source *s)
1482 {
1483 	cur_prompt = (uint8_t)to;
1484 
1485 	switch (to) {
1486 	/* command */
1487 	case PS1:
1488 		/*
1489 		 * Substitute ! and !! here, before substitutions are done
1490 		 * so ! in expanded variables are not expanded.
1491 		 * NOTE: this is not what AT&T ksh does (it does it after
1492 		 * substitutions, POSIX doesn't say which is to be done.
1493 		 */
1494 		{
1495 			struct shf *shf;
1496 			char * volatile ps1;
1497 			Area *saved_atemp;
1498 
1499 			ps1 = str_val(global("PS1"));
1500 			shf = shf_sopen(NULL, strlen(ps1) * 2,
1501 			    SHF_WR | SHF_DYNAMIC, NULL);
1502 			while (*ps1)
1503 				if (*ps1 != '!' || *++ps1 == '!')
1504 					shf_putchar(*ps1++, shf);
1505 				else
1506 					shf_fprintf(shf, "%lu", s ?
1507 					    (unsigned long)s->line + 1 : 0UL);
1508 			ps1 = shf_sclose(shf);
1509 			saved_atemp = ATEMP;
1510 			newenv(E_ERRH);
1511 			if (kshsetjmp(e->jbuf)) {
1512 				prompt = safe_prompt;
1513 				/*
1514 				 * Don't print an error - assume it has already
1515 				 * been printed. Reason is we may have forked
1516 				 * to run a command and the child may be
1517 				 * unwinding its stack through this code as it
1518 				 * exits.
1519 				 */
1520 			} else {
1521 				char *cp = substitute(ps1, 0);
1522 				strdupx(prompt, cp, saved_atemp);
1523 			}
1524 			quitenv(NULL);
1525 		}
1526 		break;
1527 	/* command continuation */
1528 	case PS2:
1529 		prompt = str_val(global("PS2"));
1530 		break;
1531 	}
1532 }
1533 
1534 int
pprompt(const char * cp,int ntruncate)1535 pprompt(const char *cp, int ntruncate)
1536 {
1537 	char delimiter = 0;
1538 	bool doprint = (ntruncate != -1);
1539 	bool indelimit = false;
1540 	int columns = 0, lines = 0;
1541 
1542 	/*
1543 	 * Undocumented AT&T ksh feature:
1544 	 * If the second char in the prompt string is \r then the first
1545 	 * char is taken to be a non-printing delimiter and any chars
1546 	 * between two instances of the delimiter are not considered to
1547 	 * be part of the prompt length
1548 	 */
1549 	if (*cp && cp[1] == '\r') {
1550 		delimiter = *cp;
1551 		cp += 2;
1552 	}
1553 	for (; *cp; cp++) {
1554 		if (indelimit && *cp != delimiter)
1555 			;
1556 		else if (*cp == '\n' || *cp == '\r') {
1557 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1558 			columns = 0;
1559 		} else if (*cp == '\t') {
1560 			columns = (columns | 7) + 1;
1561 		} else if (*cp == '\b') {
1562 			if (columns > 0)
1563 				columns--;
1564 		} else if (*cp == delimiter)
1565 			indelimit = !indelimit;
1566 		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1567 			const char *cp2;
1568 			columns += utf_widthadj(cp, &cp2);
1569 			if (doprint && (indelimit ||
1570 			    (ntruncate < (x_cols * lines + columns))))
1571 				shf_write(cp, cp2 - cp, shl_out);
1572 			cp = cp2 - /* loop increment */ 1;
1573 			continue;
1574 		} else
1575 			columns++;
1576 		if (doprint && (*cp != delimiter) &&
1577 		    (indelimit || (ntruncate < (x_cols * lines + columns))))
1578 			shf_putc(*cp, shl_out);
1579 	}
1580 	if (doprint)
1581 		shf_flush(shl_out);
1582 	return (x_cols * lines + columns);
1583 }
1584 
1585 /*
1586  * Read the variable part of a ${...} expression (i.e. up to but not
1587  * including the :[-+?=#%] or close-brace).
1588  */
1589 static char *
get_brace_var(XString * wsp,char * wp)1590 get_brace_var(XString *wsp, char *wp)
1591 {
1592 	char c;
1593 	enum parse_state {
1594 		PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1595 		PS_NUMBER, PS_VAR1
1596 	} state = PS_INITIAL;
1597 
1598 	while (/* CONSTCOND */ 1) {
1599 		c = getsc();
1600 		/* State machine to figure out where the variable part ends. */
1601 		switch (state) {
1602 		case PS_INITIAL:
1603 			if (c == '#' || c == '!' || c == '%') {
1604 				state = PS_SAW_HASH;
1605 				break;
1606 			}
1607 			/* FALLTHROUGH */
1608 		case PS_SAW_HASH:
1609 			if (ksh_isalphx(c))
1610 				state = PS_IDENT;
1611 			else if (ksh_isdigit(c))
1612 				state = PS_NUMBER;
1613 			else if (c == '#') {
1614 				if (state == PS_SAW_HASH) {
1615 					char c2;
1616 
1617 					c2 = getsc();
1618 					ungetsc(c2);
1619 					if (c2 != /*{*/ '}') {
1620 						ungetsc(c);
1621 						goto out;
1622 					}
1623 				}
1624 				state = PS_VAR1;
1625 			} else if (ctype(c, C_VAR1))
1626 				state = PS_VAR1;
1627 			else
1628 				goto out;
1629 			break;
1630 		case PS_IDENT:
1631 			if (!ksh_isalnux(c)) {
1632 				if (c == '[') {
1633 					char *tmp, *p;
1634 
1635 					if (!arraysub(&tmp))
1636 						yyerror("missing ]\n");
1637 					*wp++ = c;
1638 					for (p = tmp; *p; ) {
1639 						Xcheck(*wsp, wp);
1640 						*wp++ = *p++;
1641 					}
1642 					afree(tmp, ATEMP);
1643 					/* the ] */
1644 					c = getsc();
1645 				}
1646 				goto out;
1647 			}
1648 			break;
1649 		case PS_NUMBER:
1650 			if (!ksh_isdigit(c))
1651 				goto out;
1652 			break;
1653 		case PS_VAR1:
1654 			goto out;
1655 		}
1656 		Xcheck(*wsp, wp);
1657 		*wp++ = c;
1658 	}
1659  out:
1660 	/* end of variable part */
1661 	*wp++ = '\0';
1662 	ungetsc(c);
1663 	return (wp);
1664 }
1665 
1666 /*
1667  * Save an array subscript - returns true if matching bracket found, false
1668  * if eof or newline was found.
1669  * (Returned string double null terminated)
1670  */
1671 static bool
arraysub(char ** strp)1672 arraysub(char **strp)
1673 {
1674 	XString ws;
1675 	char *wp, c;
1676 	/* we are just past the initial [ */
1677 	unsigned int depth = 1;
1678 
1679 	Xinit(ws, wp, 32, ATEMP);
1680 
1681 	do {
1682 		c = getsc();
1683 		Xcheck(ws, wp);
1684 		*wp++ = c;
1685 		if (c == '[')
1686 			depth++;
1687 		else if (c == ']')
1688 			depth--;
1689 	} while (depth > 0 && c && c != '\n');
1690 
1691 	*wp++ = '\0';
1692 	*strp = Xclose(ws, wp);
1693 
1694 	return (tobool(depth == 0));
1695 }
1696 
1697 /* Unget a char: handles case when we are already at the start of the buffer */
1698 static void
ungetsc(int c)1699 ungetsc(int c)
1700 {
1701 	struct sretrace_info *rp = retrace_info;
1702 
1703 	if (backslash_skip)
1704 		backslash_skip--;
1705 	/* Don't unget EOF... */
1706 	if (source->str == null && c == '\0')
1707 		return;
1708 	while (rp) {
1709 		if (Xlength(rp->xs, rp->xp))
1710 			rp->xp--;
1711 		rp = rp->next;
1712 	}
1713 	ungetsc_i(c);
1714 }
1715 static void
ungetsc_i(int c)1716 ungetsc_i(int c)
1717 {
1718 	if (source->str > source->start)
1719 		source->str--;
1720 	else {
1721 		Source *s;
1722 
1723 		s = pushs(SREREAD, source->areap);
1724 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1725 		s->start = s->str = s->ugbuf;
1726 		s->next = source;
1727 		source = s;
1728 	}
1729 }
1730 
1731 
1732 /* Called to get a char that isn't a \newline sequence. */
1733 static int
getsc_bn(void)1734 getsc_bn(void)
1735 {
1736 	int c, c2;
1737 
1738 	if (ignore_backslash_newline)
1739 		return (o_getsc_u());
1740 
1741 	if (backslash_skip == 1) {
1742 		backslash_skip = 2;
1743 		return (o_getsc_u());
1744 	}
1745 
1746 	backslash_skip = 0;
1747 
1748 	while (/* CONSTCOND */ 1) {
1749 		c = o_getsc_u();
1750 		if (c == '\\') {
1751 			if ((c2 = o_getsc_u()) == '\n')
1752 				/* ignore the \newline; get the next char... */
1753 				continue;
1754 			ungetsc_i(c2);
1755 			backslash_skip = 1;
1756 		}
1757 		return (c);
1758 	}
1759 }
1760 
1761 void
yyskiputf8bom(void)1762 yyskiputf8bom(void)
1763 {
1764 	int c;
1765 
1766 	if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1767 		ungetsc_i(c);
1768 		return;
1769 	}
1770 	if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1771 		ungetsc_i(c);
1772 		ungetsc_i(0xEF);
1773 		return;
1774 	}
1775 	if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1776 		ungetsc_i(c);
1777 		ungetsc_i(0xBB);
1778 		ungetsc_i(0xEF);
1779 		return;
1780 	}
1781 	UTFMODE |= 8;
1782 }
1783 
1784 static Lex_state *
push_state_i(State_info * si,Lex_state * old_end)1785 push_state_i(State_info *si, Lex_state *old_end)
1786 {
1787 	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1788 
1789 	news[0].ls_base = old_end;
1790 	si->base = &news[0];
1791 	si->end = &news[STATE_BSIZE];
1792 	return (&news[1]);
1793 }
1794 
1795 static Lex_state *
pop_state_i(State_info * si,Lex_state * old_end)1796 pop_state_i(State_info *si, Lex_state *old_end)
1797 {
1798 	Lex_state *old_base = si->base;
1799 
1800 	si->base = old_end->ls_base - STATE_BSIZE;
1801 	si->end = old_end->ls_base;
1802 
1803 	afree(old_base, ATEMP);
1804 
1805 	return (si->base + STATE_BSIZE - 1);
1806 }
1807 
/*
 * Return the next input character as delivered by getsc().
 * NOTE(review): presumably exists so that reading a character can be
 * passed around as a function pointer - confirm against the callers.
 */
static int
s_get(void)
{
	return (getsc());
}
1813 
/*
 * Push the character c back onto the input by way of ungetsc().
 * NOTE(review): presumably the counterpart of s_get() for use as a
 * function pointer - confirm against the callers.
 */
static void
s_put(int c)
{
	ungetsc(c);
}
1819