1 /* $OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $ */
2
3 /*-
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 * 2011, 2012, 2013, 2014, 2015, 2016
6 * mirabilos <m@mirbsd.org>
7 *
8 * Provided that these terms and disclaimer and all copyright notices
9 * are retained or reproduced in an accompanying document, permission
10 * is granted to deal in this work without restriction, including un-
11 * limited rights to use, publicly perform, distribute, sell, modify,
12 * merge, give away, or sublicence.
13 *
14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15 * the utmost extent permitted by applicable law, neither express nor
16 * implied; without malicious intent or gross negligence. In no event
17 * may a licensor, author or contributor be held liable for indirect,
18 * direct, other damage, loss, or other issues arising in any way out
19 * of dealing in the work, even if advised of the possibility of such
20 * damage or existence of a defect, except proven that it results out
21 * of said person's immediate fault when using the work as intended.
22 */
23
24 #include "sh.h"
25
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.218 2016/01/20 21:34:12 tg Exp $");
27
28 /*
29 * states while lexing word
 *
 * Each value is stored in the uint8_t "type" member of a Lex_state
 * entry and mirrored in the local variable "state" of yylex(), which
 * switches on it for every input character.
30 */
31 #define SBASE 0 /* outside any lexical constructs */
32 #define SWORD 1 /* implicit quoting for substitute() */
33 #define SLETPAREN 2 /* inside (( )), implicit quoting */
34 #define SSQUOTE 3 /* inside '' */
35 #define SDQUOTE 4 /* inside "" */
36 #define SEQUOTE 5 /* inside $'' */
37 #define SBRACE 6 /* inside ${} */
38 #define SQBRACE 7 /* inside "${}" */
39 #define SBQUOTE 8 /* inside `` */
40 #define SASPAREN 9 /* inside $(( )) */
41 #define SHEREDELIM 10 /* parsing << or <<- delimiter */
42 #define SHEREDQUOTE 11 /* parsing " in << or <<- delimiter */
43 #define SPATTERN 12 /* parsing *(...|...) pattern (*+?@!) */
44 #define SADELIM 13 /* like SBASE, looking for delimiter */
45 #define STBRACEKORN 14 /* parsing ${...[#%]...} !FSH */
46 #define STBRACEBOURNE 15 /* parsing ${...[#%]...} FSH */
/* yylex() stores SINVALID in states[0], the entry below the initial state */
47 #define SINVALID 255 /* invalid state */
48
/*
 * One entry of the retrace list headed by retrace_info.  While an
 * entry is on that list, every character handed out by the retracing
 * readers (see o_getsc_r) is also appended to its buffer, so the raw
 * input text can be recovered later (see POP_SRETRACE).
 */
49 struct sretrace_info {
/* entry that was active before this one was pushed */
50 struct sretrace_info *next;
/* growable buffer collecting the raw input characters */
51 XString xs;
/* current write position inside xs */
52 char *xp;
53 };
54
55 /*
56 * Structure to keep track of the lexing state and the various pieces of info
57 * needed for each particular state.
 *
 * The members of the union overlay each other; which one is meaningful
 * depends on the state type (or, for "base", on the entry being the
 * first slot of a state block, as for states[0] in yylex()).
58 */
59 typedef struct lex_state {
60 union {
61 /* point to the next state block */
62 struct lex_state *base;
63 /* marks start of state output in output string */
64 size_t start;
65 /* SBQUOTE: true if in double quotes: "`...`" */
66 /* SEQUOTE: got NUL, ignore rest of string */
67 bool abool;
68 /* SADELIM information */
69 struct {
70 /* character to search for */
71 unsigned char delimiter;
72 /* max. number of delimiters */
73 unsigned char num;
74 } adelim;
75 } u;
76 /* count open parentheses */
/* (yylex() uses it in the SADELIM, SASPAREN and SLETPAREN states) */
77 short nparen;
78 /* type of this state */
/* (one of the S* state constants) */
79 uint8_t type;
80 } Lex_state;
/* shorthands for the union members */
81 #define ls_base u.base
82 #define ls_start u.start
83 #define ls_bool u.abool
84 #define ls_adelim u.adelim
85
/*
 * Bounds of the block of Lex_state entries currently used by yylex().
 * PUSH_STATE compares the state pointer against "end" and POP_STATE
 * against "base"; on reaching either, push_state_i() respectively
 * pop_state_i() is called with this structure.
 */
86 typedef struct {
/* first entry of the current block */
87 Lex_state *base;
/* one past the last entry of the current block */
88 Lex_state *end;
89 } State_info;
90
/* forward declarations of the file-local helpers */
91 static void readhere(struct ioword *);
92 static void ungetsc(int);
93 static void ungetsc_i(int);
94 static int getsc_uu(void);
95 static void getsc_line(Source *);
96 static int getsc_bn(void);
97 static int getsc_i(void);
98 static char *get_brace_var(XString *, char *);
99 static bool arraysub(char **);
100 static void gethere(void);
101 static Lex_state *push_state_i(State_info *, Lex_state *);
102 static Lex_state *pop_state_i(State_info *, Lex_state *);
103
/*
 * Both counters are reset to 0 at the start of every yylex() run.
 * backslash_skip: when non-zero, o_getsc() always takes the slow path
 * through getsc_bn().
 * ignore_backslash_newline: incremented by yylex() and readhere() while
 * reading comments, '...' and $'...' strings and here documents without
 * IOEVAL, and decremented again afterwards.
 */
104 static int backslash_skip;
105 static int ignore_backslash_newline;
106
107 /* optimised getsc_bn() */
/*
 * Fast path: if the current source character is neither NUL nor a
 * backslash and backslash_skip is zero, consume it directly; every
 * other case is delegated to getsc_bn().
 */
108 #define o_getsc() (*source->str != '\0' && *source->str != '\\' && \
109 !backslash_skip ? *source->str++ : getsc_bn())
110 /* optimised getsc_uu() */
/* consume the current character unless it is NUL, else call getsc_uu() */
111 #define o_getsc_u() ((*source->str != '\0') ? *source->str++ : getsc_uu())
112
113 /* retrace helper */
/*
 * Expands to a complete function body (two declarations, a loop and a
 * return statement), so it must be the only statement of a function
 * returning int.  carg is evaluated exactly once; the resulting
 * character is appended to the buffer of every entry on the
 * retrace_info list and then returned.
 */
114 #define o_getsc_r(carg) \
115 int cev = (carg); \
116 struct sretrace_info *rp = retrace_info; \
117 \
118 while (rp) { \
119 Xcheck(rp->xs, rp->xp); \
120 *rp->xp++ = cev; \
121 rp = rp->next; \
122 } \
123 \
124 return (cev);
125
126 /* callback */
127 static int
getsc_i(void)128 getsc_i(void)
129 {
130 o_getsc_r(o_getsc());
131 }
132
133 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
134 #define getsc getsc_i
135 #else
136 static int getsc_r(int);
137
138 static int
getsc_r(int c)139 getsc_r(int c)
140 {
141 o_getsc_r(c);
142 }
143
144 #define getsc() getsc_r(o_getsc())
145 #endif
146
/* number of Lex_state entries in the states[] array local to yylex() */
147 #define STATE_BSIZE 8
148
/*
 * The four macros below are meant to be expanded inside yylex() only:
 * they use its local variables statep, state_info and state, and the
 * SRETRACE variants additionally ws, wp, sp and dp.
 */

/*
 * Enter state s: advance statep (asking push_state_i() for help when the
 * end of the current block is reached) and record s both in the new
 * entry and in "state".
 */
149 #define PUSH_STATE(s) do { \
150 if (++statep == state_info.end) \
151 statep = push_state_i(&state_info, statep); \
152 state = statep->type = (s); \
153 } while (/* CONSTCOND */ 0)
154
/*
 * Leave the current state: step statep back (asking pop_state_i() for
 * help when the base of the current block is reached) and reload
 * "state" from the entry that is now on top.
 */
155 #define POP_STATE() do { \
156 if (--statep == state_info.base) \
157 statep = pop_state_i(&state_info, statep); \
158 state = statep->type; \
159 } while (/* CONSTCOND */ 0)
160
/*
 * Enter state s and start retracing: remember the current output
 * position in the new state entry and push a fresh retrace buffer
 * (allocated from ATEMP) onto the retrace_info list.
 */
161 #define PUSH_SRETRACE(s) do { \
162 struct sretrace_info *ri; \
163 \
164 PUSH_STATE(s); \
165 statep->ls_start = Xsavepos(ws, wp); \
166 ri = alloc(sizeof(struct sretrace_info), ATEMP); \
167 Xinit(ri->xs, ri->xp, 64, ATEMP); \
168 ri->next = retrace_info; \
169 retrace_info = ri; \
170 } while (/* CONSTCOND */ 0)
171
/*
 * Stop retracing and leave the state: wp is moved back to the output
 * position saved by PUSH_SRETRACE, sp is set to the NUL-terminated
 * text collected in the retrace buffer (the list entry itself is
 * freed here, the text is not), and dp is clobbered.
 */
172 #define POP_SRETRACE() do { \
173 wp = Xrestpos(ws, wp, statep->ls_start); \
174 *retrace_info->xp = '\0'; \
175 sp = Xstring(retrace_info->xs, retrace_info->xp); \
176 dp = (void *)retrace_info; \
177 retrace_info = retrace_info->next; \
178 afree(dp, ATEMP); \
179 POP_STATE(); \
180 } while (/* CONSTCOND */ 0)
181
182 /**
183 * Lexical analyser
184 *
185 * tokens are not regular expressions, they are LL(1).
186 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
187 * hence the state stack. Note "$(...)" are now parsed recursively.
188 */
/*
 * yylex(cf): read the next token from the current input source.
 *
 * cf is a bit mask; the flags tested in this function are ONEWORD,
 * LETEXPR, HEREDELIM, HEREDOC, CMDASN, CMDWORD, LQCHAR, KEYWORD,
 * ALIAS, ESACONLY and CONTIN.
 *
 * Returns one of:
 * - REDIR, with yylval.iop pointing to a newly allocated struct ioword;
 * - LWORD, with yylval.cp pointing to the encoded word (a sequence of
 *   CHAR, QCHAR, OQUOTE, ... items terminated by EOS); ident is filled
 *   with the plain text if the word consists only of CHAR items and
 *   fits, and is cleared otherwise (with one exception handled at the
 *   very end of the function);
 * - the value of a matching keyword table entry (p->val.i);
 * - when no word was collected: the terminating character itself or
 *   the token for a multi-character operator (LOGOR, LOGAND, BREAK,
 *   MDPAREN, COPROC, BRKEV, BRKFT);
 * - '(' when a presumed (( ... )) turns out not to be closed by "))".
 * Errors are reported through yyerror().
 */
189
190 int
191 yylex(int cf)191 yylex(int cf)
192 {
193 Lex_state states[STATE_BSIZE], *statep, *s2, *base;
194 State_info state_info;
195 int c, c2, state;
196 size_t cz;
197 XString ws; /* expandable output word */
198 char *wp; /* output word pointer */
199 char *sp, *dp;
200
201 Again:
/*
 * (re)start with an empty state stack: states[0] is a marker entry of
 * type SINVALID, states[1] will hold the initial state
 */
202 states[0].type = SINVALID;
203 states[0].ls_base = NULL;
204 statep = &states[1];
205 state_info.base = states;
206 state_info.end = &state_info.base[STATE_BSIZE];
207
208 Xinit(ws, wp, 64, ATEMP);
209
210 backslash_skip = 0;
211 ignore_backslash_newline = 0;
212
/* select the initial state from the caller's flags */
213 if (cf & ONEWORD)
214 state = SWORD;
215 else if (cf & LETEXPR) {
216 /* enclose arguments in (double) quotes */
217 *wp++ = OQUOTE;
218 state = SLETPAREN;
219 statep->nparen = 0;
220 } else {
221 /* normal lexing */
222 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
/* skip leading blanks, then a comment up to (not including) newline */
223 while ((c = getsc()) == ' ' || c == '\t')
224 ;
225 if (c == '#') {
226 ignore_backslash_newline++;
227 while ((c = getsc()) != '\0' && c != '\n')
228 ;
229 ignore_backslash_newline--;
230 }
231 ungetsc(c);
232 }
233 if (source->flags & SF_ALIAS) {
234 /* trailing ' ' in alias definition */
235 source->flags &= ~SF_ALIAS;
236 /* POSIX: trailing space only counts if parsing simple cmd */
237 if (!Flag(FPOSIX) || (cf & CMDWORD))
238 cf |= ALIAS;
239 }
240
241 /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
242 statep->type = state;
243
244 /* collect non-special or quoted characters to form word */
/*
 * the loop ends on a NUL character or, in the SBASE and SHEREDELIM
 * states only, on a character of class C_LEX1; c then holds the
 * terminator
 */
245 while (!((c = getsc()) == 0 ||
246 ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
247 if (state == SBASE &&
248 subshell_nesting_type == /*{*/ '}' &&
249 c == /*{*/ '}')
250 /* possibly end ${ :;} */
251 break;
/* make room for the output of this iteration */
252 Xcheck(ws, wp);
253 switch (state) {
254 case SADELIM:
255 if (c == '(')
256 statep->nparen++;
257 else if (c == ')')
258 statep->nparen--;
259 else if (statep->nparen == 0 && (c == /*{*/ '}' ||
260 c == (int)statep->ls_adelim.delimiter)) {
261 *wp++ = ADELIM;
262 *wp++ = c;
/* leave SADELIM; a closing brace also leaves the SBRACE pushed below it */
263 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
264 POP_STATE();
265 if (c == /*{*/ '}')
266 POP_STATE();
267 break;
268 }
269 /* FALLTHROUGH */
270 case SBASE:
271 if (c == '[' && (cf & CMDASN)) {
272 /* temporary */
273 *wp = EOS;
274 if (is_wdvarname(Xstring(ws, wp), false)) {
275 char *p, *tmp;
276
277 if (arraysub(&tmp)) {
278 *wp++ = CHAR;
279 *wp++ = c;
280 for (p = tmp; *p; ) {
281 Xcheck(ws, wp);
282 *wp++ = CHAR;
283 *wp++ = *p++;
284 }
285 afree(tmp, ATEMP);
286 break;
287 } else {
288 Source *s;
289
/* hand the text back to the input by pushing it as an SREREAD source */
290 s = pushs(SREREAD,
291 source->areap);
292 s->start = s->str =
293 s->u.freeme = tmp;
294 s->next = source;
295 source = s;
296 }
297 }
298 *wp++ = CHAR;
299 *wp++ = c;
300 break;
301 }
302 /* FALLTHROUGH */
303 Sbase1: /* includes *(...|...) pattern (*+?@!) */
304 if (c == '*' || c == '@' || c == '+' || c == '?' ||
305 c == '!') {
306 c2 = getsc();
307 if (c2 == '(' /*)*/ ) {
308 *wp++ = OPAT;
309 *wp++ = c;
310 PUSH_STATE(SPATTERN);
311 break;
312 }
313 ungetsc(c2);
314 }
315 /* FALLTHROUGH */
316 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
317 switch (c) {
318 case '\\':
319 getsc_qchar:
320 if ((c = getsc())) {
321 /* trailing \ is lost */
322 *wp++ = QCHAR;
323 *wp++ = c;
324 }
325 break;
326 case '\'':
327 open_ssquote_unless_heredoc:
328 if ((cf & HEREDOC))
329 goto store_char;
330 *wp++ = OQUOTE;
331 ignore_backslash_newline++;
332 PUSH_STATE(SSQUOTE);
333 break;
334 case '"':
335 open_sdquote:
336 *wp++ = OQUOTE;
337 PUSH_STATE(SDQUOTE);
338 break;
339 case '$':
340 /*
341 * processing of dollar sign belongs into
342 * Subst, except for those which can open
343 * a string: $'…' and $"…"
344 */
345 subst_dollar_ex:
346 c = getsc();
347 switch (c) {
348 case '"':
349 goto open_sdquote;
350 case '\'':
351 goto open_sequote;
352 default:
353 goto SubstS;
354 }
355 default:
356 goto Subst;
357 }
358 break;
359
/*
 * Subst: handling of backslash, $ and ` shared by several states
 * (reached via goto from SDQUOTE, SWORD and SHEREDQUOTE, among others)
 */
360 Subst:
361 switch (c) {
362 case '\\':
363 c = getsc();
364 switch (c) {
365 case '"':
366 if ((cf & HEREDOC))
367 goto heredocquote;
368 /* FALLTHROUGH */
369 case '\\':
370 case '$': case '`':
371 store_qchar:
372 *wp++ = QCHAR;
373 *wp++ = c;
374 break;
375 default:
376 heredocquote:
377 Xcheck(ws, wp);
378 if (c) {
379 /* trailing \ is lost */
380 *wp++ = CHAR;
381 *wp++ = '\\';
382 *wp++ = CHAR;
383 *wp++ = c;
384 }
385 break;
386 }
387 break;
388 case '$':
389 c = getsc();
390 SubstS:
391 if (c == '(') /*)*/ {
392 c = getsc();
393 if (c == '(') /*)*/ {
394 *wp++ = EXPRSUB;
395 PUSH_SRETRACE(SASPAREN);
396 statep->nparen = 2;
/* the second '(' was read before retracing started; record it by hand */
397 *retrace_info->xp++ = '(';
398 } else {
399 ungetsc(c);
400 subst_command:
401 c = COMSUB;
402 subst_command2:
/* store the type item followed by the NUL-terminated result of yyrecursive() */
403 sp = yyrecursive(c);
404 cz = strlen(sp) + 1;
405 XcheckN(ws, wp, cz);
406 *wp++ = c;
407 memcpy(wp, sp, cz);
408 wp += cz;
409 }
410 } else if (c == '{') /*}*/ {
411 if ((c = getsc()) == '|') {
412 /*
413 * non-subenvironment
414 * value substitution
415 */
416 c = VALSUB;
417 goto subst_command2;
418 } else if (ctype(c, C_IFSWS)) {
419 /*
420 * non-subenvironment
421 * "command" substitution
422 */
423 c = FUNSUB;
424 goto subst_command2;
425 }
426 ungetsc(c);
427 *wp++ = OSUBST;
428 *wp++ = '{'; /*}*/
429 wp = get_brace_var(&ws, wp);
430 c = getsc();
431 /* allow :# and :% (ksh88 compat) */
432 if (c == ':') {
433 *wp++ = CHAR;
434 *wp++ = c;
435 c = getsc();
436 if (c == ':') {
437 *wp++ = CHAR;
438 *wp++ = '0';
439 *wp++ = ADELIM;
440 *wp++ = ':';
441 PUSH_STATE(SBRACE);
442 PUSH_STATE(SADELIM);
443 statep->ls_adelim.delimiter = ':';
444 statep->ls_adelim.num = 1;
445 statep->nparen = 0;
446 break;
447 } else if (ksh_isdigit(c) ||
448 c == '('/*)*/ || c == ' ' ||
449 /*XXX what else? */
450 c == '$') {
451 /* substring subst. */
452 if (c != ' ') {
453 *wp++ = CHAR;
454 *wp++ = ' ';
455 }
456 ungetsc(c);
457 PUSH_STATE(SBRACE);
458 PUSH_STATE(SADELIM);
459 statep->ls_adelim.delimiter = ':';
460 statep->ls_adelim.num = 2;
461 statep->nparen = 0;
462 break;
463 }
464 } else if (c == '/') {
465 *wp++ = CHAR;
466 *wp++ = c;
467 if ((c = getsc()) == '/') {
468 *wp++ = ADELIM;
469 *wp++ = c;
470 } else
471 ungetsc(c);
472 PUSH_STATE(SBRACE);
473 PUSH_STATE(SADELIM);
474 statep->ls_adelim.delimiter = '/';
475 statep->ls_adelim.num = 1;
476 statep->nparen = 0;
477 break;
478 }
479 /*
480 * If this is a trim operation,
481 * treat (,|,) specially in STBRACE.
482 */
483 if (ctype(c, C_SUBOP2)) {
484 ungetsc(c);
485 if (Flag(FSH))
486 PUSH_STATE(STBRACEBOURNE);
487 else
488 PUSH_STATE(STBRACEKORN);
489 } else {
490 ungetsc(c);
491 if (state == SDQUOTE ||
492 state == SQBRACE)
493 PUSH_STATE(SQBRACE);
494 else
495 PUSH_STATE(SBRACE);
496 }
497 } else if (ksh_isalphx(c)) {
/* $name: emitted as OSUBST 'X' name NUL CSUBST 'X' */
498 *wp++ = OSUBST;
499 *wp++ = 'X';
500 do {
501 Xcheck(ws, wp);
502 *wp++ = c;
503 c = getsc();
504 } while (ksh_isalnux(c));
505 *wp++ = '\0';
506 *wp++ = CSUBST;
507 *wp++ = 'X';
508 ungetsc(c);
509 } else if (ctype(c, C_VAR1 | C_DIGIT)) {
510 Xcheck(ws, wp);
511 *wp++ = OSUBST;
512 *wp++ = 'X';
513 *wp++ = c;
514 *wp++ = '\0';
515 *wp++ = CSUBST;
516 *wp++ = 'X';
517 } else {
/* not a substitution: keep the dollar sign as an ordinary character */
518 *wp++ = CHAR;
519 *wp++ = '$';
520 ungetsc(c);
521 }
522 break;
523 case '`':
524 subst_gravis:
525 PUSH_STATE(SBQUOTE);
526 *wp++ = COMSUB;
527 /*
528 * We need to know whether we are within double
529 * quotes, since most shells translate \" to "
530 * within "…`…\"…`…". This is not done in POSIX
531 * mode (§2.2.3 Double-Quotes: “The backquote
532 * shall retain its special meaning introducing
533 * the other form of command substitution (see
534 * Command Substitution). The portion of the
535 * quoted string from the initial backquote and
536 * the characters up to the next backquote that
537 * is not preceded by a <backslash>, having
538 * escape characters removed, defines that
539 * command whose output replaces "`...`" when
540 * the word is expanded.”; §2.6.3 Command
541 * Substitution: “Within the backquoted style
542 * of command substitution, <backslash> shall
543 * retain its literal meaning, except when
544 * followed by: '$', '`', or <backslash>. The
545 * search for the matching backquote shall be
546 * satisfied by the first unquoted non-escaped
547 * backquote; during this search, if a
548 * non-escaped backquote is encountered[…],
549 * undefined results occur.”).
550 */
551 statep->ls_bool = false;
552 #ifdef austingroupbugs1015_is_still_not_resolved
553 if (Flag(FPOSIX))
554 break;
555 #endif
/*
 * walk down the state stack, following the ls_base link stored in the
 * first entry of each block, looking for an enclosing SDQUOTE state
 */
556 s2 = statep;
557 base = state_info.base;
558 while (/* CONSTCOND */ 1) {
559 for (; s2 != base; s2--) {
560 if (s2->type == SDQUOTE) {
561 statep->ls_bool = true;
562 break;
563 }
564 }
565 if (s2 != base)
566 break;
567 if (!(s2 = s2->ls_base))
568 break;
569 base = s2-- - STATE_BSIZE;
570 }
571 break;
572 case QCHAR:
573 if (cf & LQCHAR) {
574 *wp++ = QCHAR;
575 *wp++ = getsc();
576 break;
577 }
578 /* FALLTHROUGH */
579 default:
580 store_char:
581 *wp++ = CHAR;
582 *wp++ = c;
583 }
584 break;
585
586 case SEQUOTE:
587 if (c == '\'') {
588 POP_STATE();
589 *wp++ = CQUOTE;
590 ignore_backslash_newline--;
591 } else if (c == '\\') {
592 if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
593 c2 = getsc();
/* a NUL result sets ls_bool, after which nothing more is stored for this string */
594 if (c2 == 0)
595 statep->ls_bool = true;
596 if (!statep->ls_bool) {
597 char ts[4];
598
599 if ((unsigned int)c2 < 0x100) {
600 *wp++ = QCHAR;
601 *wp++ = c2;
602 } else {
/* values from 0x100 up: convert (c2 - 0x100) with utf_wctomb() and store each resulting byte */
603 cz = utf_wctomb(ts, c2 - 0x100);
604 ts[cz] = 0;
605 cz = 0;
606 do {
607 *wp++ = QCHAR;
608 *wp++ = ts[cz];
609 } while (ts[++cz]);
610 }
611 }
612 } else if (!statep->ls_bool) {
613 *wp++ = QCHAR;
614 *wp++ = c;
615 }
616 break;
617
618 case SSQUOTE:
619 if (c == '\'') {
620 POP_STATE();
621 if ((cf & HEREDOC) || state == SQBRACE)
622 goto store_char;
623 *wp++ = CQUOTE;
624 ignore_backslash_newline--;
625 } else {
626 *wp++ = QCHAR;
627 *wp++ = c;
628 }
629 break;
630
631 case SDQUOTE:
632 if (c == '"') {
633 POP_STATE();
634 *wp++ = CQUOTE;
635 } else
636 goto Subst;
637 break;
638
639 /* $(( ... )) */
640 case SASPAREN:
641 if (c == '(')
642 statep->nparen++;
643 else if (c == ')') {
644 statep->nparen--;
645 if (statep->nparen == 1) {
646 /* end of EXPRSUB */
647 POP_SRETRACE();
648
649 if ((c2 = getsc()) == /*(*/ ')') {
/* copy the retraced text without its first and last character, then terminate it */
650 cz = strlen(sp) - 2;
651 XcheckN(ws, wp, cz);
652 memcpy(wp, sp + 1, cz);
653 wp += cz;
654 afree(sp, ATEMP);
655 *wp++ = '\0';
656 break;
657 } else {
658 Source *s;
659
660 ungetsc(c2);
661 /*
662 * mismatched parenthesis -
663 * assume we were really
664 * parsing a $(...) expression
665 */
/* drop the EXPRSUB item and re-read the retraced text as input */
666 --wp;
667 s = pushs(SREREAD,
668 source->areap);
669 s->start = s->str =
670 s->u.freeme = sp;
671 s->next = source;
672 source = s;
673 goto subst_command;
674 }
675 }
676 }
677 /* reuse existing state machine */
678 goto Sbase2;
679
680 case SQBRACE:
681 if (c == '\\') {
682 /*
683 * perform POSIX "quote removal" if the back-
684 * slash is "special", i.e. same cases as the
685 * {case '\\':} in Subst: plus closing brace;
686 * in mksh code "quote removal" on '\c' means
687 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
688 * emitted (in heredocquote:)
689 */
690 if ((c = getsc()) == '"' || c == '\\' ||
691 c == '$' || c == '`' || c == /*{*/'}')
692 goto store_qchar;
693 goto heredocquote;
694 }
695 goto common_SQBRACE;
696
697 case SBRACE:
698 if (c == '\'')
699 goto open_ssquote_unless_heredoc;
700 else if (c == '\\')
701 goto getsc_qchar;
702 common_SQBRACE:
703 if (c == '"')
704 goto open_sdquote;
705 else if (c == '$')
706 goto subst_dollar_ex;
707 else if (c == '`')
708 goto subst_gravis;
709 else if (c != /*{*/ '}')
710 goto store_char;
711 POP_STATE();
712 *wp++ = CSUBST;
713 *wp++ = /*{*/ '}';
714 break;
715
716 /* Same as SBASE, except (,|,) treated specially */
717 case STBRACEKORN:
718 if (c == '|')
719 *wp++ = SPAT;
720 else if (c == '(') {
721 *wp++ = OPAT;
722 /* simile for @ */
723 *wp++ = ' ';
724 PUSH_STATE(SPATTERN);
725 } else /* FALLTHROUGH */
726 case STBRACEBOURNE:
727 if (c == /*{*/ '}') {
728 POP_STATE();
729 *wp++ = CSUBST;
730 *wp++ = /*{*/ '}';
731 } else
732 goto Sbase1;
733 break;
734
735 case SBQUOTE:
736 if (c == '`') {
737 *wp++ = 0;
738 POP_STATE();
739 } else if (c == '\\') {
740 switch (c = getsc()) {
741 case 0:
742 /* trailing \ is lost */
743 break;
744 case '$':
745 case '`':
746 case '\\':
747 *wp++ = c;
748 break;
749 case '"':
/* ls_bool was set at subst_gravis if an enclosing SDQUOTE state was found */
750 if (statep->ls_bool) {
751 *wp++ = c;
752 break;
753 }
754 /* FALLTHROUGH */
755 default:
756 *wp++ = '\\';
757 *wp++ = c;
758 break;
759 }
760 } else
761 *wp++ = c;
762 break;
763
764 /* ONEWORD */
765 case SWORD:
766 goto Subst;
767
768 /* LETEXPR: (( ... )) */
769 case SLETPAREN:
770 if (c == /*(*/ ')') {
771 if (statep->nparen > 0)
772 --statep->nparen;
773 else if ((c2 = getsc()) == /*(*/ ')') {
774 c = 0;
775 *wp++ = CQUOTE;
776 goto Done;
777 } else {
778 Source *s;
779
780 ungetsc(c2);
781 ungetsc(c);
782 /*
783 * mismatched parenthesis -
784 * assume we were really
785 * parsing a (...) expression
786 */
/* strip the word collected so far (minus the leading OQUOTE) and re-read it as input */
787 *wp = EOS;
788 sp = Xstring(ws, wp);
789 dp = wdstrip(sp + 1, WDS_TPUTS);
790 s = pushs(SREREAD, source->areap);
791 s->start = s->str = s->u.freeme = dp;
792 s->next = source;
793 source = s;
794 ungetsc('('/*)*/);
795 return ('('/*)*/);
796 }
797 } else if (c == '(')
798 /*
799 * parentheses inside quotes and
800 * backslashes are lost, but AT&T ksh
801 * doesn't count them either
802 */
803 ++statep->nparen;
804 goto Sbase2;
805
806 /* << or <<- delimiter */
807 case SHEREDELIM:
808 /*
809 * here delimiters need a special case since
810 * $ and `...` are not to be treated specially
811 */
812 switch (c) {
813 case '\\':
814 if ((c = getsc())) {
815 /* trailing \ is lost */
816 *wp++ = QCHAR;
817 *wp++ = c;
818 }
819 break;
820 case '\'':
821 goto open_ssquote_unless_heredoc;
822 case '$':
823 if ((c2 = getsc()) == '\'') {
824 open_sequote:
825 *wp++ = OQUOTE;
826 ignore_backslash_newline++;
827 PUSH_STATE(SEQUOTE);
828 statep->ls_bool = false;
829 break;
830 } else if (c2 == '"') {
831 /* FALLTHROUGH */
832 case '"':
833 PUSH_SRETRACE(SHEREDQUOTE);
834 break;
835 }
836 ungetsc(c2);
837 /* FALLTHROUGH */
838 default:
839 *wp++ = CHAR;
840 *wp++ = c;
841 }
842 break;
843
844 /* " in << or <<- delimiter */
845 case SHEREDQUOTE:
846 if (c != '"')
847 goto Subst;
/* closing quote: discard the output produced meanwhile and rebuild it from the retraced raw text */
848 POP_SRETRACE();
849 dp = strnul(sp) - 1;
850 /* remove the trailing double quote */
851 *dp = '\0';
852 /* store the quoted string */
853 *wp++ = OQUOTE;
854 XcheckN(ws, wp, (dp - sp) * 2);
855 dp = sp;
856 while ((c = *dp++)) {
857 if (c == '\\') {
/* a backslash is dropped before \ " $ ` and kept before anything else */
858 switch ((c = *dp++)) {
859 case '\\':
860 case '"':
861 case '$':
862 case '`':
863 break;
864 default:
865 *wp++ = CHAR;
866 *wp++ = '\\';
867 break;
868 }
869 }
870 *wp++ = CHAR;
871 *wp++ = c;
872 }
873 afree(sp, ATEMP);
874 *wp++ = CQUOTE;
875 state = statep->type = SHEREDELIM;
876 break;
877
878 /* in *(...|...) pattern (*+?@!) */
879 case SPATTERN:
880 if (c == /*(*/ ')') {
881 *wp++ = CPAT;
882 POP_STATE();
883 } else if (c == '|') {
884 *wp++ = SPAT;
885 } else if (c == '(') {
886 *wp++ = OPAT;
887 /* simile for @ */
888 *wp++ = ' ';
889 PUSH_STATE(SPATTERN);
890 } else
891 goto Sbase1;
892 break;
893 }
894 }
895 Done:
896 Xcheck(ws, wp);
/* anything but the initial state entry on top means a construct was left open */
897 if (statep != &states[1])
898 /* XXX figure out what is missing */
899 yyerror("no closing quote\n");
900
901 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
902 if (state == SHEREDELIM)
903 state = SBASE;
904
/*
 * redirection check: the terminator is < or > (or &, unless disabled
 * at build time or by FSH/FPOSIX) and the word collected so far is
 * either empty or made of unquoted digits only
 */
905 dp = Xstring(ws, wp);
906 if (state == SBASE && (
907 #ifndef MKSH_LEGACY_MODE
908 (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
909 #endif
910 c == '<' || c == '>')) {
911 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
912
/* no digits: unit 0 for '<', 1 otherwise; else parse the digits into the unit number */
913 if (Xlength(ws, wp) == 0)
914 iop->unit = c == '<' ? 0 : 1;
915 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
916 if (dp[c2] != CHAR)
917 goto no_iop;
918 if (!ksh_isdigit(dp[c2 + 1]))
919 goto no_iop;
920 iop->unit = iop->unit * 10 + ksh_numdig(dp[c2 + 1]);
921 if (iop->unit >= FDBASE)
922 goto no_iop;
923 }
924
925 if (c == '&') {
/* '&' only starts a redirection when directly followed by '>' */
926 if ((c2 = getsc()) != '>') {
927 ungetsc(c2);
928 goto no_iop;
929 }
930 c = c2;
931 iop->ioflag = IOBASH;
932 } else
933 iop->ioflag = 0;
934
935 c2 = getsc();
936 /* <<, >>, <> are ok, >< is not */
937 if (c == c2 || (c == '<' && c2 == '>')) {
938 iop->ioflag |= c == c2 ?
939 (c == '>' ? IOCAT : IOHERE) : IORDWR;
940 if (iop->ioflag == IOHERE) {
941 if ((c2 = getsc()) == '-')
942 iop->ioflag |= IOSKIP;
943 else if (c2 == '<')
944 iop->ioflag |= IOHERESTR;
945 else
946 ungetsc(c2);
947 }
948 } else if (c2 == '&')
949 iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0);
950 else {
951 iop->ioflag |= c == '>' ? IOWRITE : IOREAD;
952 if (c == '>' && c2 == '|')
953 iop->ioflag |= IOCLOB;
954 else
955 ungetsc(c2);
956 }
957
958 iop->ioname = NULL;
959 iop->delim = NULL;
960 iop->heredoc = NULL;
961 /* free word */
962 Xfree(ws, wp);
963 yylval.iop = iop;
964 return (REDIR);
965 no_iop:
966 afree(iop, ATEMP);
967 }
968
969 if (wp == dp && state == SBASE) {
970 /* free word */
971 Xfree(ws, wp);
972 /* no word, process LEX1 character */
973 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
974 if ((c2 = getsc()) == c)
975 c = (c == ';') ? BREAK :
976 (c == '|') ? LOGOR :
977 (c == '&') ? LOGAND :
978 /* c == '(' ) */ MDPAREN;
979 else if (c == '|' && c2 == '&')
980 c = COPROC;
981 else if (c == ';' && c2 == '|')
982 c = BRKEV;
983 else if (c == ';' && c2 == '&')
984 c = BRKFT;
985 else
986 ungetsc(c2);
987 #ifndef MKSH_SMALL
/* BREAK directly followed by '&' also yields BRKEV */
988 if (c == BREAK) {
989 if ((c2 = getsc()) == '&')
990 c = BRKEV;
991 else
992 ungetsc(c2);
993 }
994 #endif
995 } else if (c == '\n') {
/* while reading a here document delimiter the newline is pushed back; otherwise pending here documents are read now */
996 if (cf & HEREDELIM)
997 ungetsc(c);
998 else {
999 gethere();
1000 if (cf & CONTIN)
1001 goto Again;
1002 }
1003 }
1004 return (c);
1005 }
1006
1007 /* terminate word */
1008 *wp++ = EOS;
1009 yylval.cp = Xclose(ws, wp);
1010 if (state == SWORD || state == SLETPAREN
1011 /* XXX ONEWORD? */)
1012 return (LWORD);
1013
1014 /* unget terminator */
1015 ungetsc(c);
1016
1017 /*
1018 * note: the alias-vs-function code below depends on several
1019 * interna: starting from here, source->str is not modified;
1020 * the way getsc() and ungetsc() operate; etc.
1021 */
1022
1023 /* copy word to unprefixed string ident */
1024 sp = yylval.cp;
1025 dp = ident;
1026 while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1027 *dp++ = *sp++;
/* c is EOS only if the whole word consisted of CHAR items and fit into ident */
1028 if (c != EOS)
1029 /* word is not unquoted */
1030 dp = ident;
1031 /* make sure the ident array stays NUL padded */
1032 memset(dp, 0, (ident + IDENT) - dp + 1);
1033
1034 if (!(cf & (KEYWORD | ALIAS)))
1035 return (LWORD);
1036
1037 if (*ident != '\0') {
1038 struct tbl *p;
1039 uint32_t h = hash(ident);
1040
/* keyword lookup; with ESACONLY only ESAC and the closing brace are accepted */
1041 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1042 (!(cf & ESACONLY) || p->val.i == ESAC ||
1043 p->val.i == /*{*/ '}')) {
1044 afree(yylval.cp, ATEMP);
1045 return (p->val.i);
1046 }
1047 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1048 (p->flag & ISSET)) {
1049 /*
1050 * this still points to the same character as the
1051 * ungetsc'd terminator from above
1052 */
1053 const char *cp = source->str;
1054
1055 /* prefer POSIX but not Korn functions over aliases */
1056 while (*cp == ' ' || *cp == '\t')
1057 /*
1058 * this is like getsc() without skipping
1059 * over Source boundaries (including not
1060 * parsing ungetsc'd characters that got
1061 * pushed into an SREREAD) which is what
1062 * we want here anyway: find out whether
1063 * the alias name is followed by a POSIX
1064 * function definition
1065 */
1066 ++cp;
1067 /* prefer functions over aliases */
1068 if (cp[0] != '(' || cp[1] != ')') {
1069 Source *s = source;
1070
/* do not expand an alias that is already being expanded further down the source stack */
1071 while (s && (s->flags & SF_HASALIAS))
1072 if (s->u.tblp == p)
1073 return (LWORD);
1074 else
1075 s = s->next;
1076 /* push alias expansion */
1077 s = pushs(SALIAS, source->areap);
1078 s->start = s->str = p->val.s;
1079 s->u.tblp = p;
1080 s->flags |= SF_HASALIAS;
1081 s->next = source;
1082 if (source->type == SEOF) {
1083 /* prevent infinite recursion at EOS */
1084 source->u.tblp = p;
1085 source->flags |= SF_HASALIAS;
1086 }
1087 source = s;
1088 afree(yylval.cp, ATEMP);
1089 goto Again;
1090 }
1091 }
1092 } else if (cf & ALIAS) {
1093 /* retain typeset et al. even when quoted */
1094 if (assign_command((dp = wdstrip(yylval.cp, 0)), true))
1095 strlcpy(ident, dp, sizeof(ident));
1096 afree(dp, ATEMP);
1097 }
1098
1099 return (LWORD);
1100 }
1101
1102 static void
gethere(void)1103 gethere(void)
1104 {
1105 struct ioword **p;
1106
1107 for (p = heres; p < herep; p++)
1108 if (!((*p)->ioflag & IOHERESTR))
1109 readhere(*p);
1110 herep = heres;
1111 }
1112
1113 /*
1114 * read "<<word" text into temp file
1115 */
1116
1117 static void
readhere(struct ioword * iop)1118 readhere(struct ioword *iop)
1119 {
1120 int c;
1121 const char *eof, *eofp;
1122 XString xs;
1123 char *xp;
1124 size_t xpos;
1125
1126 eof = evalstr(iop->delim, 0);
1127
1128 if (!(iop->ioflag & IOEVAL))
1129 ignore_backslash_newline++;
1130
1131 Xinit(xs, xp, 256, ATEMP);
1132
1133 heredoc_read_line:
1134 /* beginning of line */
1135 eofp = eof;
1136 xpos = Xsavepos(xs, xp);
1137 if (iop->ioflag & IOSKIP) {
1138 /* skip over leading tabs */
1139 while ((c = getsc()) == '\t')
1140 ; /* nothing */
1141 goto heredoc_parse_char;
1142 }
1143 heredoc_read_char:
1144 c = getsc();
1145 heredoc_parse_char:
1146 /* compare with here document marker */
1147 if (!*eofp) {
1148 /* end of here document marker, what to do? */
1149 switch (c) {
1150 case /*(*/ ')':
1151 if (!subshell_nesting_type)
1152 /*-
1153 * not allowed outside $(...) or (...)
1154 * => mismatch
1155 */
1156 break;
1157 /* allow $(...) or (...) to close here */
1158 ungetsc(/*(*/ ')');
1159 /* FALLTHROUGH */
1160 case 0:
1161 /*
1162 * Allow EOF here to commands without trailing
1163 * newlines (mksh -c '...') will work as well.
1164 */
1165 case '\n':
1166 /* Newline terminates here document marker */
1167 goto heredoc_found_terminator;
1168 }
1169 } else if (c == *eofp++)
1170 /* store; then read and compare next character */
1171 goto heredoc_store_and_loop;
1172 /* nope, mismatch; read until end of line */
1173 while (c != '\n') {
1174 if (!c)
1175 /* oops, reached EOF */
1176 yyerror("%s '%s' unclosed\n", "here document", eof);
1177 /* store character */
1178 Xcheck(xs, xp);
1179 Xput(xs, xp, c);
1180 /* read next character */
1181 c = getsc();
1182 }
1183 /* we read a newline as last character */
1184 heredoc_store_and_loop:
1185 /* store character */
1186 Xcheck(xs, xp);
1187 Xput(xs, xp, c);
1188 if (c == '\n')
1189 goto heredoc_read_line;
1190 goto heredoc_read_char;
1191
1192 heredoc_found_terminator:
1193 /* jump back to saved beginning of line */
1194 xp = Xrestpos(xs, xp, xpos);
1195 /* terminate, close and store */
1196 Xput(xs, xp, '\0');
1197 iop->heredoc = Xclose(xs, xp);
1198
1199 if (!(iop->ioflag & IOEVAL))
1200 ignore_backslash_newline--;
1201 }
1202
1203 void
yyerror(const char * fmt,...)1204 yyerror(const char *fmt, ...)
1205 {
1206 va_list va;
1207
1208 /* pop aliases and re-reads */
1209 while (source->type == SALIAS || source->type == SREREAD)
1210 source = source->next;
1211 /* zap pending input */
1212 source->str = null;
1213
1214 error_prefix(true);
1215 va_start(va, fmt);
1216 shf_vfprintf(shl_out, fmt, va);
1217 va_end(va);
1218 errorfz();
1219 }
1220
1221 /*
1222 * input for yylex with alias expansion
1223 */
1224
1225 Source *
pushs(int type,Area * areap)1226 pushs(int type, Area *areap)
1227 {
1228 Source *s;
1229
1230 s = alloc(sizeof(Source), areap);
1231 memset(s, 0, sizeof(Source));
1232 s->type = type;
1233 s->str = null;
1234 s->areap = areap;
1235 if (type == SFILE || type == SSTDIN)
1236 XinitN(s->xs, 256, s->areap);
1237 return (s);
1238 }
1239
/*
 * getsc_uu(): return the next character of the input, or 0 at end of
 * input.
 *
 * Whenever the string of the current Source is exhausted, the switch
 * below decides by source type how to go on: read another line (SFILE,
 * SSTDIN), step through a word vector (SWORDS, SWORDSEP), pop a
 * finished alias expansion or re-read buffer (SALIAS, SREREAD), or
 * turn the source into SEOF.
 */
1240 static int
1241 getsc_uu(void)1241 getsc_uu(void)
1242 {
1243 Source *s = source;
1244 int c;
1245
1246 while ((c = *s->str++) == 0) {
1247 /* return 0 for EOF by default */
1248 s->str = NULL;
1249 switch (s->type) {
1250 case SEOF:
1251 s->str = null;
1252 return (0);
1253
1254 case SSTDIN:
1255 case SFILE:
1256 getsc_line(s);
1257 break;
1258
/* these types have nothing more to deliver: s->str stays NULL, handled after the switch */
1259 case SWSTR:
1260 break;
1261
1262 case SSTRING:
1263 case SSTRINGCMDLINE:
1264 break;
1265
1266 case SWORDS:
/* deliver the next word of the vector; a separator follows */
1267 s->start = s->str = *s->u.strv++;
1268 s->type = SWORDSEP;
1269 break;
1270
1271 case SWORDSEP:
/* a newline after the last word (then EOF), a space between words */
1272 if (*s->u.strv == NULL) {
1273 s->start = s->str = "\n";
1274 s->type = SEOF;
1275 } else {
1276 s->start = s->str = " ";
1277 s->type = SWORDS;
1278 }
1279 break;
1280
1281 case SALIAS:
1282 if (s->flags & SF_ALIASEND) {
1283 /* pass on an unused SF_ALIAS flag */
1284 source = s->next;
1285 source->flags |= s->flags & SF_ALIAS;
1286 s = source;
1287 } else if (*s->u.tblp->val.s &&
1288 (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1289 /* pop source stack */
1290 source = s = s->next;
1291 /*
1292 * Note that this alias ended with a
1293 * space, enabling alias expansion on
1294 * the following word.
1295 */
1296 s->flags |= SF_ALIAS;
1297 } else {
1298 /*
1299 * At this point, we need to keep the current
1300 * alias in the source list so recursive
1301 * aliases can be detected and we also need to
1302 * return the next character. Do this by
1303 * temporarily popping the alias to get the
1304 * next character and then put it back in the
1305 * source list with the SF_ALIASEND flag set.
1306 */
1307 /* pop source stack */
1308 source = s->next;
1309 source->flags |= s->flags & SF_ALIAS;
1310 c = getsc_uu();
1311 if (c) {
/* park the character in the alias entry's ugbuf and push the entry back */
1312 s->flags |= SF_ALIASEND;
1313 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1314 s->start = s->str = s->ugbuf;
1315 s->next = source;
1316 source = s;
1317 } else {
1318 s = source;
1319 /* avoid reading EOF twice */
1320 s->str = NULL;
1321 break;
1322 }
1323 }
/* retry the read with the source selected above */
1324 continue;
1325
1326 case SREREAD:
/* free the buffer unless the text lives in the entry's own ugbuf, then pop the entry */
1327 if (s->start != s->ugbuf)
1328 /* yuck */
1329 afree(s->u.freeme, ATEMP);
1330 source = s = s->next;
1331 continue;
1332 }
/* no new text was supplied above: the source is exhausted for good */
1333 if (s->str == NULL) {
1334 s->type = SEOF;
1335 s->start = s->str = null;
1336 return ('\0');
1337 }
/* with SF_ECHO set, the newly obtained text is copied to shl_out */
1338 if (s->flags & SF_ECHO) {
1339 shf_puts(s->str, shl_out);
1340 shf_flush(shl_out);
1341 }
1342 }
1343 return (c);
1344 }
1345
1346 static void
getsc_line(Source * s)1347 getsc_line(Source *s)
1348 {
1349 char *xp = Xstring(s->xs, xp), *cp;
1350 bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1351 bool have_tty = tobool(interactive && (s->flags & SF_TTY));
1352
1353 /* Done here to ensure nothing odd happens when a timeout occurs */
1354 XcheckN(s->xs, xp, LINE);
1355 *xp = '\0';
1356 s->start = s->str = xp;
1357
1358 if (have_tty && ksh_tmout) {
1359 ksh_tmout_state = TMOUT_READING;
1360 alarm(ksh_tmout);
1361 }
1362 if (interactive)
1363 change_winsz();
1364 #ifndef MKSH_NO_CMDLINE_EDITING
1365 if (have_tty && (
1366 #if !MKSH_S_NOVI
1367 Flag(FVI) ||
1368 #endif
1369 Flag(FEMACS) || Flag(FGMACS))) {
1370 int nread;
1371
1372 nread = x_read(xp);
1373 if (nread < 0)
1374 /* read error */
1375 nread = 0;
1376 xp[nread] = '\0';
1377 xp += nread;
1378 } else
1379 #endif
1380 {
1381 if (interactive)
1382 pprompt(prompt, 0);
1383 else
1384 s->line++;
1385
1386 while (/* CONSTCOND */ 1) {
1387 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1388
1389 if (!p && shf_error(s->u.shf) &&
1390 shf_errno(s->u.shf) == EINTR) {
1391 shf_clearerr(s->u.shf);
1392 if (trap)
1393 runtraps(0);
1394 continue;
1395 }
1396 if (!p || (xp = p, xp[-1] == '\n'))
1397 break;
1398 /* double buffer size */
1399 /* move past NUL so doubling works... */
1400 xp++;
1401 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1402 /* ...and move back again */
1403 xp--;
1404 }
1405 /*
1406 * flush any unwanted input so other programs/builtins
1407 * can read it. Not very optimal, but less error prone
1408 * than flushing else where, dealing with redirections,
1409 * etc.
1410 * TODO: reduce size of shf buffer (~128?) if SSTDIN
1411 */
1412 if (s->type == SSTDIN)
1413 shf_flush(s->u.shf);
1414 }
1415 /*
1416 * XXX: temporary kludge to restore source after a
1417 * trap may have been executed.
1418 */
1419 source = s;
1420 if (have_tty && ksh_tmout) {
1421 ksh_tmout_state = TMOUT_EXECUTING;
1422 alarm(0);
1423 }
1424 cp = Xstring(s->xs, xp);
1425 rndpush(cp);
1426 s->start = s->str = cp;
1427 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1428 /* Note: if input is all nulls, this is not eof */
1429 if (Xlength(s->xs, xp) == 0) {
1430 /* EOF */
1431 if (s->type == SFILE)
1432 shf_fdclose(s->u.shf);
1433 s->str = NULL;
1434 } else if (interactive && *s->str) {
1435 if (cur_prompt != PS1)
1436 histsave(&s->line, s->str, HIST_APPEND, true);
1437 else if (!ctype(*s->str, C_IFS | C_IFSWS))
1438 histsave(&s->line, s->str, HIST_QUEUE, true);
1439 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1440 else
1441 goto check_for_sole_return;
1442 } else if (interactive && cur_prompt == PS1) {
1443 check_for_sole_return:
1444 cp = Xstring(s->xs, xp);
1445 while (*cp && ctype(*cp, C_IFSWS))
1446 ++cp;
1447 if (!*cp) {
1448 histsave(&s->line, NULL, HIST_FLUSH, true);
1449 histsync();
1450 }
1451 #endif
1452 }
1453 if (interactive)
1454 set_prompt(PS2, NULL);
1455 }
1456
1457 void
set_prompt(int to,Source * s)1458 set_prompt(int to, Source *s)
1459 {
1460 cur_prompt = (uint8_t)to;
1461
1462 switch (to) {
1463 /* command */
1464 case PS1:
1465 /*
1466 * Substitute ! and !! here, before substitutions are done
1467 * so ! in expanded variables are not expanded.
1468 * NOTE: this is not what AT&T ksh does (it does it after
1469 * substitutions, POSIX doesn't say which is to be done.
1470 */
1471 {
1472 struct shf *shf;
1473 char * volatile ps1;
1474 Area *saved_atemp;
1475 int saved_lineno;
1476
1477 ps1 = str_val(global("PS1"));
1478 shf = shf_sopen(NULL, strlen(ps1) * 2,
1479 SHF_WR | SHF_DYNAMIC, NULL);
1480 while (*ps1)
1481 if (*ps1 != '!' || *++ps1 == '!')
1482 shf_putchar(*ps1++, shf);
1483 else
1484 shf_fprintf(shf, "%lu", s ?
1485 (unsigned long)s->line + 1 : 0UL);
1486 ps1 = shf_sclose(shf);
1487 saved_lineno = current_lineno;
1488 if (s)
1489 current_lineno = s->line + 1;
1490 saved_atemp = ATEMP;
1491 newenv(E_ERRH);
1492 if (kshsetjmp(e->jbuf)) {
1493 prompt = safe_prompt;
1494 /*
1495 * Don't print an error - assume it has already
1496 * been printed. Reason is we may have forked
1497 * to run a command and the child may be
1498 * unwinding its stack through this code as it
1499 * exits.
1500 */
1501 } else {
1502 char *cp = substitute(ps1, 0);
1503 strdupx(prompt, cp, saved_atemp);
1504 }
1505 current_lineno = saved_lineno;
1506 quitenv(NULL);
1507 }
1508 break;
1509 /* command continuation */
1510 case PS2:
1511 prompt = str_val(global("PS2"));
1512 break;
1513 }
1514 }
1515
1516 int
pprompt(const char * cp,int ntruncate)1517 pprompt(const char *cp, int ntruncate)
1518 {
1519 char delimiter = 0;
1520 bool doprint = (ntruncate != -1);
1521 bool indelimit = false;
1522 int columns = 0, lines = 0;
1523
1524 /*
1525 * Undocumented AT&T ksh feature:
1526 * If the second char in the prompt string is \r then the first
1527 * char is taken to be a non-printing delimiter and any chars
1528 * between two instances of the delimiter are not considered to
1529 * be part of the prompt length
1530 */
1531 if (*cp && cp[1] == '\r') {
1532 delimiter = *cp;
1533 cp += 2;
1534 }
1535 for (; *cp; cp++) {
1536 if (indelimit && *cp != delimiter)
1537 ;
1538 else if (*cp == '\n' || *cp == '\r') {
1539 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1540 columns = 0;
1541 } else if (*cp == '\t') {
1542 columns = (columns | 7) + 1;
1543 } else if (*cp == '\b') {
1544 if (columns > 0)
1545 columns--;
1546 } else if (*cp == delimiter)
1547 indelimit = !indelimit;
1548 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1549 const char *cp2;
1550 columns += utf_widthadj(cp, &cp2);
1551 if (doprint && (indelimit ||
1552 (ntruncate < (x_cols * lines + columns))))
1553 shf_write(cp, cp2 - cp, shl_out);
1554 cp = cp2 - /* loop increment */ 1;
1555 continue;
1556 } else
1557 columns++;
1558 if (doprint && (*cp != delimiter) &&
1559 (indelimit || (ntruncate < (x_cols * lines + columns))))
1560 shf_putc(*cp, shl_out);
1561 }
1562 if (doprint)
1563 shf_flush(shl_out);
1564 return (x_cols * lines + columns);
1565 }
1566
1567 /*
1568 * Read the variable part of a ${...} expression (i.e. up to but not
1569 * including the :[-+?=#%] or close-brace).
1570 */
1571 static char *
get_brace_var(XString * wsp,char * wp)1572 get_brace_var(XString *wsp, char *wp)
1573 {
1574 char c;
1575 enum parse_state {
1576 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1577 PS_NUMBER, PS_VAR1
1578 } state = PS_INITIAL;
1579
1580 while (/* CONSTCOND */ 1) {
1581 c = getsc();
1582 /* State machine to figure out where the variable part ends. */
1583 switch (state) {
1584 case PS_INITIAL:
1585 if (c == '#' || c == '!' || c == '%') {
1586 state = PS_SAW_HASH;
1587 break;
1588 }
1589 /* FALLTHROUGH */
1590 case PS_SAW_HASH:
1591 if (ksh_isalphx(c))
1592 state = PS_IDENT;
1593 else if (ksh_isdigit(c))
1594 state = PS_NUMBER;
1595 else if (c == '#') {
1596 if (state == PS_SAW_HASH) {
1597 char c2;
1598
1599 c2 = getsc();
1600 ungetsc(c2);
1601 if (c2 != /*{*/ '}') {
1602 ungetsc(c);
1603 goto out;
1604 }
1605 }
1606 state = PS_VAR1;
1607 } else if (ctype(c, C_VAR1))
1608 state = PS_VAR1;
1609 else
1610 goto out;
1611 break;
1612 case PS_IDENT:
1613 if (!ksh_isalnux(c)) {
1614 if (c == '[') {
1615 char *tmp, *p;
1616
1617 if (!arraysub(&tmp))
1618 yyerror("missing ]\n");
1619 *wp++ = c;
1620 for (p = tmp; *p; ) {
1621 Xcheck(*wsp, wp);
1622 *wp++ = *p++;
1623 }
1624 afree(tmp, ATEMP);
1625 /* the ] */
1626 c = getsc();
1627 }
1628 goto out;
1629 }
1630 break;
1631 case PS_NUMBER:
1632 if (!ksh_isdigit(c))
1633 goto out;
1634 break;
1635 case PS_VAR1:
1636 goto out;
1637 }
1638 Xcheck(*wsp, wp);
1639 *wp++ = c;
1640 }
1641 out:
1642 /* end of variable part */
1643 *wp++ = '\0';
1644 ungetsc(c);
1645 return (wp);
1646 }
1647
1648 /*
1649 * Save an array subscript - returns true if matching bracket found, false
1650 * if eof or newline was found.
1651 * (Returned string double null terminated)
1652 */
1653 static bool
arraysub(char ** strp)1654 arraysub(char **strp)
1655 {
1656 XString ws;
1657 char *wp, c;
1658 /* we are just past the initial [ */
1659 unsigned int depth = 1;
1660
1661 Xinit(ws, wp, 32, ATEMP);
1662
1663 do {
1664 c = getsc();
1665 Xcheck(ws, wp);
1666 *wp++ = c;
1667 if (c == '[')
1668 depth++;
1669 else if (c == ']')
1670 depth--;
1671 } while (depth > 0 && c && c != '\n');
1672
1673 *wp++ = '\0';
1674 *strp = Xclose(ws, wp);
1675
1676 return (tobool(depth == 0));
1677 }
1678
1679 /* Unget a char: handles case when we are already at the start of the buffer */
1680 static void
ungetsc(int c)1681 ungetsc(int c)
1682 {
1683 struct sretrace_info *rp = retrace_info;
1684
1685 if (backslash_skip)
1686 backslash_skip--;
1687 /* Don't unget EOF... */
1688 if (source->str == null && c == '\0')
1689 return;
1690 while (rp) {
1691 if (Xlength(rp->xs, rp->xp))
1692 rp->xp--;
1693 rp = rp->next;
1694 }
1695 ungetsc_i(c);
1696 }
1697 static void
ungetsc_i(int c)1698 ungetsc_i(int c)
1699 {
1700 if (source->str > source->start)
1701 source->str--;
1702 else {
1703 Source *s;
1704
1705 s = pushs(SREREAD, source->areap);
1706 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1707 s->start = s->str = s->ugbuf;
1708 s->next = source;
1709 source = s;
1710 }
1711 }
1712
1713
1714 /* Called to get a char that isn't a \newline sequence. */
1715 static int
getsc_bn(void)1716 getsc_bn(void)
1717 {
1718 int c, c2;
1719
1720 if (ignore_backslash_newline)
1721 return (o_getsc_u());
1722
1723 if (backslash_skip == 1) {
1724 backslash_skip = 2;
1725 return (o_getsc_u());
1726 }
1727
1728 backslash_skip = 0;
1729
1730 while (/* CONSTCOND */ 1) {
1731 c = o_getsc_u();
1732 if (c == '\\') {
1733 if ((c2 = o_getsc_u()) == '\n')
1734 /* ignore the \newline; get the next char... */
1735 continue;
1736 ungetsc_i(c2);
1737 backslash_skip = 1;
1738 }
1739 return (c);
1740 }
1741 }
1742
1743 void
yyskiputf8bom(void)1744 yyskiputf8bom(void)
1745 {
1746 int c;
1747
1748 if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1749 ungetsc_i(c);
1750 return;
1751 }
1752 if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1753 ungetsc_i(c);
1754 ungetsc_i(0xEF);
1755 return;
1756 }
1757 if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1758 ungetsc_i(c);
1759 ungetsc_i(0xBB);
1760 ungetsc_i(0xEF);
1761 return;
1762 }
1763 UTFMODE |= 8;
1764 }
1765
1766 static Lex_state *
push_state_i(State_info * si,Lex_state * old_end)1767 push_state_i(State_info *si, Lex_state *old_end)
1768 {
1769 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1770
1771 news[0].ls_base = old_end;
1772 si->base = &news[0];
1773 si->end = &news[STATE_BSIZE];
1774 return (&news[1]);
1775 }
1776
1777 static Lex_state *
pop_state_i(State_info * si,Lex_state * old_end)1778 pop_state_i(State_info *si, Lex_state *old_end)
1779 {
1780 Lex_state *old_base = si->base;
1781
1782 si->base = old_end->ls_base - STATE_BSIZE;
1783 si->end = old_end->ls_base;
1784
1785 afree(old_base, ATEMP);
1786
1787 return (si->base + STATE_BSIZE - 1);
1788 }
1789