1 /* $OpenBSD: lex.c,v 1.49 2013/12/17 16:37:06 deraadt Exp $ */
2
3 /*-
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 * 2011, 2012, 2013, 2014, 2015
6 * Thorsten Glaser <tg@mirbsd.org>
7 *
8 * Provided that these terms and disclaimer and all copyright notices
9 * are retained or reproduced in an accompanying document, permission
10 * is granted to deal in this work without restriction, including un-
11 * limited rights to use, publicly perform, distribute, sell, modify,
12 * merge, give away, or sublicence.
13 *
14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15 * the utmost extent permitted by applicable law, neither express nor
16 * implied; without malicious intent or gross negligence. In no event
17 * may a licensor, author or contributor be held liable for indirect,
18 * direct, other damage, loss, or other issues arising in any way out
19 * of dealing in the work, even if advised of the possibility of such
20 * damage or existence of a defect, except proven that it results out
21 * of said person's immediate fault when using the work as intended.
22 */
23
24 #include "sh.h"
25
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.193.2.5 2015/04/19 19:18:19 tg Exp $");
27
/*
 * Lexer states used by yylex() while it assembles one word.  The value
 * is kept in Lex_state.type (a uint8_t), so every state must fit into
 * eight bits; SINVALID marks the sentinel slot at the bottom of a
 * state block.
 */
#define SBASE		0	/* not inside any lexical construct */
#define SWORD		1	/* substitute(): implicitly quoted */
#define SLETPAREN	2	/* within (( )), implicitly quoted */
#define SSQUOTE		3	/* within '' */
#define SDQUOTE		4	/* within "" */
#define SEQUOTE		5	/* within $'' */
#define SBRACE		6	/* within ${} */
#define SQBRACE		7	/* within "${}" */
#define SBQUOTE		8	/* within `` */
#define SASPAREN	9	/* within $(( )) */
#define SHEREDELIM	10	/* reading a <<,<<-,<<< delimiter */
#define SHEREDQUOTE	11	/* reading " in a <<,<<-,<<< delimiter */
#define SPATTERN	12	/* reading *(...|...) pattern (*+?@!) */
#define SADELIM		13	/* as SBASE, but watching for a delimiter */
#define STBRACEKORN	14	/* reading ${...[#%]...} !FSH */
#define STBRACEBOURNE	15	/* reading ${...[#%]...} FSH */
#define SINVALID	255	/* not a valid state */
48
49 struct sretrace_info {
50 struct sretrace_info *next;
51 XString xs;
52 char *xp;
53 };
54
/*
 * One slot of the lexer's state stack: the state type plus whatever
 * per-state data that particular state needs.
 */
typedef struct lex_state {
	union {
		/* sentinel slot only: link to the previous state block */
		struct lex_state *base;
		/* saved output position where this state's output begins */
		int start;
		/* SBQUOTE: true when nested in double quotes: "`...`" */
		/* SEQUOTE: a NUL was seen, discard the rest of the string */
		bool abool;
		/* data for SADELIM */
		struct {
			/* the delimiter character being looked for */
			unsigned char delimiter;
			/* how many delimiters may still be consumed */
			unsigned char num;
		} adelim;
	} u;
	/* number of currently open parentheses */
	short nparen;
	/* which state this is (one of the S* constants) */
	uint8_t type;
} Lex_state;
#define ls_base		u.base
#define ls_start	u.start
#define ls_bool		u.abool
#define ls_adelim	u.adelim

/*
 * Bounds of the state block currently in use: base is its sentinel
 * slot, end points one past its last slot.
 */
typedef struct {
	Lex_state *base;
	Lex_state *end;
} State_info;
90
91 static void readhere(struct ioword *);
92 static void ungetsc(int);
93 static void ungetsc_i(int);
94 static int getsc_uu(void);
95 static void getsc_line(Source *);
96 static int getsc_bn(void);
97 static int s_get(void);
98 static void s_put(int);
99 static char *get_brace_var(XString *, char *);
100 static bool arraysub(char **);
101 static void gethere(bool);
102 static Lex_state *push_state_i(State_info *, Lex_state *);
103 static Lex_state *pop_state_i(State_info *, Lex_state *);
104
105 static int backslash_skip;
106 static int ignore_backslash_newline;
107
/*
 * fast path for getsc_bn(): take the next byte straight from the
 * current source unless it is NUL or a backslash, or a backslash
 * skip is pending
 */
#define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
			!backslash_skip ? *source->str++ : getsc_bn())
/* fast path for getsc_uu(): only a NUL byte needs the slow function */
#define o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())

/*
 * retrace helper: forms the entire body of a function returning int;
 * appends the character to every buffer on the retrace_info list and
 * then returns it
 */
#define o_getsc_r(carg)	{				\
	int cev = (carg);				\
	struct sretrace_info *rp = retrace_info;	\
							\
	while (rp) {					\
		Xcheck(rp->xs, rp->xp);			\
		*rp->xp++ = cev;			\
		rp = rp->next;				\
	}						\
							\
	return (cev);					\
}
127
/*
 * getsc(): read the next input character (via o_getsc()) and record it
 * in all active retrace buffers (via o_getsc_r()).
 *
 * The small build without MKSH_SMALL_BUT_FAST uses one real function;
 * otherwise the o_getsc() fast path is expanded at each call site and
 * only the retrace bookkeeping lives in the function getsc_r().
 */
#if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
static int getsc(void);

static int
getsc(void)
{
	o_getsc_r(o_getsc());
}
#else
static int getsc_r(int);

static int
getsc_r(int c)
{
	o_getsc_r(c);
}

#define getsc()		getsc_r(o_getsc())
#endif
147
/* number of Lex_state slots in one state block */
#define STATE_BSIZE	8

/*
 * The macros below operate on yylex()'s local variables (statep,
 * state, state_info, ws, wp, sp, dp) and on retrace_info.
 */

/* enter state s; push_state_i() is called when the block is used up */
#define PUSH_STATE(s)	do {					\
	if (++statep == state_info.end)				\
		statep = push_state_i(&state_info, statep);	\
	state = statep->type = (s);				\
} while (/* CONSTCOND */ 0)

/* leave the current state; pop_state_i() is called at a block's sentinel */
#define POP_STATE()	do {					\
	if (--statep == state_info.base)			\
		statep = pop_state_i(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)

/*
 * enter state s, remember the current output position in ls_start
 * and put a fresh retrace buffer at the head of the retrace list
 */
#define PUSH_SRETRACE(s) do {					\
	struct sretrace_info *ri;				\
								\
	PUSH_STATE(s);						\
	statep->ls_start = Xsavepos(ws, wp);			\
	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
	ri->next = retrace_info;				\
	retrace_info = ri;					\
} while (/* CONSTCOND */ 0)

/*
 * rewind the output to the remembered position, NUL-terminate the
 * newest retrace buffer and leave its string in sp (not freed here),
 * unlink and free the list entry itself, then leave the state
 */
#define POP_SRETRACE()	do {					\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*retrace_info->xp = '\0';				\
	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
	dp = (void *)retrace_info;				\
	retrace_info = retrace_info->next;			\
	afree(dp, ATEMP);					\
	POP_STATE();						\
} while (/* CONSTCOND */ 0)
182
183 /**
184 * Lexical analyser
185 *
186 * tokens are not regular expressions, they are LL(1).
187 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
188 * hence the state stack. Note "$(...)" are now parsed recursively.
189 */
190
191 int
yylex(int cf)192 yylex(int cf)
193 {
194 Lex_state states[STATE_BSIZE], *statep, *s2, *base;
195 State_info state_info;
196 int c, c2, state;
197 size_t cz;
198 XString ws; /* expandable output word */
199 char *wp; /* output word pointer */
200 char *sp, *dp;
201
202 Again:
203 states[0].type = SINVALID;
204 states[0].ls_base = NULL;
205 statep = &states[1];
206 state_info.base = states;
207 state_info.end = &state_info.base[STATE_BSIZE];
208
209 Xinit(ws, wp, 64, ATEMP);
210
211 backslash_skip = 0;
212 ignore_backslash_newline = 0;
213
214 if (cf & ONEWORD)
215 state = SWORD;
216 else if (cf & LETEXPR) {
217 /* enclose arguments in (double) quotes */
218 *wp++ = OQUOTE;
219 state = SLETPAREN;
220 statep->nparen = 0;
221 } else {
222 /* normal lexing */
223 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
224 while ((c = getsc()) == ' ' || c == '\t')
225 ;
226 if (c == '#') {
227 ignore_backslash_newline++;
228 while ((c = getsc()) != '\0' && c != '\n')
229 ;
230 ignore_backslash_newline--;
231 }
232 ungetsc(c);
233 }
234 if (source->flags & SF_ALIAS) {
235 /* trailing ' ' in alias definition */
236 source->flags &= ~SF_ALIAS;
237 cf |= ALIAS;
238 }
239
240 /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
241 statep->type = state;
242
243 /* check for here string */
244 if (state == SHEREDELIM) {
245 c = getsc();
246 if (c == '<') {
247 state = SHEREDELIM;
248 while ((c = getsc()) == ' ' || c == '\t')
249 ;
250 ungetsc(c);
251 c = '<';
252 goto accept_nonword;
253 }
254 ungetsc(c);
255 }
256
257 /* collect non-special or quoted characters to form word */
258 while (!((c = getsc()) == 0 ||
259 ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
260 if (state == SBASE &&
261 subshell_nesting_type == /*{*/ '}' &&
262 c == /*{*/ '}')
263 /* possibly end ${ :;} */
264 break;
265 accept_nonword:
266 Xcheck(ws, wp);
267 switch (state) {
268 case SADELIM:
269 if (c == '(')
270 statep->nparen++;
271 else if (c == ')')
272 statep->nparen--;
273 else if (statep->nparen == 0 && (c == /*{*/ '}' ||
274 c == (int)statep->ls_adelim.delimiter)) {
275 *wp++ = ADELIM;
276 *wp++ = c;
277 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
278 POP_STATE();
279 if (c == /*{*/ '}')
280 POP_STATE();
281 break;
282 }
283 /* FALLTHROUGH */
284 case SBASE:
285 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
286 /* temporary */
287 *wp = EOS;
288 if (is_wdvarname(Xstring(ws, wp), false)) {
289 char *p, *tmp;
290
291 if (arraysub(&tmp)) {
292 *wp++ = CHAR;
293 *wp++ = c;
294 for (p = tmp; *p; ) {
295 Xcheck(ws, wp);
296 *wp++ = CHAR;
297 *wp++ = *p++;
298 }
299 afree(tmp, ATEMP);
300 break;
301 } else {
302 Source *s;
303
304 s = pushs(SREREAD,
305 source->areap);
306 s->start = s->str =
307 s->u.freeme = tmp;
308 s->next = source;
309 source = s;
310 }
311 }
312 *wp++ = CHAR;
313 *wp++ = c;
314 break;
315 }
316 /* FALLTHROUGH */
317 Sbase1: /* includes *(...|...) pattern (*+?@!) */
318 if (c == '*' || c == '@' || c == '+' || c == '?' ||
319 c == '!') {
320 c2 = getsc();
321 if (c2 == '(' /*)*/ ) {
322 *wp++ = OPAT;
323 *wp++ = c;
324 PUSH_STATE(SPATTERN);
325 break;
326 }
327 ungetsc(c2);
328 }
329 /* FALLTHROUGH */
330 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
331 switch (c) {
332 case '\\':
333 getsc_qchar:
334 if ((c = getsc())) {
335 /* trailing \ is lost */
336 *wp++ = QCHAR;
337 *wp++ = c;
338 }
339 break;
340 case '\'':
341 open_ssquote_unless_heredoc:
342 if ((cf & HEREDOC))
343 goto store_char;
344 *wp++ = OQUOTE;
345 ignore_backslash_newline++;
346 PUSH_STATE(SSQUOTE);
347 break;
348 case '"':
349 open_sdquote:
350 *wp++ = OQUOTE;
351 PUSH_STATE(SDQUOTE);
352 break;
353 case '$':
354 /*
355 * processing of dollar sign belongs into
356 * Subst, except for those which can open
357 * a string: $'…' and $"…"
358 */
359 subst_dollar_ex:
360 c = getsc();
361 switch (c) {
362 case '"':
363 goto open_sdquote;
364 case '\'':
365 goto open_sequote;
366 default:
367 goto SubstS;
368 }
369 default:
370 goto Subst;
371 }
372 break;
373
374 Subst:
375 switch (c) {
376 case '\\':
377 c = getsc();
378 switch (c) {
379 case '"':
380 if ((cf & HEREDOC))
381 goto heredocquote;
382 /* FALLTHROUGH */
383 case '\\':
384 case '$': case '`':
385 store_qchar:
386 *wp++ = QCHAR;
387 *wp++ = c;
388 break;
389 default:
390 heredocquote:
391 Xcheck(ws, wp);
392 if (c) {
393 /* trailing \ is lost */
394 *wp++ = CHAR;
395 *wp++ = '\\';
396 *wp++ = CHAR;
397 *wp++ = c;
398 }
399 break;
400 }
401 break;
402 case '$':
403 c = getsc();
404 SubstS:
405 if (c == '(') /*)*/ {
406 c = getsc();
407 if (c == '(') /*)*/ {
408 *wp++ = EXPRSUB;
409 PUSH_SRETRACE(SASPAREN);
410 statep->nparen = 2;
411 *retrace_info->xp++ = '(';
412 } else {
413 ungetsc(c);
414 subst_command:
415 c = COMSUB;
416 subst_command2:
417 sp = yyrecursive(c);
418 cz = strlen(sp) + 1;
419 XcheckN(ws, wp, cz);
420 *wp++ = c;
421 memcpy(wp, sp, cz);
422 wp += cz;
423 }
424 } else if (c == '{') /*}*/ {
425 if ((c = getsc()) == '|') {
426 /*
427 * non-subenvironment
428 * value substitution
429 */
430 c = VALSUB;
431 goto subst_command2;
432 } else if (ctype(c, C_IFSWS)) {
433 /*
434 * non-subenvironment
435 * "command" substitution
436 */
437 c = FUNSUB;
438 goto subst_command2;
439 }
440 ungetsc(c);
441 *wp++ = OSUBST;
442 *wp++ = '{'; /*}*/
443 wp = get_brace_var(&ws, wp);
444 c = getsc();
445 /* allow :# and :% (ksh88 compat) */
446 if (c == ':') {
447 *wp++ = CHAR;
448 *wp++ = c;
449 c = getsc();
450 if (c == ':') {
451 *wp++ = CHAR;
452 *wp++ = '0';
453 *wp++ = ADELIM;
454 *wp++ = ':';
455 PUSH_STATE(SBRACE);
456 PUSH_STATE(SADELIM);
457 statep->ls_adelim.delimiter = ':';
458 statep->ls_adelim.num = 1;
459 statep->nparen = 0;
460 break;
461 } else if (ksh_isdigit(c) ||
462 c == '('/*)*/ || c == ' ' ||
463 /*XXX what else? */
464 c == '$') {
465 /* substring subst. */
466 if (c != ' ') {
467 *wp++ = CHAR;
468 *wp++ = ' ';
469 }
470 ungetsc(c);
471 PUSH_STATE(SBRACE);
472 PUSH_STATE(SADELIM);
473 statep->ls_adelim.delimiter = ':';
474 statep->ls_adelim.num = 2;
475 statep->nparen = 0;
476 break;
477 }
478 } else if (c == '/') {
479 *wp++ = CHAR;
480 *wp++ = c;
481 if ((c = getsc()) == '/') {
482 *wp++ = ADELIM;
483 *wp++ = c;
484 } else
485 ungetsc(c);
486 PUSH_STATE(SBRACE);
487 PUSH_STATE(SADELIM);
488 statep->ls_adelim.delimiter = '/';
489 statep->ls_adelim.num = 1;
490 statep->nparen = 0;
491 break;
492 }
493 /*
494 * If this is a trim operation,
495 * treat (,|,) specially in STBRACE.
496 */
497 if (ctype(c, C_SUBOP2)) {
498 ungetsc(c);
499 if (Flag(FSH))
500 PUSH_STATE(STBRACEBOURNE);
501 else
502 PUSH_STATE(STBRACEKORN);
503 } else {
504 ungetsc(c);
505 if (state == SDQUOTE ||
506 state == SQBRACE)
507 PUSH_STATE(SQBRACE);
508 else
509 PUSH_STATE(SBRACE);
510 }
511 } else if (ksh_isalphx(c)) {
512 *wp++ = OSUBST;
513 *wp++ = 'X';
514 do {
515 Xcheck(ws, wp);
516 *wp++ = c;
517 c = getsc();
518 } while (ksh_isalnux(c));
519 *wp++ = '\0';
520 *wp++ = CSUBST;
521 *wp++ = 'X';
522 ungetsc(c);
523 } else if (ctype(c, C_VAR1 | C_DIGIT)) {
524 Xcheck(ws, wp);
525 *wp++ = OSUBST;
526 *wp++ = 'X';
527 *wp++ = c;
528 *wp++ = '\0';
529 *wp++ = CSUBST;
530 *wp++ = 'X';
531 } else {
532 *wp++ = CHAR;
533 *wp++ = '$';
534 ungetsc(c);
535 }
536 break;
537 case '`':
538 subst_gravis:
539 PUSH_STATE(SBQUOTE);
540 *wp++ = COMSUB;
541 /*
542 * Need to know if we are inside double quotes
543 * since sh/AT&T-ksh translate the \" to " in
544 * "`...\"...`".
545 * This is not done in POSIX mode (section
546 * 3.2.3, Double Quotes: "The backquote shall
547 * retain its special meaning introducing the
548 * other form of command substitution (see
549 * 3.6.3). The portion of the quoted string
550 * from the initial backquote and the
551 * characters up to the next backquote that
552 * is not preceded by a backslash (having
553 * escape characters removed) defines that
554 * command whose output replaces `...` when
555 * the word is expanded."
556 * Section 3.6.3, Command Substitution:
557 * "Within the backquoted style of command
558 * substitution, backslash shall retain its
559 * literal meaning, except when followed by
560 * $ ` \.").
561 */
562 statep->ls_bool = false;
563 s2 = statep;
564 base = state_info.base;
565 while (/* CONSTCOND */ 1) {
566 for (; s2 != base; s2--) {
567 if (s2->type == SDQUOTE) {
568 statep->ls_bool = true;
569 break;
570 }
571 }
572 if (s2 != base)
573 break;
574 if (!(s2 = s2->ls_base))
575 break;
576 base = s2-- - STATE_BSIZE;
577 }
578 break;
579 case QCHAR:
580 if (cf & LQCHAR) {
581 *wp++ = QCHAR;
582 *wp++ = getsc();
583 break;
584 }
585 /* FALLTHROUGH */
586 default:
587 store_char:
588 *wp++ = CHAR;
589 *wp++ = c;
590 }
591 break;
592
593 case SEQUOTE:
594 if (c == '\'') {
595 POP_STATE();
596 *wp++ = CQUOTE;
597 ignore_backslash_newline--;
598 } else if (c == '\\') {
599 if ((c2 = unbksl(true, s_get, s_put)) == -1)
600 c2 = s_get();
601 if (c2 == 0)
602 statep->ls_bool = true;
603 if (!statep->ls_bool) {
604 char ts[4];
605
606 if ((unsigned int)c2 < 0x100) {
607 *wp++ = QCHAR;
608 *wp++ = c2;
609 } else {
610 cz = utf_wctomb(ts, c2 - 0x100);
611 ts[cz] = 0;
612 for (cz = 0; ts[cz]; ++cz) {
613 *wp++ = QCHAR;
614 *wp++ = ts[cz];
615 }
616 }
617 }
618 } else if (!statep->ls_bool) {
619 *wp++ = QCHAR;
620 *wp++ = c;
621 }
622 break;
623
624 case SSQUOTE:
625 if (c == '\'') {
626 POP_STATE();
627 if ((cf & HEREDOC) || state == SQBRACE)
628 goto store_char;
629 *wp++ = CQUOTE;
630 ignore_backslash_newline--;
631 } else {
632 *wp++ = QCHAR;
633 *wp++ = c;
634 }
635 break;
636
637 case SDQUOTE:
638 if (c == '"') {
639 POP_STATE();
640 *wp++ = CQUOTE;
641 } else
642 goto Subst;
643 break;
644
645 /* $(( ... )) */
646 case SASPAREN:
647 if (c == '(')
648 statep->nparen++;
649 else if (c == ')') {
650 statep->nparen--;
651 if (statep->nparen == 1) {
652 /* end of EXPRSUB */
653 POP_SRETRACE();
654
655 if ((c2 = getsc()) == /*(*/ ')') {
656 cz = strlen(sp) - 2;
657 XcheckN(ws, wp, cz);
658 memcpy(wp, sp + 1, cz);
659 wp += cz;
660 afree(sp, ATEMP);
661 *wp++ = '\0';
662 break;
663 } else {
664 Source *s;
665
666 ungetsc(c2);
667 /*
668 * mismatched parenthesis -
669 * assume we were really
670 * parsing a $(...) expression
671 */
672 --wp;
673 s = pushs(SREREAD,
674 source->areap);
675 s->start = s->str =
676 s->u.freeme = sp;
677 s->next = source;
678 source = s;
679 goto subst_command;
680 }
681 }
682 }
683 /* reuse existing state machine */
684 goto Sbase2;
685
686 case SQBRACE:
687 if (c == '\\') {
688 /*
689 * perform POSIX "quote removal" if the back-
690 * slash is "special", i.e. same cases as the
691 * {case '\\':} in Subst: plus closing brace;
692 * in mksh code "quote removal" on '\c' means
693 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
694 * emitted (in heredocquote:)
695 */
696 if ((c = getsc()) == '"' || c == '\\' ||
697 c == '$' || c == '`' || c == /*{*/'}')
698 goto store_qchar;
699 goto heredocquote;
700 }
701 goto common_SQBRACE;
702
703 case SBRACE:
704 if (c == '\'')
705 goto open_ssquote_unless_heredoc;
706 else if (c == '\\')
707 goto getsc_qchar;
708 common_SQBRACE:
709 if (c == '"')
710 goto open_sdquote;
711 else if (c == '$')
712 goto subst_dollar_ex;
713 else if (c == '`')
714 goto subst_gravis;
715 else if (c != /*{*/ '}')
716 goto store_char;
717 POP_STATE();
718 *wp++ = CSUBST;
719 *wp++ = /*{*/ '}';
720 break;
721
722 /* Same as SBASE, except (,|,) treated specially */
723 case STBRACEKORN:
724 if (c == '|')
725 *wp++ = SPAT;
726 else if (c == '(') {
727 *wp++ = OPAT;
728 /* simile for @ */
729 *wp++ = ' ';
730 PUSH_STATE(SPATTERN);
731 } else /* FALLTHROUGH */
732 case STBRACEBOURNE:
733 if (c == /*{*/ '}') {
734 POP_STATE();
735 *wp++ = CSUBST;
736 *wp++ = /*{*/ '}';
737 } else
738 goto Sbase1;
739 break;
740
741 case SBQUOTE:
742 if (c == '`') {
743 *wp++ = 0;
744 POP_STATE();
745 } else if (c == '\\') {
746 switch (c = getsc()) {
747 case 0:
748 /* trailing \ is lost */
749 break;
750 case '\\':
751 case '$': case '`':
752 *wp++ = c;
753 break;
754 case '"':
755 if (statep->ls_bool) {
756 *wp++ = c;
757 break;
758 }
759 /* FALLTHROUGH */
760 default:
761 *wp++ = '\\';
762 *wp++ = c;
763 break;
764 }
765 } else
766 *wp++ = c;
767 break;
768
769 /* ONEWORD */
770 case SWORD:
771 goto Subst;
772
773 /* LETEXPR: (( ... )) */
774 case SLETPAREN:
775 if (c == /*(*/ ')') {
776 if (statep->nparen > 0)
777 --statep->nparen;
778 else if ((c2 = getsc()) == /*(*/ ')') {
779 c = 0;
780 *wp++ = CQUOTE;
781 goto Done;
782 } else {
783 Source *s;
784
785 ungetsc(c2);
786 /*
787 * mismatched parenthesis -
788 * assume we were really
789 * parsing a (...) expression
790 */
791 *wp = EOS;
792 sp = Xstring(ws, wp);
793 dp = wdstrip(sp, WDS_KEEPQ);
794 s = pushs(SREREAD, source->areap);
795 s->start = s->str = s->u.freeme = dp;
796 s->next = source;
797 source = s;
798 return ('('/*)*/);
799 }
800 } else if (c == '(')
801 /*
802 * parentheses inside quotes and
803 * backslashes are lost, but AT&T ksh
804 * doesn't count them either
805 */
806 ++statep->nparen;
807 goto Sbase2;
808
809 /* <<, <<-, <<< delimiter */
810 case SHEREDELIM:
811 /*
812 * here delimiters need a special case since
813 * $ and `...` are not to be treated specially
814 */
815 switch (c) {
816 case '\\':
817 if ((c = getsc())) {
818 /* trailing \ is lost */
819 *wp++ = QCHAR;
820 *wp++ = c;
821 }
822 break;
823 case '\'':
824 goto open_ssquote_unless_heredoc;
825 case '$':
826 if ((c2 = getsc()) == '\'') {
827 open_sequote:
828 *wp++ = OQUOTE;
829 ignore_backslash_newline++;
830 PUSH_STATE(SEQUOTE);
831 statep->ls_bool = false;
832 break;
833 } else if (c2 == '"') {
834 /* FALLTHROUGH */
835 case '"':
836 PUSH_SRETRACE(SHEREDQUOTE);
837 break;
838 }
839 ungetsc(c2);
840 /* FALLTHROUGH */
841 default:
842 *wp++ = CHAR;
843 *wp++ = c;
844 }
845 break;
846
847 /* " in <<, <<-, <<< delimiter */
848 case SHEREDQUOTE:
849 if (c != '"')
850 goto Subst;
851 POP_SRETRACE();
852 dp = strnul(sp) - 1;
853 /* remove the trailing double quote */
854 *dp = '\0';
855 /* store the quoted string */
856 *wp++ = OQUOTE;
857 XcheckN(ws, wp, (dp - sp) * 2);
858 dp = sp;
859 while ((c = *dp++)) {
860 if (c == '\\') {
861 switch ((c = *dp++)) {
862 case '\\':
863 case '"':
864 case '$':
865 case '`':
866 break;
867 default:
868 *wp++ = CHAR;
869 *wp++ = '\\';
870 break;
871 }
872 }
873 *wp++ = CHAR;
874 *wp++ = c;
875 }
876 afree(sp, ATEMP);
877 *wp++ = CQUOTE;
878 state = statep->type = SHEREDELIM;
879 break;
880
881 /* in *(...|...) pattern (*+?@!) */
882 case SPATTERN:
883 if (c == /*(*/ ')') {
884 *wp++ = CPAT;
885 POP_STATE();
886 } else if (c == '|') {
887 *wp++ = SPAT;
888 } else if (c == '(') {
889 *wp++ = OPAT;
890 /* simile for @ */
891 *wp++ = ' ';
892 PUSH_STATE(SPATTERN);
893 } else
894 goto Sbase1;
895 break;
896 }
897 }
898 Done:
899 Xcheck(ws, wp);
900 if (statep != &states[1])
901 /* XXX figure out what is missing */
902 yyerror("no closing quote\n");
903
904 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
905 if (state == SHEREDELIM)
906 state = SBASE;
907
908 dp = Xstring(ws, wp);
909 if (state == SBASE && (
910 #ifndef MKSH_LEGACY_MODE
911 (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
912 #endif
913 c == '<' || c == '>')) {
914 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
915
916 if (Xlength(ws, wp) == 0)
917 iop->unit = c == '<' ? 0 : 1;
918 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
919 if (dp[c2] != CHAR)
920 goto no_iop;
921 if (!ksh_isdigit(dp[c2 + 1]))
922 goto no_iop;
923 iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
924 if (iop->unit >= FDBASE)
925 goto no_iop;
926 }
927
928 if (c == '&') {
929 if ((c2 = getsc()) != '>') {
930 ungetsc(c2);
931 goto no_iop;
932 }
933 c = c2;
934 iop->ioflag = IOBASH;
935 } else
936 iop->ioflag = 0;
937
938 c2 = getsc();
939 /* <<, >>, <> are ok, >< is not */
940 if (c == c2 || (c == '<' && c2 == '>')) {
941 iop->ioflag |= c == c2 ?
942 (c == '>' ? IOCAT : IOHERE) : IORDWR;
943 if (iop->ioflag == IOHERE) {
944 if ((c2 = getsc()) == '-') {
945 iop->ioflag |= IOSKIP;
946 c2 = getsc();
947 } else if (c2 == '<')
948 iop->ioflag |= IOHERESTR;
949 ungetsc(c2);
950 if (c2 == '\n')
951 iop->ioflag |= IONDELIM;
952 }
953 } else if (c2 == '&')
954 iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0);
955 else {
956 iop->ioflag |= c == '>' ? IOWRITE : IOREAD;
957 if (c == '>' && c2 == '|')
958 iop->ioflag |= IOCLOB;
959 else
960 ungetsc(c2);
961 }
962
963 iop->name = NULL;
964 iop->delim = NULL;
965 iop->heredoc = NULL;
966 /* free word */
967 Xfree(ws, wp);
968 yylval.iop = iop;
969 return (REDIR);
970 no_iop:
971 afree(iop, ATEMP);
972 }
973
974 if (wp == dp && state == SBASE) {
975 /* free word */
976 Xfree(ws, wp);
977 /* no word, process LEX1 character */
978 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
979 if ((c2 = getsc()) == c)
980 c = (c == ';') ? BREAK :
981 (c == '|') ? LOGOR :
982 (c == '&') ? LOGAND :
983 /* c == '(' ) */ MDPAREN;
984 else if (c == '|' && c2 == '&')
985 c = COPROC;
986 else if (c == ';' && c2 == '|')
987 c = BRKEV;
988 else if (c == ';' && c2 == '&')
989 c = BRKFT;
990 else
991 ungetsc(c2);
992 #ifndef MKSH_SMALL
993 if (c == BREAK) {
994 if ((c2 = getsc()) == '&')
995 c = BRKEV;
996 else
997 ungetsc(c2);
998 }
999 #endif
1000 } else if (c == '\n') {
1001 gethere(false);
1002 if (cf & CONTIN)
1003 goto Again;
1004 } else if (c == '\0')
1005 /* need here strings at EOF */
1006 gethere(true);
1007 return (c);
1008 }
1009
1010 /* terminate word */
1011 *wp++ = EOS;
1012 yylval.cp = Xclose(ws, wp);
1013 if (state == SWORD || state == SLETPAREN
1014 /* XXX ONEWORD? */)
1015 return (LWORD);
1016
1017 /* unget terminator */
1018 ungetsc(c);
1019
1020 /*
1021 * note: the alias-vs-function code below depends on several
1022 * interna: starting from here, source->str is not modified;
1023 * the way getsc() and ungetsc() operate; etc.
1024 */
1025
1026 /* copy word to unprefixed string ident */
1027 sp = yylval.cp;
1028 dp = ident;
1029 if ((cf & HEREDELIM) && (sp[1] == '<')) {
1030 herestringloop:
1031 switch ((c = *sp++)) {
1032 case CHAR:
1033 ++sp;
1034 /* FALLTHROUGH */
1035 case OQUOTE:
1036 case CQUOTE:
1037 goto herestringloop;
1038 default:
1039 break;
1040 }
1041 /* dummy value */
1042 *dp++ = 'x';
1043 } else
1044 while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1045 *dp++ = *sp++;
1046 if (c != EOS)
1047 /* word is not unquoted */
1048 dp = ident;
1049 /* make sure the ident array stays NUL padded */
1050 memset(dp, 0, (ident + IDENT) - dp + 1);
1051
1052 if (!(cf & (KEYWORD | ALIAS)))
1053 return (LWORD);
1054
1055 if (*ident != '\0') {
1056 struct tbl *p;
1057 uint32_t h = hash(ident);
1058
1059 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1060 (!(cf & ESACONLY) || p->val.i == ESAC ||
1061 p->val.i == /*{*/ '}')) {
1062 afree(yylval.cp, ATEMP);
1063 return (p->val.i);
1064 }
1065 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1066 (p->flag & ISSET)) {
1067 /*
1068 * this still points to the same character as the
1069 * ungetsc'd terminator from above
1070 */
1071 const char *cp = source->str;
1072
1073 /* prefer POSIX but not Korn functions over aliases */
1074 while (*cp == ' ' || *cp == '\t')
1075 /*
1076 * this is like getsc() without skipping
1077 * over Source boundaries (including not
1078 * parsing ungetsc'd characters that got
1079 * pushed into an SREREAD) which is what
1080 * we want here anyway: find out whether
1081 * the alias name is followed by a POSIX
1082 * function definition
1083 */
1084 ++cp;
1085 /* prefer functions over aliases */
1086 if (cp[0] != '(' || cp[1] != ')') {
1087 Source *s = source;
1088
1089 while (s && (s->flags & SF_HASALIAS))
1090 if (s->u.tblp == p)
1091 return (LWORD);
1092 else
1093 s = s->next;
1094 /* push alias expansion */
1095 s = pushs(SALIAS, source->areap);
1096 s->start = s->str = p->val.s;
1097 s->u.tblp = p;
1098 s->flags |= SF_HASALIAS;
1099 s->next = source;
1100 if (source->type == SEOF) {
1101 /* prevent infinite recursion at EOS */
1102 source->u.tblp = p;
1103 source->flags |= SF_HASALIAS;
1104 }
1105 source = s;
1106 afree(yylval.cp, ATEMP);
1107 goto Again;
1108 }
1109 }
1110 } else if (cf & ALIAS) {
1111 /* retain typeset et al. even when quoted */
1112 if (assign_command((dp = wdstrip(yylval.cp, 0))))
1113 strlcpy(ident, dp, sizeof(ident));
1114 afree(dp, ATEMP);
1115 }
1116
1117 return (LWORD);
1118 }
1119
1120 static void
gethere(bool iseof)1121 gethere(bool iseof)
1122 {
1123 struct ioword **p;
1124
1125 for (p = heres; p < herep; p++)
1126 if (iseof && !((*p)->ioflag & IOHERESTR))
1127 /* only here strings at EOF */
1128 return;
1129 else
1130 readhere(*p);
1131 herep = heres;
1132 }
1133
1134 /*
1135 * read "<<word" text into temp file
1136 */
1137
1138 static void
readhere(struct ioword * iop)1139 readhere(struct ioword *iop)
1140 {
1141 int c;
1142 const char *eof, *eofp;
1143 XString xs;
1144 char *xp;
1145 int xpos;
1146
1147 if (iop->ioflag & IOHERESTR) {
1148 /* process the here string */
1149 iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
1150 xpos = strlen(xp) - 1;
1151 memmove(xp, xp + 1, xpos);
1152 xp[xpos] = '\n';
1153 return;
1154 }
1155
1156 eof = iop->ioflag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
1157
1158 if (!(iop->ioflag & IOEVAL))
1159 ignore_backslash_newline++;
1160
1161 Xinit(xs, xp, 256, ATEMP);
1162
1163 heredoc_read_line:
1164 /* beginning of line */
1165 eofp = eof;
1166 xpos = Xsavepos(xs, xp);
1167 if (iop->ioflag & IOSKIP) {
1168 /* skip over leading tabs */
1169 while ((c = getsc()) == '\t')
1170 ; /* nothing */
1171 goto heredoc_parse_char;
1172 }
1173 heredoc_read_char:
1174 c = getsc();
1175 heredoc_parse_char:
1176 /* compare with here document marker */
1177 if (!*eofp) {
1178 /* end of here document marker, what to do? */
1179 switch (c) {
1180 case /*(*/ ')':
1181 if (!subshell_nesting_type)
1182 /*-
1183 * not allowed outside $(...) or (...)
1184 * => mismatch
1185 */
1186 break;
1187 /* allow $(...) or (...) to close here */
1188 ungetsc(/*(*/ ')');
1189 /* FALLTHROUGH */
1190 case 0:
1191 /*
1192 * Allow EOF here to commands without trailing
1193 * newlines (mksh -c '...') will work as well.
1194 */
1195 case '\n':
1196 /* Newline terminates here document marker */
1197 goto heredoc_found_terminator;
1198 }
1199 } else if (c == *eofp++)
1200 /* store; then read and compare next character */
1201 goto heredoc_store_and_loop;
1202 /* nope, mismatch; read until end of line */
1203 while (c != '\n') {
1204 if (!c)
1205 /* oops, reached EOF */
1206 yyerror("%s '%s' unclosed\n", "here document", eof);
1207 /* store character */
1208 Xcheck(xs, xp);
1209 Xput(xs, xp, c);
1210 /* read next character */
1211 c = getsc();
1212 }
1213 /* we read a newline as last character */
1214 heredoc_store_and_loop:
1215 /* store character */
1216 Xcheck(xs, xp);
1217 Xput(xs, xp, c);
1218 if (c == '\n')
1219 goto heredoc_read_line;
1220 goto heredoc_read_char;
1221
1222 heredoc_found_terminator:
1223 /* jump back to saved beginning of line */
1224 xp = Xrestpos(xs, xp, xpos);
1225 /* terminate, close and store */
1226 Xput(xs, xp, '\0');
1227 iop->heredoc = Xclose(xs, xp);
1228
1229 if (!(iop->ioflag & IOEVAL))
1230 ignore_backslash_newline--;
1231 }
1232
1233 void
yyerror(const char * fmt,...)1234 yyerror(const char *fmt, ...)
1235 {
1236 va_list va;
1237
1238 /* pop aliases and re-reads */
1239 while (source->type == SALIAS || source->type == SREREAD)
1240 source = source->next;
1241 /* zap pending input */
1242 source->str = null;
1243
1244 error_prefix(true);
1245 va_start(va, fmt);
1246 shf_vfprintf(shl_out, fmt, va);
1247 va_end(va);
1248 errorfz();
1249 }
1250
1251 /*
1252 * input for yylex with alias expansion
1253 */
1254
1255 Source *
pushs(int type,Area * areap)1256 pushs(int type, Area *areap)
1257 {
1258 Source *s;
1259
1260 s = alloc(sizeof(Source), areap);
1261 memset(s, 0, sizeof(Source));
1262 s->type = type;
1263 s->str = null;
1264 s->areap = areap;
1265 if (type == SFILE || type == SSTDIN)
1266 XinitN(s->xs, 256, s->areap);
1267 return (s);
1268 }
1269
1270 static int
getsc_uu(void)1271 getsc_uu(void)
1272 {
1273 Source *s = source;
1274 int c;
1275
1276 while ((c = *s->str++) == 0) {
1277 /* return 0 for EOF by default */
1278 s->str = NULL;
1279 switch (s->type) {
1280 case SEOF:
1281 s->str = null;
1282 return (0);
1283
1284 case SSTDIN:
1285 case SFILE:
1286 getsc_line(s);
1287 break;
1288
1289 case SWSTR:
1290 break;
1291
1292 case SSTRING:
1293 case SSTRINGCMDLINE:
1294 break;
1295
1296 case SWORDS:
1297 s->start = s->str = *s->u.strv++;
1298 s->type = SWORDSEP;
1299 break;
1300
1301 case SWORDSEP:
1302 if (*s->u.strv == NULL) {
1303 s->start = s->str = "\n";
1304 s->type = SEOF;
1305 } else {
1306 s->start = s->str = " ";
1307 s->type = SWORDS;
1308 }
1309 break;
1310
1311 case SALIAS:
1312 if (s->flags & SF_ALIASEND) {
1313 /* pass on an unused SF_ALIAS flag */
1314 source = s->next;
1315 source->flags |= s->flags & SF_ALIAS;
1316 s = source;
1317 } else if (*s->u.tblp->val.s &&
1318 (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1319 /* pop source stack */
1320 source = s = s->next;
1321 /*
1322 * Note that this alias ended with a
1323 * space, enabling alias expansion on
1324 * the following word.
1325 */
1326 s->flags |= SF_ALIAS;
1327 } else {
1328 /*
1329 * At this point, we need to keep the current
1330 * alias in the source list so recursive
1331 * aliases can be detected and we also need to
1332 * return the next character. Do this by
1333 * temporarily popping the alias to get the
1334 * next character and then put it back in the
1335 * source list with the SF_ALIASEND flag set.
1336 */
1337 /* pop source stack */
1338 source = s->next;
1339 source->flags |= s->flags & SF_ALIAS;
1340 c = getsc_uu();
1341 if (c) {
1342 s->flags |= SF_ALIASEND;
1343 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1344 s->start = s->str = s->ugbuf;
1345 s->next = source;
1346 source = s;
1347 } else {
1348 s = source;
1349 /* avoid reading EOF twice */
1350 s->str = NULL;
1351 break;
1352 }
1353 }
1354 continue;
1355
1356 case SREREAD:
1357 if (s->start != s->ugbuf)
1358 /* yuck */
1359 afree(s->u.freeme, ATEMP);
1360 source = s = s->next;
1361 continue;
1362 }
1363 if (s->str == NULL) {
1364 s->type = SEOF;
1365 s->start = s->str = null;
1366 return ('\0');
1367 }
1368 if (s->flags & SF_ECHO) {
1369 shf_puts(s->str, shl_out);
1370 shf_flush(shl_out);
1371 }
1372 }
1373 return (c);
1374 }
1375
1376 static void
getsc_line(Source * s)1377 getsc_line(Source *s)
1378 {
1379 char *xp = Xstring(s->xs, xp), *cp;
1380 bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1381 bool have_tty = tobool(interactive && (s->flags & SF_TTY));
1382
1383 /* Done here to ensure nothing odd happens when a timeout occurs */
1384 XcheckN(s->xs, xp, LINE);
1385 *xp = '\0';
1386 s->start = s->str = xp;
1387
1388 if (have_tty && ksh_tmout) {
1389 ksh_tmout_state = TMOUT_READING;
1390 alarm(ksh_tmout);
1391 }
1392 if (interactive)
1393 change_winsz();
1394 #ifndef MKSH_NO_CMDLINE_EDITING
1395 if (have_tty && (
1396 #if !MKSH_S_NOVI
1397 Flag(FVI) ||
1398 #endif
1399 Flag(FEMACS) || Flag(FGMACS))) {
1400 int nread;
1401
1402 nread = x_read(xp);
1403 if (nread < 0)
1404 /* read error */
1405 nread = 0;
1406 xp[nread] = '\0';
1407 xp += nread;
1408 } else
1409 #endif
1410 {
1411 if (interactive)
1412 pprompt(prompt, 0);
1413 else
1414 s->line++;
1415
1416 while (/* CONSTCOND */ 1) {
1417 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1418
1419 if (!p && shf_error(s->u.shf) &&
1420 shf_errno(s->u.shf) == EINTR) {
1421 shf_clearerr(s->u.shf);
1422 if (trap)
1423 runtraps(0);
1424 continue;
1425 }
1426 if (!p || (xp = p, xp[-1] == '\n'))
1427 break;
1428 /* double buffer size */
1429 /* move past NUL so doubling works... */
1430 xp++;
1431 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1432 /* ...and move back again */
1433 xp--;
1434 }
1435 /*
1436 * flush any unwanted input so other programs/builtins
1437 * can read it. Not very optimal, but less error prone
1438 * than flushing else where, dealing with redirections,
1439 * etc.
1440 * TODO: reduce size of shf buffer (~128?) if SSTDIN
1441 */
1442 if (s->type == SSTDIN)
1443 shf_flush(s->u.shf);
1444 }
1445 /*
1446 * XXX: temporary kludge to restore source after a
1447 * trap may have been executed.
1448 */
1449 source = s;
1450 if (have_tty && ksh_tmout) {
1451 ksh_tmout_state = TMOUT_EXECUTING;
1452 alarm(0);
1453 }
1454 cp = Xstring(s->xs, xp);
1455 rndpush(cp);
1456 s->start = s->str = cp;
1457 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1458 /* Note: if input is all nulls, this is not eof */
1459 if (Xlength(s->xs, xp) == 0) {
1460 /* EOF */
1461 if (s->type == SFILE)
1462 shf_fdclose(s->u.shf);
1463 s->str = NULL;
1464 } else if (interactive && *s->str &&
1465 (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
1466 histsave(&s->line, s->str, true, true);
1467 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1468 } else if (interactive && cur_prompt == PS1) {
1469 cp = Xstring(s->xs, xp);
1470 while (*cp && ctype(*cp, C_IFSWS))
1471 ++cp;
1472 if (!*cp)
1473 histsync();
1474 #endif
1475 }
1476 if (interactive)
1477 set_prompt(PS2, NULL);
1478 }
1479
1480 void
set_prompt(int to,Source * s)1481 set_prompt(int to, Source *s)
1482 {
1483 cur_prompt = (uint8_t)to;
1484
1485 switch (to) {
1486 /* command */
1487 case PS1:
1488 /*
1489 * Substitute ! and !! here, before substitutions are done
1490 * so ! in expanded variables are not expanded.
1491 * NOTE: this is not what AT&T ksh does (it does it after
1492 * substitutions, POSIX doesn't say which is to be done.
1493 */
1494 {
1495 struct shf *shf;
1496 char * volatile ps1;
1497 Area *saved_atemp;
1498
1499 ps1 = str_val(global("PS1"));
1500 shf = shf_sopen(NULL, strlen(ps1) * 2,
1501 SHF_WR | SHF_DYNAMIC, NULL);
1502 while (*ps1)
1503 if (*ps1 != '!' || *++ps1 == '!')
1504 shf_putchar(*ps1++, shf);
1505 else
1506 shf_fprintf(shf, "%lu", s ?
1507 (unsigned long)s->line + 1 : 0UL);
1508 ps1 = shf_sclose(shf);
1509 saved_atemp = ATEMP;
1510 newenv(E_ERRH);
1511 if (kshsetjmp(e->jbuf)) {
1512 prompt = safe_prompt;
1513 /*
1514 * Don't print an error - assume it has already
1515 * been printed. Reason is we may have forked
1516 * to run a command and the child may be
1517 * unwinding its stack through this code as it
1518 * exits.
1519 */
1520 } else {
1521 char *cp = substitute(ps1, 0);
1522 strdupx(prompt, cp, saved_atemp);
1523 }
1524 quitenv(NULL);
1525 }
1526 break;
1527 /* command continuation */
1528 case PS2:
1529 prompt = str_val(global("PS2"));
1530 break;
1531 }
1532 }
1533
1534 int
pprompt(const char * cp,int ntruncate)1535 pprompt(const char *cp, int ntruncate)
1536 {
1537 char delimiter = 0;
1538 bool doprint = (ntruncate != -1);
1539 bool indelimit = false;
1540 int columns = 0, lines = 0;
1541
1542 /*
1543 * Undocumented AT&T ksh feature:
1544 * If the second char in the prompt string is \r then the first
1545 * char is taken to be a non-printing delimiter and any chars
1546 * between two instances of the delimiter are not considered to
1547 * be part of the prompt length
1548 */
1549 if (*cp && cp[1] == '\r') {
1550 delimiter = *cp;
1551 cp += 2;
1552 }
1553 for (; *cp; cp++) {
1554 if (indelimit && *cp != delimiter)
1555 ;
1556 else if (*cp == '\n' || *cp == '\r') {
1557 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1558 columns = 0;
1559 } else if (*cp == '\t') {
1560 columns = (columns | 7) + 1;
1561 } else if (*cp == '\b') {
1562 if (columns > 0)
1563 columns--;
1564 } else if (*cp == delimiter)
1565 indelimit = !indelimit;
1566 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1567 const char *cp2;
1568 columns += utf_widthadj(cp, &cp2);
1569 if (doprint && (indelimit ||
1570 (ntruncate < (x_cols * lines + columns))))
1571 shf_write(cp, cp2 - cp, shl_out);
1572 cp = cp2 - /* loop increment */ 1;
1573 continue;
1574 } else
1575 columns++;
1576 if (doprint && (*cp != delimiter) &&
1577 (indelimit || (ntruncate < (x_cols * lines + columns))))
1578 shf_putc(*cp, shl_out);
1579 }
1580 if (doprint)
1581 shf_flush(shl_out);
1582 return (x_cols * lines + columns);
1583 }
1584
1585 /*
1586 * Read the variable part of a ${...} expression (i.e. up to but not
1587 * including the :[-+?=#%] or close-brace).
1588 */
1589 static char *
get_brace_var(XString * wsp,char * wp)1590 get_brace_var(XString *wsp, char *wp)
1591 {
1592 char c;
1593 enum parse_state {
1594 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1595 PS_NUMBER, PS_VAR1
1596 } state = PS_INITIAL;
1597
1598 while (/* CONSTCOND */ 1) {
1599 c = getsc();
1600 /* State machine to figure out where the variable part ends. */
1601 switch (state) {
1602 case PS_INITIAL:
1603 if (c == '#' || c == '!' || c == '%') {
1604 state = PS_SAW_HASH;
1605 break;
1606 }
1607 /* FALLTHROUGH */
1608 case PS_SAW_HASH:
1609 if (ksh_isalphx(c))
1610 state = PS_IDENT;
1611 else if (ksh_isdigit(c))
1612 state = PS_NUMBER;
1613 else if (c == '#') {
1614 if (state == PS_SAW_HASH) {
1615 char c2;
1616
1617 c2 = getsc();
1618 ungetsc(c2);
1619 if (c2 != /*{*/ '}') {
1620 ungetsc(c);
1621 goto out;
1622 }
1623 }
1624 state = PS_VAR1;
1625 } else if (ctype(c, C_VAR1))
1626 state = PS_VAR1;
1627 else
1628 goto out;
1629 break;
1630 case PS_IDENT:
1631 if (!ksh_isalnux(c)) {
1632 if (c == '[') {
1633 char *tmp, *p;
1634
1635 if (!arraysub(&tmp))
1636 yyerror("missing ]\n");
1637 *wp++ = c;
1638 for (p = tmp; *p; ) {
1639 Xcheck(*wsp, wp);
1640 *wp++ = *p++;
1641 }
1642 afree(tmp, ATEMP);
1643 /* the ] */
1644 c = getsc();
1645 }
1646 goto out;
1647 }
1648 break;
1649 case PS_NUMBER:
1650 if (!ksh_isdigit(c))
1651 goto out;
1652 break;
1653 case PS_VAR1:
1654 goto out;
1655 }
1656 Xcheck(*wsp, wp);
1657 *wp++ = c;
1658 }
1659 out:
1660 /* end of variable part */
1661 *wp++ = '\0';
1662 ungetsc(c);
1663 return (wp);
1664 }
1665
1666 /*
1667 * Save an array subscript - returns true if matching bracket found, false
1668 * if eof or newline was found.
1669 * (Returned string double null terminated)
1670 */
1671 static bool
arraysub(char ** strp)1672 arraysub(char **strp)
1673 {
1674 XString ws;
1675 char *wp, c;
1676 /* we are just past the initial [ */
1677 unsigned int depth = 1;
1678
1679 Xinit(ws, wp, 32, ATEMP);
1680
1681 do {
1682 c = getsc();
1683 Xcheck(ws, wp);
1684 *wp++ = c;
1685 if (c == '[')
1686 depth++;
1687 else if (c == ']')
1688 depth--;
1689 } while (depth > 0 && c && c != '\n');
1690
1691 *wp++ = '\0';
1692 *strp = Xclose(ws, wp);
1693
1694 return (tobool(depth == 0));
1695 }
1696
1697 /* Unget a char: handles case when we are already at the start of the buffer */
1698 static void
ungetsc(int c)1699 ungetsc(int c)
1700 {
1701 struct sretrace_info *rp = retrace_info;
1702
1703 if (backslash_skip)
1704 backslash_skip--;
1705 /* Don't unget EOF... */
1706 if (source->str == null && c == '\0')
1707 return;
1708 while (rp) {
1709 if (Xlength(rp->xs, rp->xp))
1710 rp->xp--;
1711 rp = rp->next;
1712 }
1713 ungetsc_i(c);
1714 }
1715 static void
ungetsc_i(int c)1716 ungetsc_i(int c)
1717 {
1718 if (source->str > source->start)
1719 source->str--;
1720 else {
1721 Source *s;
1722
1723 s = pushs(SREREAD, source->areap);
1724 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1725 s->start = s->str = s->ugbuf;
1726 s->next = source;
1727 source = s;
1728 }
1729 }
1730
1731
1732 /* Called to get a char that isn't a \newline sequence. */
1733 static int
getsc_bn(void)1734 getsc_bn(void)
1735 {
1736 int c, c2;
1737
1738 if (ignore_backslash_newline)
1739 return (o_getsc_u());
1740
1741 if (backslash_skip == 1) {
1742 backslash_skip = 2;
1743 return (o_getsc_u());
1744 }
1745
1746 backslash_skip = 0;
1747
1748 while (/* CONSTCOND */ 1) {
1749 c = o_getsc_u();
1750 if (c == '\\') {
1751 if ((c2 = o_getsc_u()) == '\n')
1752 /* ignore the \newline; get the next char... */
1753 continue;
1754 ungetsc_i(c2);
1755 backslash_skip = 1;
1756 }
1757 return (c);
1758 }
1759 }
1760
1761 void
yyskiputf8bom(void)1762 yyskiputf8bom(void)
1763 {
1764 int c;
1765
1766 if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1767 ungetsc_i(c);
1768 return;
1769 }
1770 if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1771 ungetsc_i(c);
1772 ungetsc_i(0xEF);
1773 return;
1774 }
1775 if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1776 ungetsc_i(c);
1777 ungetsc_i(0xBB);
1778 ungetsc_i(0xEF);
1779 return;
1780 }
1781 UTFMODE |= 8;
1782 }
1783
1784 static Lex_state *
push_state_i(State_info * si,Lex_state * old_end)1785 push_state_i(State_info *si, Lex_state *old_end)
1786 {
1787 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1788
1789 news[0].ls_base = old_end;
1790 si->base = &news[0];
1791 si->end = &news[STATE_BSIZE];
1792 return (&news[1]);
1793 }
1794
1795 static Lex_state *
pop_state_i(State_info * si,Lex_state * old_end)1796 pop_state_i(State_info *si, Lex_state *old_end)
1797 {
1798 Lex_state *old_base = si->base;
1799
1800 si->base = old_end->ls_base - STATE_BSIZE;
1801 si->end = old_end->ls_base;
1802
1803 afree(old_base, ATEMP);
1804
1805 return (si->base + STATE_BSIZE - 1);
1806 }
1807
/*
 * Function wrapper around getsc(): returns the next input character.
 * NOTE(review): presumably exists so this can be handed around as a
 * function pointer - confirm against the callers.
 */
static int
s_get(void)
{
	int c;

	c = getsc();
	return (c);
}
1813
/*
 * Function wrapper around ungetsc(): pushes c back onto the input.
 * NOTE(review): presumably the counterpart of s_get() for use as a
 * function pointer - confirm against the callers.
 */
static void
s_put(int c)
{
	ungetsc(c);
	return;
}
1819