1 /*	$OpenBSD: eval.c,v 1.40 2013/09/14 20:09:30 millert Exp $	*/
2 
3 /*-
4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5  *		 2011, 2012, 2013, 2014, 2015, 2016
6  *	mirabilos <m@mirbsd.org>
7  *
8  * Provided that these terms and disclaimer and all copyright notices
9  * are retained or reproduced in an accompanying document, permission
10  * is granted to deal in this work without restriction, including un-
11  * limited rights to use, publicly perform, distribute, sell, modify,
12  * merge, give away, or sublicence.
13  *
14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15  * the utmost extent permitted by applicable law, neither express nor
16  * implied; without malicious intent or gross negligence. In no event
17  * may a licensor, author or contributor be held liable for indirect,
18  * direct, other damage, loss, or other issues arising in any way out
19  * of dealing in the work, even if advised of the possibility of such
20  * damage or existence of a defect, except proven that it results out
21  * of said person's immediate fault when using the work as intended.
22  */
23 
24 #include "sh.h"
25 
26 __RCSID("$MirOS: src/bin/mksh/eval.c,v 1.194 2016/11/11 23:31:34 tg Exp $");
27 
28 /*
29  * string expansion
30  *
31  * first pass: quoting, IFS separation, ~, ${}, $() and $(()) substitution.
32  * second pass: alternation ({,}), filename expansion (*?[]).
33  */
34 
35 /* expansion generator state */
/*
 * Generator state used by expand() while a substitution is in progress.
 * The member order matters: expand() initialises this struct with a
 * positional initialiser.
 */
36 typedef struct {
37 	/* not including an "int type;" member, see expand() */
38 	/* string currently being read character by character */
39 	const char *str;
40 	/* source */
41 	union {
42 		/* string[]: remaining strings, fetched when str is exhausted */
43 		const char **strv;
44 		/* file: read with shf_getc() while expanding $() */
45 		struct shf *shf;
46 	} u;
47 	/* variable in ${var...} */
48 	struct tbl *var;
49 	/* split "$@" / call waitlast in $() */
50 	bool split;
51 } Expand;
52 
/* values of the "type" state variable in expand(); varsub() returns one */
53 #define	XBASE		0	/* scanning original */
54 #define	XSUB		1	/* expanding ${} string */
55 #define	XARGSEP		2	/* ifs0 between "$*" */
56 #define	XARG		3	/* expanding $*, $@ */
57 #define	XCOM		4	/* expanding $() */
58 #define XNULLSUB	5	/* "$@" when $# is 0 (don't generate word) */
59 #define XSUBMID		6	/* middle of expanding ${} */
60 
61 /* States used for field splitting (the "word" variable in expand()) */
62 #define IFS_WORD	0	/* word has chars (or quotes except "$@") */
63 #define IFS_WS		1	/* have seen IFS white-space */
64 #define IFS_NWS		2	/* have seen IFS non-white-space */
65 #define IFS_IWS		3	/* beginning of word, ignore IFS WS */
66 #define IFS_QUOTE	4	/* beg.w/quote, become IFS_WORD unless "$@" */
67 
/* forward declarations of the file-local helpers */
68 static int varsub(Expand *, const char *, const char *, int *, int *);
69 static int comsub(Expand *, const char *, int);
70 static char *valsub(struct op *, Area *);
71 static char *trimsub(char *, char *, int);
72 static void glob(char *, XPtrV *, bool);
73 static void globit(XString *, char **, char *, XPtrV *, int);
74 static const char *maybe_expand_tilde(const char *, XString *, char **, bool);
/* homedir() is only declared when MKSH_NOPWNAM is not defined */
75 #ifndef MKSH_NOPWNAM
76 static char *homedir(char *);
77 #endif
78 static void alt_expand(XPtrV *, char *, char *, char *, int);
79 static int utflen(const char *) MKSH_A_PURE;
80 static void utfincptr(const char *, mksh_ari_t *);
81 
82 /* UTFMODE functions */
83 static int
utflen(const char * s)84 utflen(const char *s)
85 {
86 	size_t n;
87 
88 	if (UTFMODE) {
89 		n = 0;
90 		while (*s) {
91 			s += utf_ptradj(s);
92 			++n;
93 		}
94 	} else
95 		n = strlen(s);
96 
97 	if (n > 2147483647)
98 		n = 2147483647;
99 	return ((int)n);
100 }
101 
102 static void
utfincptr(const char * s,mksh_ari_t * lp)103 utfincptr(const char *s, mksh_ari_t *lp)
104 {
105 	const char *cp = s;
106 
107 	while ((*lp)--)
108 		cp += utf_ptradj(cp);
109 	*lp = cp - s;
110 }
111 
112 /* compile and expand word */
113 char *
substitute(const char * cp,int f)114 substitute(const char *cp, int f)
115 {
116 	struct source *s, *sold;
117 
118 	sold = source;
119 	s = pushs(SWSTR, ATEMP);
120 	s->start = s->str = cp;
121 	source = s;
122 	if (yylex(ONEWORD) != LWORD)
123 		internal_errorf(Tbadsubst);
124 	source = sold;
125 	afree(s, ATEMP);
126 	return (evalstr(yylval.cp, f));
127 }
128 
129 /*
130  * expand arg-list
131  */
132 char **
eval(const char ** ap,int f)133 eval(const char **ap, int f)
134 {
135 	XPtrV w;
136 
137 	if (*ap == NULL) {
138 		union mksh_ccphack vap;
139 
140 		vap.ro = ap;
141 		return (vap.rw);
142 	}
143 	XPinit(w, 32);
144 	/* space for shell name */
145 	XPput(w, NULL);
146 	while (*ap != NULL)
147 		expand(*ap++, &w, f);
148 	XPput(w, NULL);
149 	return ((char **)XPclose(w) + 1);
150 }
151 
152 /*
153  * expand string
154  */
155 char *
evalstr(const char * cp,int f)156 evalstr(const char *cp, int f)
157 {
158 	XPtrV w;
159 	char *dp = null;
160 
161 	XPinit(w, 1);
162 	expand(cp, &w, f);
163 	if (XPsize(w))
164 		dp = *XPptrv(w);
165 	XPfree(w);
166 	return (dp);
167 }
168 
169 /*
170  * expand string - return only one component
171  * used from iosetup to expand redirection files
172  */
173 char *
evalonestr(const char * cp,int f)174 evalonestr(const char *cp, int f)
175 {
176 	XPtrV w;
177 	char *rv;
178 
179 	XPinit(w, 1);
180 	expand(cp, &w, f);
181 	switch (XPsize(w)) {
182 	case 0:
183 		rv = null;
184 		break;
185 	case 1:
186 		rv = (char *) *XPptrv(w);
187 		break;
188 	default:
189 		rv = evalstr(cp, f & ~DOGLOB);
190 		break;
191 	}
192 	XPfree(w);
193 	return (rv);
194 }
195 
196 /* for nested substitution: ${var:=$var2}; expand() keeps these in a doubly linked list used as a stack */
197 typedef struct SubType {
198 	struct tbl *var;	/* variable for ${var..} */
199 	struct SubType *prev;	/* old type */
200 	struct SubType *next;	/* popped type (to avoid re-allocating) */
201 	size_t	base;		/* start position of expanded word */
202 	short	stype;		/* [=+-?%#] action after expanded word */
203 	short	f;		/* saved value of f (DOPAT, etc) */
204 	uint8_t	quotep;		/* saved value of quote (for ${..[%#]..}) */
205 	uint8_t	quotew;		/* saved value of quote (for ${..[+-=]..}) */
206 } SubType;
207 
/*
 * Expand one compiled word (ccp) into output words, which are stored
 * through wp (directly with XPput, or by way of glob()/alt_expand()).
 *
 * The function is a state machine. "type" selects where the next
 * character c is taken from: the compiled word itself (XBASE), a
 * substituted string (XSUB/XSUBMID), a vector of strings (XARG and
 * XARGSEP) or a file being read for a command substitution (XCOM).
 * The code following the switch then either finishes the current word
 * (c is 0, or an unquoted IFS character while field splitting is
 * active) or appends c to the destination string ds, prefixing
 * characters that are special to the second pass with MAGIC.
 *
 * f holds the DO* flags. They may be changed while the word part of a
 * ${...} substitution is scanned; the previous value is kept in a
 * SubType frame and restored when the matching CSUBST is reached.
 */
208 void
expand(const char * ccp,XPtrV * wp,int f)209 expand(
210     /* input word */
211     const char *ccp,
212     /* output words */
213     XPtrV *wp,
214     /* DO* flags */
215     int f)
216 {
217 	int c = 0;
218 	/* expansion type */
219 	int type;
220 	/* quoted */
221 	int quote = 0;
222 	/* destination string and live pointer */
223 	XString ds;
224 	char *dp;
225 	/* source */
226 	const char *sp;
227 	/* second pass flags */
228 	int fdo;
229 	/* have word */
230 	int word;
231 	/* field splitting of parameter/command substitution */
232 	int doblank;
233 	/* expansion variables */
234 	Expand x = {
235 		NULL, { NULL }, NULL, 0
236 	};
237 	SubType st_head, *st;
238 	/* record number of trailing newlines in COMSUB */
239 	int newlines = 0;
240 	bool saw_eq, make_magic;
241 	unsigned int tilde_ok;
242 	size_t len;
243 	char *cp;
244 
245 	if (ccp == NULL)
246 		internal_errorf("expand(NULL)");
247 	/* for alias, readonly, set, typeset commands */
248 	if ((f & DOVACHECK) && is_wdvarassign(ccp)) {
249 		f &= ~(DOVACHECK | DOBLANK | DOGLOB | DOTILDE);
250 		f |= DOASNTILDE | DOSCALAR;
251 	}
252 	if (Flag(FNOGLOB))
253 		f &= ~DOGLOB;
254 	if (Flag(FMARKDIRS))
255 		f |= DOMARKDIRS;
256 	if (Flag(FBRACEEXPAND) && (f & DOGLOB))
257 		f |= DOBRACE;
258 
259 	/* init destination string */
260 	Xinit(ds, dp, 128, ATEMP);
261 	type = XBASE;
262 	sp = ccp;
263 	fdo = 0;
264 	saw_eq = false;
265 	/* must be 1/0 */
266 	tilde_ok = (f & (DOTILDE | DOASNTILDE)) ? 1 : 0;
267 	doblank = 0;
268 	make_magic = false;
269 	word = (f&DOBLANK) ? IFS_WS : IFS_WORD;
270 	/* clang doesn't know OSUBST comes before CSUBST */
271 	memset(&st_head, 0, sizeof(st_head));
272 	st = &st_head;
273 
274 	while (/* CONSTCOND */ 1) {
275 		Xcheck(ds, dp);
276 
277 		switch (type) {
278 		case XBASE:
279 			/* original prefixed string */
280 			c = *sp++;
281 			switch (c) {
282 			case EOS:
283 				c = 0;
284 				break;
285 			case CHAR:
286 				c = *sp++;
287 				break;
288 			case QCHAR:
289 				/* temporary quote */
290 				quote |= 2;
291 				c = *sp++;
292 				break;
293 			case OQUOTE:
294 				if (word != IFS_WORD)
295 					word = IFS_QUOTE;
296 				tilde_ok = 0;
297 				quote = 1;
298 				continue;
299 			case CQUOTE:
300 				if (word == IFS_QUOTE)
301 					word = IFS_WORD;
302 				quote = st->quotew;
303 				continue;
304 			case COMSUB:
305 			case FUNSUB:
306 			case VALSUB:
307 				tilde_ok = 0;
308 				if (f & DONTRUNCOMMAND) {
					/* do not run it: copy the substitution out as text */
309 					word = IFS_WORD;
310 					*dp++ = '$';
311 					*dp++ = c == COMSUB ? '(' : '{';
312 					if (c != COMSUB)
313 						*dp++ = c == FUNSUB ? ' ' : '|';
314 					while (*sp != '\0') {
315 						Xcheck(ds, dp);
316 						*dp++ = *sp++;
317 					}
318 					if (c != COMSUB) {
319 						*dp++ = ';';
320 						*dp++ = '}';
321 					} else
322 						*dp++ = ')';
323 				} else {
324 					type = comsub(&x, sp, c);
325 					if (type != XBASE && (f & DOBLANK))
326 						doblank++;
327 					sp = strnul(sp) + 1;
328 					newlines = 0;
329 				}
330 				continue;
331 			case EXPRSUB:
332 				tilde_ok = 0;
333 				if (f & DONTRUNCOMMAND) {
					/* do not evaluate: copy the expression out as text */
334 					word = IFS_WORD;
335 					*dp++ = '$'; *dp++ = '('; *dp++ = '(';
336 					while (*sp != '\0') {
337 						Xcheck(ds, dp);
338 						*dp++ = *sp++;
339 					}
340 					*dp++ = ')'; *dp++ = ')';
341 				} else {
342 					struct tbl v;
343 
344 					v.flag = DEFINED|ISSET|INTEGER;
345 					/* not default */
346 					v.type = 10;
347 					v.name[0] = '\0';
348 					v_evaluate(&v, substitute(sp, 0),
349 					    KSH_UNWIND_ERROR, true);
350 					sp = strnul(sp) + 1;
351 					x.str = str_val(&v);
352 					type = XSUB;
353 					if (f & DOBLANK)
354 						doblank++;
355 				}
356 				continue;
357 			case OSUBST: {
358 				/* ${{#}var{:}[=+-?#%]word} */
359 			/*-
360 			 * format is:
361 			 *	OSUBST [{x] plain-variable-part \0
362 			 *	    compiled-word-part CSUBST [}x]
363 			 * This is where all syntax checking gets done...
364 			 */
365 				/* skip the { or x (}) */
366 				const char *varname = ++sp;
367 				int stype;
368 				int slen = 0;
369 
370 				/* skip variable */
371 				sp = cstrchr(sp, '\0') + 1;
372 				type = varsub(&x, varname, sp, &stype, &slen);
373 				if (type < 0) {
374 					char *beg, *end, *str;
					/* also reached by goto from the substring case below */
375  unwind_substsyn:
376 					/* restore sp */
377 					sp = varname - 2;
378 					beg = wdcopy(sp, ATEMP);
379 					end = (wdscan(cstrchr(sp, '\0') + 1,
380 					    CSUBST) - sp) + beg;
381 					/* ({) the } or x is already skipped */
382 					if (end < wdscan(beg, EOS))
383 						*end = EOS;
384 					str = snptreef(NULL, 64, Tf_S, beg);
385 					afree(beg, ATEMP);
386 					errorf(Tf_sD_s, str, Tbadsubst);
387 				}
388 				if (f & DOBLANK)
389 					doblank++;
390 				tilde_ok = 0;
391 				if (word == IFS_QUOTE && type != XNULLSUB)
392 					word = IFS_WORD;
393 				if (type == XBASE) {
394 					/* expand? */
					/* push a SubType frame, reusing an earlier allocation if one is chained */
395 					if (!st->next) {
396 						SubType *newst;
397 
398 						newst = alloc(sizeof(SubType), ATEMP);
399 						newst->next = NULL;
400 						newst->prev = st;
401 						st->next = newst;
402 					}
403 					st = st->next;
404 					st->stype = stype;
405 					st->base = Xsavepos(ds, dp);
406 					st->f = f;
407 					if (x.var == vtemp) {
408 						st->var = tempvar(vtemp->name);
409 						st->var->flag &= ~INTEGER;
410 						/* can't fail here */
411 						setstr(st->var,
412 						    str_val(x.var),
413 						    KSH_RETURN_ERROR | 0x4);
414 					} else
415 						st->var = x.var;
416 
417 					st->quotew = st->quotep = quote;
418 					/* skip qualifier(s) */
419 					if (stype)
420 						sp += slen;
421 					switch (stype & 0x17F) {
422 					case 0x100 | '#':
						/* value becomes the hash of the variable, as 8 hex digits */
423 						x.str = shf_smprintf("%08X",
424 						    (unsigned int)hash(str_val(st->var)));
425 						break;
426 					case 0x100 | 'Q': {
						/* value becomes the output of print_value_quoted() */
427 						struct shf shf;
428 
429 						shf_sopen(NULL, 0, SHF_WR|SHF_DYNAMIC, &shf);
430 						print_value_quoted(&shf, str_val(st->var));
431 						x.str = shf_sclose(&shf);
432 						break;
433 					    }
434 					case '0': {
						/*
						 * substring: the start offset (from) and the
						 * optional length (num) are evaluated as
						 * arithmetic expressions
						 */
435 						char *beg, *mid, *end, *stg;
436 						mksh_ari_t from = 0, num = -1, flen, finc = 0;
437 
438 						beg = wdcopy(sp, ATEMP);
439 						mid = beg + (wdscan(sp, ADELIM) - sp);
440 						stg = beg + (wdscan(sp, CSUBST) - sp);
441 						mid[-2] = EOS;
442 						if (mid[-1] == /*{*/'}') {
443 							sp += mid - beg - 1;
444 							end = NULL;
445 						} else {
446 							end = mid +
447 							    (wdscan(mid, ADELIM) - mid);
448 							if (end[-1] != /*{*/ '}')
449 								/* more than max delimiters */
450 								goto unwind_substsyn;
451 							end[-2] = EOS;
452 							sp += end - beg - 1;
453 						}
454 						evaluate(substitute(stg = wdstrip(beg, 0), 0),
455 						    &from, KSH_UNWIND_ERROR, true);
456 						afree(stg, ATEMP);
457 						if (end) {
458 							evaluate(substitute(stg = wdstrip(mid, 0), 0),
459 							    &num, KSH_UNWIND_ERROR, true);
460 							afree(stg, ATEMP);
461 						}
462 						afree(beg, ATEMP);
463 						beg = str_val(st->var);
464 						flen = utflen(beg);
						/* a negative offset counts from the end of the value */
465 						if (from < 0) {
466 							if (-from < flen)
467 								finc = flen + from;
468 						} else
469 							finc = from < flen ? from : flen;
470 						if (UTFMODE)
471 							utfincptr(beg, &finc);
472 						beg += finc;
473 						flen = utflen(beg);
474 						if (num < 0 || num > flen)
475 							num = flen;
476 						if (UTFMODE)
477 							utfincptr(beg, &num);
478 						strndupx(x.str, beg, num, ATEMP);
479 						goto do_CSUBST;
480 					    }
481 					case 0x100 | '/':
482 					case '/': {
						/* pattern search and replace on the variable's value */
483 						char *s, *p, *d, *sbeg, *end;
484 						char *pat = NULL, *rrep = null;
485 						char fpat = 0, *tpat1, *tpat2;
486 						char *ws, *wpat, *wrep;
487 
488 						s = ws = wdcopy(sp, ATEMP);
489 						p = s + (wdscan(sp, ADELIM) - sp);
490 						d = s + (wdscan(sp, CSUBST) - sp);
491 						p[-2] = EOS;
492 						if (p[-1] == /*{*/'}')
493 							d = NULL;
494 						else
495 							d[-2] = EOS;
496 						sp += (d ? d : p) - s - 1;
497 						if (!(stype & 0x180) &&
498 						    s[0] == CHAR &&
499 						    (s[1] == '#' || s[1] == '%'))
500 							fpat = s[1];
501 						wpat = s + (fpat ? 2 : 0);
502 						wrep = d ? p : NULL;
503 						if (!(stype & 0x100)) {
504 							rrep = wrep ? evalstr(wrep,
505 							    DOTILDE | DOSCALAR) :
506 							    null;
507 						}
508 
509 						/* prepare string on which to work */
510 						strdupx(s, str_val(st->var), ATEMP);
511 						sbeg = s;
512  again_search:
513 						pat = evalstr(wpat,
514 						    DOTILDE | DOSCALAR | DOPAT);
515 						/* check for special cases */
516 						if (!*pat && !fpat) {
517 							/*
518 							 * empty unanchored
519 							 * pattern => reject
520 							 */
521 							goto no_repl;
522 						}
523 						if ((stype & 0x180) &&
524 						    gmatchx(null, pat, false)) {
525 							/*
526 							 * pattern matches empty
527 							 * string => don't loop
528 							 */
529 							stype &= ~0x180;
530 						}
531 
532 						/* first see if we have any match at all */
533 						if (fpat == '#') {
534 							/* anchor at the beginning */
535 							tpat1 = shf_smprintf("%s%c*", pat, MAGIC);
536 							tpat2 = tpat1;
537 						} else if (fpat == '%') {
538 							/* anchor at the end */
539 							tpat1 = shf_smprintf("%c*%s", MAGIC, pat);
540 							tpat2 = pat;
541 						} else {
542 							/* float */
543 							tpat1 = shf_smprintf("%c*%s%c*", MAGIC, pat, MAGIC);
544 							tpat2 = tpat1 + 2;
545 						}
546  again_repl:
547 						/*
548 						 * this would not be necessary if gmatchx would return
549 						 * the start and end values of a match found, like re*
550 						 */
551 						if (!gmatchx(sbeg, tpat1, false))
552 							goto end_repl;
553 						end = strnul(s);
554 						/* now anchor the beginning of the match */
555 						if (fpat != '#')
556 							while (sbeg <= end) {
557 								if (gmatchx(sbeg, tpat2, false))
558 									break;
559 								else
560 									sbeg++;
561 							}
562 						/* now anchor the end of the match */
563 						p = end;
564 						if (fpat != '%')
565 							while (p >= sbeg) {
566 								bool gotmatch;
567 
								/* temporarily cut the string at p to test the prefix */
568 								c = *p;
569 								*p = '\0';
570 								gotmatch = tobool(gmatchx(sbeg, pat, false));
571 								*p = c;
572 								if (gotmatch)
573 									break;
574 								p--;
575 							}
576 						strndupx(end, sbeg, p - sbeg, ATEMP);
577 						record_match(end);
578 						afree(end, ATEMP);
579 						if (stype & 0x100) {
580 							if (rrep != null)
581 								afree(rrep, ATEMP);
582 							rrep = wrep ? evalstr(wrep,
583 							    DOTILDE | DOSCALAR) :
584 							    null;
585 						}
						/* build: text before the match + replacement + rest */
586 						strndupx(end, s, sbeg - s, ATEMP);
587 						d = shf_smprintf(Tf_sss, end, rrep, p);
588 						afree(end, ATEMP);
589 						sbeg = d + (sbeg - s) + strlen(rrep);
590 						afree(s, ATEMP);
591 						s = d;
592 						if (stype & 0x100) {
593 							afree(tpat1, ATEMP);
594 							afree(pat, ATEMP);
595 							goto again_search;
596 						} else if (stype & 0x80)
597 							goto again_repl;
598  end_repl:
599 						afree(tpat1, ATEMP);
600 						x.str = s;
601  no_repl:
602 						afree(pat, ATEMP);
603 						if (rrep != null)
604 							afree(rrep, ATEMP);
605 						afree(ws, ATEMP);
606 						goto do_CSUBST;
607 					    }
608 					case '#':
609 					case '%':
610 						/* ! DOBLANK,DOBRACE */
611 						f = (f & DONTRUNCOMMAND) |
612 						    DOPAT | DOTILDE |
613 						    DOTEMP | DOSCALAR;
614 						tilde_ok = 1;
615 						st->quotew = quote = 0;
616 						/*
617 						 * Prepend open pattern (so |
618 						 * in a trim will work as
619 						 * expected)
620 						 */
621 						if (!Flag(FSH)) {
622 							*dp++ = MAGIC;
623 							*dp++ = 0x80 | '@';
624 						}
625 						break;
626 					case '=':
627 						/*
628 						 * Enabling tilde expansion
629 						 * after :s here is
630 						 * non-standard ksh, but is
631 						 * consistent with rules for
632 						 * other assignments. Not
633 						 * sure what POSIX thinks of
634 						 * this.
635 						 * Not doing tilde expansion
636 						 * for integer variables is a
637 						 * non-POSIX thing - makes
638 						 * sense though, since ~ is
639 						 * an arithmetic operator.
640 						 */
641 						if (!(x.var->flag & INTEGER))
642 							f |= DOASNTILDE | DOTILDE;
643 						f |= DOTEMP;
644 						/*
645 						 * These will be done after the
646 						 * value has been assigned.
647 						 */
648 						f &= ~(DOBLANK|DOGLOB|DOBRACE);
649 						tilde_ok = 1;
650 						break;
651 					case '?':
652 						if (*sp == CSUBST)
653 							errorf("%s: parameter null or not set",
654 							    st->var->name);
655 						f &= ~DOBLANK;
656 						f |= DOTEMP;
657 						/* FALLTHROUGH */
658 					default:
659 						/* '-' '+' '?' */
660 						if (quote)
661 							word = IFS_WORD;
662 						else if (dp == Xstring(ds, dp))
663 							word = IFS_IWS;
664 						/* Enable tilde expansion */
665 						tilde_ok = 1;
666 						f |= DOTILDE;
667 					}
668 				} else
669 					/* skip word */
670 					sp += wdscan(sp, CSUBST) - sp;
671 				continue;
672 			    }
673 			case CSUBST:
674 				/* only get here if expanding word */
675  do_CSUBST:
676 				/* ({) skip the } or x */
677 				sp++;
678 				/* in case of ${unset:-} */
679 				tilde_ok = 0;
680 				*dp = '\0';
				/* restore the quote state and flags saved at OSUBST */
681 				quote = st->quotep;
682 				f = st->f;
683 				if (f & DOBLANK)
684 					doblank--;
685 				switch (st->stype & 0x17F) {
686 				case '#':
687 				case '%':
688 					if (!Flag(FSH)) {
689 						/* Append end-pattern */
690 						*dp++ = MAGIC;
691 						*dp++ = ')';
692 					}
693 					*dp = '\0';
694 					dp = Xrestpos(ds, dp, st->base);
695 					/*
696 					 * Must use st->var since calling
697 					 * global would break things
698 					 * like x[i+=1].
699 					 */
700 					x.str = trimsub(str_val(st->var),
701 						dp, st->stype);
702 					if (x.str[0] != '\0') {
703 						word = IFS_IWS;
704 						type = XSUB;
705 					} else if (quote) {
706 						word = IFS_WORD;
707 						type = XSUB;
708 					} else {
709 						if (dp == Xstring(ds, dp))
710 							word = IFS_IWS;
711 						type = XNULLSUB;
712 					}
713 					if (f & DOBLANK)
714 						doblank++;
715 					st = st->prev;
716 					continue;
717 				case '=':
718 					/*
719 					 * Restore our position and substitute
720 					 * the value of st->var (may not be
721 					 * the assigned value in the presence
722 					 * of integer/right-adj/etc attributes).
723 					 */
724 					dp = Xrestpos(ds, dp, st->base);
725 					/*
726 					 * Must use st->var since calling
727 					 * global would cause things
728 					 * like x[i+=1] to be evaluated twice.
729 					 */
730 					/*
731 					 * Note: not exported by FEXPORT
732 					 * in AT&T ksh.
733 					 */
734 					/*
735 					 * XXX POSIX says readonly is only
736 					 * fatal for special builtins (setstr
737 					 * does readonly check).
738 					 */
739 					len = strlen(dp) + 1;
740 					setstr(st->var,
741 					    debunk(alloc(len, ATEMP),
742 					    dp, len), KSH_UNWIND_ERROR);
743 					x.str = str_val(st->var);
744 					type = XSUB;
745 					if (f & DOBLANK)
746 						doblank++;
747 					st = st->prev;
748 					word = quote || (!*x.str && (f & DOSCALAR)) ? IFS_WORD : IFS_IWS;
749 					continue;
750 				case '?':
751 					dp = Xrestpos(ds, dp, st->base);
752 
753 					errorf(Tf_sD_s, st->var->name,
754 					    debunk(dp, dp, strlen(dp) + 1));
755 					break;
756 				case '0':
757 				case 0x100 | '/':
758 				case '/':
759 				case 0x100 | '#':
760 				case 0x100 | 'Q':
					/* x.str was computed at OSUBST; discard the scanned word */
761 					dp = Xrestpos(ds, dp, st->base);
762 					type = XSUB;
763 					word = quote || (!*x.str && (f & DOSCALAR)) ? IFS_WORD : IFS_IWS;
764 					if (f & DOBLANK)
765 						doblank++;
766 					st = st->prev;
767 					continue;
768 				/* default: '-' '+' */
769 				}
770 				st = st->prev;
771 				type = XBASE;
772 				continue;
773 
774 			case OPAT:
775 				/* open pattern: *(foo|bar) */
776 				/* Next char is the type of pattern */
777 				make_magic = true;
778 				c = *sp++ | 0x80;
779 				break;
780 
781 			case SPAT:
782 				/* pattern separator (|) */
783 				make_magic = true;
784 				c = '|';
785 				break;
786 
787 			case CPAT:
788 				/* close pattern */
789 				make_magic = true;
790 				c = /*(*/ ')';
791 				break;
792 			}
793 			break;
794 
795 		case XNULLSUB:
796 			/*
797 			 * Special case for "$@" (and "${foo[@]}") - no
798 			 * word is generated if $# is 0 (unless there is
799 			 * other stuff inside the quotes).
800 			 */
801 			type = XBASE;
802 			if (f & DOBLANK) {
803 				doblank--;
804 				if (dp == Xstring(ds, dp) && word != IFS_WORD)
805 					word = IFS_IWS;
806 			}
807 			continue;
808 
809 		case XSUB:
810 		case XSUBMID:
			/* feed the substituted string one character at a time */
811 			if ((c = *x.str++) == 0) {
812 				type = XBASE;
813 				if (f & DOBLANK)
814 					doblank--;
815 				continue;
816 			}
817 			break;
818 
819 		case XARGSEP:
820 			type = XARG;
821 			quote = 1;
822 			/* FALLTHROUGH */
823 		case XARG:
824 			if ((c = *x.str++) == '\0') {
825 				/*
826 				 * force null words to be created so
827 				 * set -- "" 2 ""; echo "$@" will do
828 				 * the right thing
829 				 */
830 				if (quote && x.split)
831 					word = IFS_WORD;
				/* current string done: fetch the next one, if any */
832 				if ((x.str = *x.u.strv++) == NULL) {
833 					type = XBASE;
834 					if (f & DOBLANK)
835 						doblank--;
836 					continue;
837 				}
838 				c = ifs0;
839 				if ((f & DOHEREDOC)) {
840 					/* pseudo-field-split reliably */
841 					if (c == 0)
842 						c = ' ';
843 					break;
844 				}
845 				if ((f & DOSCALAR)) {
846 					/* do not field-split */
847 					if (x.split) {
848 						c = ' ';
849 						break;
850 					}
851 					if (c == 0)
852 						continue;
853 				}
854 				if (c == 0) {
855 					if (quote && !x.split)
856 						continue;
857 					if (!quote && word == IFS_WS)
858 						continue;
859 					/* this is so we don't terminate */
860 					c = ' ';
861 					/* now force-emit a word */
862 					goto emit_word;
863 				}
864 				if (quote && x.split) {
865 					/* terminate word for "$@" */
866 					type = XARGSEP;
867 					quote = 0;
868 				}
869 			}
870 			break;
871 
872 		case XCOM:
873 			if (x.u.shf == NULL) {
874 				/* $(<...) failed */
875 				subst_exstat = 1;
876 				/* fake EOF */
877 				c = -1;
878 			} else if (newlines) {
879 				/* spit out saved NLs */
880 				c = '\n';
881 				--newlines;
882 			} else {
				/* NUL bytes are dropped; newlines are counted and only emitted if more data follows */
883 				while ((c = shf_getc(x.u.shf)) == 0 || c == '\n')
884 					if (c == '\n')
885 						/* save newlines */
886 						newlines++;
887 				if (newlines && c != -1) {
888 					shf_ungetc(c, x.u.shf);
889 					c = '\n';
890 					--newlines;
891 				}
892 			}
893 			if (c == -1) {
894 				newlines = 0;
895 				if (x.u.shf)
896 					shf_close(x.u.shf);
897 				if (x.split)
898 					subst_exstat = waitlast();
899 				type = XBASE;
900 				if (f & DOBLANK)
901 					doblank--;
902 				continue;
903 			}
904 			break;
905 		}
906 
907 		/* check for end of word or IFS separation */
908 		if (c == 0 || (!quote && (f & DOBLANK) && doblank &&
909 		    !make_magic && ctype(c, C_IFS))) {
910 			/*-
911 			 * How words are broken up:
912 			 *			|	value of c
913 			 *	word		|	ws	nws	0
914 			 *	-----------------------------------
915 			 *	IFS_WORD		w/WS	w/NWS	w
916 			 *	IFS_WS			-/WS	-/NWS	-
917 			 *	IFS_NWS			-/NWS	w/NWS	-
918 			 *	IFS_IWS			-/WS	w/NWS	-
919 			 * (w means generate a word)
920 			 */
921 			if ((word == IFS_WORD) || (word == IFS_QUOTE) || (c &&
922 			    (word == IFS_IWS || word == IFS_NWS) &&
923 			    !ctype(c, C_IFSWS))) {
924  emit_word:
925 				if (f & DOHERESTR)
926 					*dp++ = '\n';
927 				*dp++ = '\0';
928 				cp = Xclose(ds, dp);
				/* second pass: brace expansion, globbing, or plain store */
929 				if (fdo & DOBRACE)
930 					/* also does globbing */
931 					alt_expand(wp, cp, cp,
932 					    cp + Xlength(ds, (dp - 1)),
933 					    fdo | (f & DOMARKDIRS));
934 				else if (fdo & DOGLOB)
935 					glob(cp, wp, tobool(f & DOMARKDIRS));
936 				else if ((f & DOPAT) || !(fdo & DOMAGIC))
937 					XPput(*wp, cp);
938 				else
939 					XPput(*wp, debunk(cp, cp,
940 					    strlen(cp) + 1));
941 				fdo = 0;
942 				saw_eq = false;
943 				/* must be 1/0 */
944 				tilde_ok = (f & (DOTILDE | DOASNTILDE)) ? 1 : 0;
945 				if (c == 0)
946 					return;
				/* start a fresh destination string for the next word */
947 				Xinit(ds, dp, 128, ATEMP);
948 			} else if (c == 0) {
949 				return;
950 			} else if (type == XSUB && ctype(c, C_IFS) &&
951 			    !ctype(c, C_IFSWS) && Xlength(ds, dp) == 0) {
				/* store an empty word */
952 				*(cp = alloc(1, ATEMP)) = '\0';
953 				XPput(*wp, cp);
954 				type = XSUBMID;
955 			}
956 			if (word != IFS_NWS)
957 				word = ctype(c, C_IFSWS) ? IFS_WS : IFS_NWS;
958 		} else {
959 			if (type == XSUB) {
960 				if (word == IFS_NWS &&
961 				    Xlength(ds, dp) == 0) {
962 					*(cp = alloc(1, ATEMP)) = '\0';
963 					XPput(*wp, cp);
964 				}
965 				type = XSUBMID;
966 			}
967 
968 			/* age tilde_ok info - ~ code tests second bit */
969 			tilde_ok <<= 1;
970 			/* mark any special second pass chars */
971 			if (!quote)
972 				switch (c) {
973 				case '[':
974 				case '!':
975 				case '-':
976 				case ']':
977 					/*
978 					 * For character classes - doesn't hurt
979 					 * to have magic !,-,]s outside of
980 					 * [...] expressions.
981 					 */
982 					if (f & (DOPAT | DOGLOB)) {
983 						fdo |= DOMAGIC;
984 						if (c == '[')
985 							fdo |= f & DOGLOB;
986 						*dp++ = MAGIC;
987 					}
988 					break;
989 				case '*':
990 				case '?':
991 					if (f & (DOPAT | DOGLOB)) {
992 						fdo |= DOMAGIC | (f & DOGLOB);
993 						*dp++ = MAGIC;
994 					}
995 					break;
996 				case '{':
997 				case '}':
998 				case ',':
999 					if ((f & DOBRACE) && (c == '{' /*}*/ ||
1000 					    (fdo & DOBRACE))) {
1001 						fdo |= DOBRACE|DOMAGIC;
1002 						*dp++ = MAGIC;
1003 					}
1004 					break;
1005 				case '=':
1006 					/* Note first unquoted = for ~ */
1007 					if (!(f & DOTEMP) && (!Flag(FPOSIX) ||
1008 					    (f & DOASNTILDE)) && !saw_eq) {
1009 						saw_eq = true;
1010 						tilde_ok = 1;
1011 					}
1012 					break;
1013 				case ':':
1014 					/* : */
1015 					/* Note unquoted : for ~ */
1016 					if (!(f & DOTEMP) && (f & DOASNTILDE))
1017 						tilde_ok = 1;
1018 					break;
1019 				case '~':
1020 					/*
1021 					 * tilde_ok is reset whenever
1022 					 * any of ' " $( $(( ${ } are seen.
1023 					 * Note that tilde_ok must be preserved
1024 					 * through the sequence ${A=a=}~
1025 					 */
1026 					if (type == XBASE &&
1027 					    (f & (DOTILDE | DOASNTILDE)) &&
1028 					    (tilde_ok & 2)) {
1029 						const char *tcp;
1030 						char *tdp = dp;
1031 
1032 						tcp = maybe_expand_tilde(sp,
1033 						    &ds, &tdp,
1034 						    tobool(f & DOASNTILDE));
1035 						if (tcp) {
1036 							if (dp != tdp)
1037 								word = IFS_WORD;
1038 							dp = tdp;
1039 							sp = tcp;
1040 							continue;
1041 						}
1042 					}
1043 					break;
1044 				}
1045 			else
1046 				/* undo temporary */
1047 				quote &= ~2;
1048 
1049 			if (make_magic) {
1050 				make_magic = false;
1051 				fdo |= DOMAGIC | (f & DOGLOB);
1052 				*dp++ = MAGIC;
1053 			} else if (ISMAGIC(c)) {
1054 				fdo |= DOMAGIC;
1055 				*dp++ = MAGIC;
1056 			}
1057 			/* save output char */
1058 			*dp++ = c;
1059 			word = IFS_WORD;
1060 		}
1061 	}
1062 }
1063 
/*
 * Return true if the NULL-terminated string vector strv contains at
 * least one string that is not empty, false otherwise.
 */
static bool
hasnonempty(const char **strv)
{
	const char **cur;

	for (cur = strv; *cur != NULL; cur++)
		if ((*cur)[0] != '\0')
			return (true);
	return (false);
}
1074 
1075 /*
1076  * Prepare to generate the string returned by ${} substitution.
1077  */
1078 static int
varsub(Expand * xp,const char * sp,const char * word,int * stypep,int * slenp)1079 varsub(Expand *xp, const char *sp, const char *word,
1080     int *stypep,	/* becomes qualifier type */
1081     int *slenp)		/* " " len (=, :=, etc.) valid iff *stypep != 0 */
1082 {
1083 	int c;
1084 	int state;	/* next state: XBASE, XARG, XSUB, XNULLSUB */
1085 	int stype;	/* substitution type */
1086 	int slen = 0;
1087 	const char *p;
1088 	struct tbl *vp;
1089 	bool zero_ok = false;
1090 
1091 	if ((stype = sp[0]) == '\0')
1092 		/* Bad variable name */
1093 		return (-1);
1094 
1095 	xp->var = NULL;
1096 
1097 	/*-
1098 	 * ${#var}, string length (-U: characters, +U: octets) or array size
1099 	 * ${%var}, string width (-U: screen columns, +U: octets)
1100 	 */
1101 	c = sp[1];
1102 	if (stype == '%' && c == '\0')
1103 		return (-1);
1104 	if ((stype == '#' || stype == '%') && c != '\0') {
1105 		/* Can't have any modifiers for ${#...} or ${%...} */
1106 		if (*word != CSUBST)
1107 			return (-1);
1108 		sp++;
1109 		/* Check for size of array */
1110 		if ((p = cstrchr(sp, '[')) && (p[1] == '*' || p[1] == '@') &&
1111 		    p[2] == ']') {
1112 			int n = 0;
1113 
1114 			if (stype != '#')
1115 				return (-1);
1116 			vp = global(arrayname(sp));
1117 			if (vp->flag & (ISSET|ARRAY))
1118 				zero_ok = true;
1119 			for (; vp; vp = vp->u.array)
1120 				if (vp->flag & ISSET)
1121 					n++;
1122 			c = n;
1123 		} else if (c == '*' || c == '@') {
1124 			if (stype != '#')
1125 				return (-1);
1126 			c = e->loc->argc;
1127 		} else {
1128 			p = str_val(global(sp));
1129 			zero_ok = p != null;
1130 			if (stype == '#')
1131 				c = utflen(p);
1132 			else {
1133 				/* partial utf_mbswidth reimplementation */
1134 				const char *s = p;
1135 				unsigned int wc;
1136 				size_t len;
1137 				int cw;
1138 
1139 				c = 0;
1140 				while (*s) {
1141 					if (!UTFMODE || (len = utf_mbtowc(&wc,
1142 					    s)) == (size_t)-1)
1143 						/* not UTFMODE or not UTF-8 */
1144 						wc = (unsigned char)(*s++);
1145 					else
1146 						/* UTFMODE and UTF-8 */
1147 						s += len;
1148 					/* wc == char or wchar at s++ */
1149 					if ((cw = utf_wcwidth(wc)) == -1) {
1150 						/* 646, 8859-1, 10646 C0/C1 */
1151 						c = -1;
1152 						break;
1153 					}
1154 					c += cw;
1155 				}
1156 			}
1157 		}
1158 		if (Flag(FNOUNSET) && c == 0 && !zero_ok)
1159 			errorf(Tf_parm, sp);
1160 		/* unqualified variable/string substitution */
1161 		*stypep = 0;
1162 		xp->str = shf_smprintf(Tf_d, c);
1163 		return (XSUB);
1164 	}
1165 	if (stype == '!' && c != '\0' && *word == CSUBST) {
1166 		sp++;
1167 		if ((p = cstrchr(sp, '[')) && (p[1] == '*' || p[1] == '@') &&
1168 		    p[2] == ']') {
1169 			c = '!';
1170 			stype = 0;
1171 			goto arraynames;
1172 		}
1173 		xp->var = global(sp);
1174 		xp->str = p ? shf_smprintf("%s[%lu]",
1175 		    xp->var->name, arrayindex(xp->var)) : xp->var->name;
1176 		*stypep = 0;
1177 		return (XSUB);
1178 	}
1179 
1180 	/* Check for qualifiers in word part */
1181 	stype = 0;
1182 	c = word[slen + 0] == CHAR ? word[slen + 1] : 0;
1183 	if (c == ':') {
1184 		slen += 2;
1185 		stype = 0x80;
1186 		c = word[slen + 0] == CHAR ? word[slen + 1] : 0;
1187 	}
1188 	if (!stype && c == '/') {
1189 		slen += 2;
1190 		stype = c;
1191 		if (word[slen] == ADELIM && word[slen + 1] == c) {
1192 			slen += 2;
1193 			stype |= 0x80;
1194 		}
1195 	} else if (stype == 0x80 && (c == ' ' || c == '0')) {
1196 		stype |= '0';
1197 	} else if (ctype(c, C_SUBOP1)) {
1198 		slen += 2;
1199 		stype |= c;
1200 	} else if (ctype(c, C_SUBOP2)) {
1201 		/* Note: ksh88 allows :%, :%%, etc */
1202 		slen += 2;
1203 		stype = c;
1204 		if (word[slen + 0] == CHAR && c == word[slen + 1]) {
1205 			stype |= 0x80;
1206 			slen += 2;
1207 		}
1208 	} else if (c == '@') {
1209 		/* @x where x is command char */
1210 		slen += 2;
1211 		stype |= 0x100;
1212 		if (word[slen] == CHAR) {
1213 			stype |= word[slen + 1];
1214 			slen += 2;
1215 		}
1216 	} else if (stype)
1217 		/* : is not ok */
1218 		return (-1);
1219 	if (!stype && *word != CSUBST)
1220 		return (-1);
1221 
1222 	c = sp[0];
1223 	if (c == '*' || c == '@') {
1224 		switch (stype & 0x17F) {
1225 		/* can't assign to a vector */
1226 		case '=':
1227 		/* can't trim a vector (yet) */
1228 		case '%':
1229 		case '#':
1230 		case '?':
1231 		case '0':
1232 		case 0x100 | '/':
1233 		case '/':
1234 		case 0x100 | '#':
1235 		case 0x100 | 'Q':
1236 			return (-1);
1237 		}
1238 		if (e->loc->argc == 0) {
1239 			xp->str = null;
1240 			xp->var = global(sp);
1241 			state = c == '@' ? XNULLSUB : XSUB;
1242 		} else {
1243 			xp->u.strv = (const char **)e->loc->argv + 1;
1244 			xp->str = *xp->u.strv++;
1245 			/* $@ */
1246 			xp->split = tobool(c == '@');
1247 			state = XARG;
1248 		}
1249 		/* POSIX 2009? */
1250 		zero_ok = true;
1251 	} else if ((p = cstrchr(sp, '[')) && (p[1] == '*' || p[1] == '@') &&
1252 	    p[2] == ']') {
1253 		XPtrV wv;
1254 
1255 		switch (stype & 0x17F) {
1256 		/* can't assign to a vector */
1257 		case '=':
1258 		/* can't trim a vector (yet) */
1259 		case '%':
1260 		case '#':
1261 		case '?':
1262 		case '0':
1263 		case 0x100 | '/':
1264 		case '/':
1265 		case 0x100 | '#':
1266 		case 0x100 | 'Q':
1267 			return (-1);
1268 		}
1269 		c = 0;
1270  arraynames:
1271 		XPinit(wv, 32);
1272 		vp = global(arrayname(sp));
1273 		for (; vp; vp = vp->u.array) {
1274 			if (!(vp->flag&ISSET))
1275 				continue;
1276 			XPput(wv, c == '!' ? shf_smprintf(Tf_lu,
1277 			    arrayindex(vp)) :
1278 			    str_val(vp));
1279 		}
1280 		if (XPsize(wv) == 0) {
1281 			xp->str = null;
1282 			state = p[1] == '@' ? XNULLSUB : XSUB;
1283 			XPfree(wv);
1284 		} else {
1285 			XPput(wv, 0);
1286 			xp->u.strv = (const char **)XPptrv(wv);
1287 			xp->str = *xp->u.strv++;
1288 			/* ${foo[@]} */
1289 			xp->split = tobool(p[1] == '@');
1290 			state = XARG;
1291 		}
1292 	} else {
1293 		xp->var = global(sp);
1294 		xp->str = str_val(xp->var);
1295 		/* can't assign things like $! or $1 */
1296 		if ((stype & 0x17F) == '=' && !*xp->str &&
1297 		    ctype(*sp, C_VAR1 | C_DIGIT))
1298 			return (-1);
1299 		state = XSUB;
1300 	}
1301 
1302 	c = stype & 0x7F;
1303 	/* test the compiler's code generator */
1304 	if (((stype < 0x100) && (ctype(c, C_SUBOP2) ||
1305 	    (((stype & 0x80) ? *xp->str == '\0' : xp->str == null) &&
1306 	    (state != XARG || (ifs0 || xp->split ?
1307 	    (xp->u.strv[0] == NULL) : !hasnonempty(xp->u.strv))) ?
1308 	    c == '=' || c == '-' || c == '?' : c == '+'))) ||
1309 	    stype == (0x80 | '0') || stype == (0x100 | '#') ||
1310 	    stype == (0x100 | 'Q') || (stype & 0x7F) == '/')
1311 		/* expand word instead of variable value */
1312 		state = XBASE;
1313 	if (Flag(FNOUNSET) && xp->str == null && !zero_ok &&
1314 	    (ctype(c, C_SUBOP2) || (state != XBASE && c != '+')))
1315 		errorf(Tf_parm, sp);
1316 	*stypep = stype;
1317 	*slenp = slen;
1318 	return (state);
1319 }
1320 
1321 /*
1322  * Run the command in $(...) and read its output.
1323  */
1324 static int
comsub(Expand * xp,const char * cp,int fn MKSH_A_UNUSED)1325 comsub(Expand *xp, const char *cp, int fn MKSH_A_UNUSED)
1326 {
1327 	Source *s, *sold;
1328 	struct op *t;
1329 	struct shf *shf;
1330 	uint8_t old_utfmode = UTFMODE;
1331 
1332 	s = pushs(SSTRING, ATEMP);
1333 	s->start = s->str = cp;
1334 	sold = source;
1335 	t = compile(s, true);
1336 	afree(s, ATEMP);
1337 	source = sold;
1338 
1339 	UTFMODE = old_utfmode;
1340 
1341 	if (t == NULL)
1342 		return (XBASE);
1343 
1344 	/* no waitlast() unless specifically enabled later */
1345 	xp->split = false;
1346 
1347 	if (t->type == TCOM &&
1348 	    *t->args == NULL && *t->vars == NULL && t->ioact != NULL) {
1349 		/* $(<file) */
1350 		struct ioword *io = *t->ioact;
1351 		char *name;
1352 
1353 		switch (io->ioflag & IOTYPE) {
1354 		case IOREAD:
1355 			shf = shf_open(name = evalstr(io->ioname, DOTILDE),
1356 				O_RDONLY, 0, SHF_MAPHI | SHF_CLEXEC);
1357 			if (shf == NULL)
1358 				warningf(!Flag(FTALKING), Tf_sD_s_sD_s,
1359 				    name, Tcant_open, "$(<...) input",
1360 				    cstrerror(errno));
1361 			break;
1362 		case IOHERE:
1363 			if (!herein(io, &name)) {
1364 				xp->str = name;
1365 				/* as $(…) requires, trim trailing newlines */
1366 				name += strlen(name);
1367 				while (name > xp->str && name[-1] == '\n')
1368 					--name;
1369 				*name = '\0';
1370 				return (XSUB);
1371 			}
1372 			shf = NULL;
1373 			break;
1374 		default:
1375 			errorf(Tf_sD_s, T_funny_command,
1376 			    snptreef(NULL, 32, Tft_R, io));
1377 		}
1378 	} else if (fn == FUNSUB) {
1379 		int ofd1;
1380 		struct temp *tf = NULL;
1381 
1382 		/*
1383 		 * create a temporary file, open for reading and writing,
1384 		 * with an shf open for reading (buffered) but yet unused
1385 		 */
1386 		maketemp(ATEMP, TT_FUNSUB, &tf);
1387 		if (!tf->shf) {
1388 			errorf(Tf_temp,
1389 			    Tcreate, tf->tffn, cstrerror(errno));
1390 		}
1391 		/* extract shf from temporary file, unlink and free it */
1392 		shf = tf->shf;
1393 		unlink(tf->tffn);
1394 		afree(tf, ATEMP);
1395 		/* save stdout and let it point to the tempfile */
1396 		ofd1 = savefd(1);
1397 		ksh_dup2(shf_fileno(shf), 1, false);
1398 		/*
1399 		 * run tree, with output thrown into the tempfile,
1400 		 * in a new function block
1401 		 */
1402 		valsub(t, NULL);
1403 		subst_exstat = exstat & 0xFF;
1404 		/* rewind the tempfile and restore regular stdout */
1405 		lseek(shf_fileno(shf), (off_t)0, SEEK_SET);
1406 		restfd(1, ofd1);
1407 	} else if (fn == VALSUB) {
1408 		xp->str = valsub(t, ATEMP);
1409 		subst_exstat = exstat & 0xFF;
1410 		return (XSUB);
1411 	} else {
1412 		int ofd1, pv[2];
1413 
1414 		openpipe(pv);
1415 		shf = shf_fdopen(pv[0], SHF_RD, NULL);
1416 		ofd1 = savefd(1);
1417 		if (pv[1] != 1) {
1418 			ksh_dup2(pv[1], 1, false);
1419 			close(pv[1]);
1420 		}
1421 		execute(t, XXCOM | XPIPEO | XFORK, NULL);
1422 		restfd(1, ofd1);
1423 		startlast();
1424 		/* waitlast() */
1425 		xp->split = true;
1426 	}
1427 
1428 	xp->u.shf = shf;
1429 	return (XCOM);
1430 }
1431 
1432 /*
1433  * perform #pattern and %pattern substitution in ${}
1434  */
1435 static char *
trimsub(char * str,char * pat,int how)1436 trimsub(char *str, char *pat, int how)
1437 {
1438 	char *end = strnul(str);
1439 	char *p, c;
1440 
1441 	switch (how & 0xFF) {
1442 	case '#':
1443 		/* shortest match at beginning */
1444 		for (p = str; p <= end; p += utf_ptradj(p)) {
1445 			c = *p; *p = '\0';
1446 			if (gmatchx(str, pat, false)) {
1447 				record_match(str);
1448 				*p = c;
1449 				return (p);
1450 			}
1451 			*p = c;
1452 		}
1453 		break;
1454 	case '#'|0x80:
1455 		/* longest match at beginning */
1456 		for (p = end; p >= str; p--) {
1457 			c = *p; *p = '\0';
1458 			if (gmatchx(str, pat, false)) {
1459 				record_match(str);
1460 				*p = c;
1461 				return (p);
1462 			}
1463 			*p = c;
1464 		}
1465 		break;
1466 	case '%':
1467 		/* shortest match at end */
1468 		p = end;
1469 		while (p >= str) {
1470 			if (gmatchx(p, pat, false))
1471 				goto trimsub_match;
1472 			if (UTFMODE) {
1473 				char *op = p;
1474 				while ((p-- > str) && ((*p & 0xC0) == 0x80))
1475 					;
1476 				if ((p < str) || (p + utf_ptradj(p) != op))
1477 					p = op - 1;
1478 			} else
1479 				--p;
1480 		}
1481 		break;
1482 	case '%'|0x80:
1483 		/* longest match at end */
1484 		for (p = str; p <= end; p++)
1485 			if (gmatchx(p, pat, false)) {
1486  trimsub_match:
1487 				record_match(p);
1488 				strndupx(end, str, p - str, ATEMP);
1489 				return (end);
1490 			}
1491 		break;
1492 	}
1493 
1494 	/* no match, return string */
1495 	return (str);
1496 }
1497 
1498 /*
1499  * glob
1500  * Name derived from V6's /etc/glob, the program that expanded filenames.
1501  */
1502 
1503 /* XXX cp not const 'cause slashes are temporarily replaced with NULs... */
1504 static void
glob(char * cp,XPtrV * wp,bool markdirs)1505 glob(char *cp, XPtrV *wp, bool markdirs)
1506 {
1507 	int oldsize = XPsize(*wp);
1508 
1509 	if (glob_str(cp, wp, markdirs) == 0)
1510 		XPput(*wp, debunk(cp, cp, strlen(cp) + 1));
1511 	else
1512 		qsort(XPptrv(*wp) + oldsize, XPsize(*wp) - oldsize,
1513 		    sizeof(void *), xstrcmp);
1514 }
1515 
/* flag bits for the "check" argument of globit() */
#define GF_NONE		0		/* no flag set */
#define GF_EXCHECK	BIT(0)		/* do existence check on file */
#define GF_GLOBBED	BIT(1)		/* some globbing has been done */
#define GF_MARKDIR	BIT(2)		/* add trailing / to directories */
1520 
1521 /*
1522  * Apply file globbing to cp and store the matching files in wp. Returns
1523  * the number of matches found.
1524  */
1525 int
glob_str(char * cp,XPtrV * wp,bool markdirs)1526 glob_str(char *cp, XPtrV *wp, bool markdirs)
1527 {
1528 	int oldsize = XPsize(*wp);
1529 	XString xs;
1530 	char *xp;
1531 
1532 	Xinit(xs, xp, 256, ATEMP);
1533 	globit(&xs, &xp, cp, wp, markdirs ? GF_MARKDIR : GF_NONE);
1534 	Xfree(xs, xp);
1535 
1536 	return (XPsize(*wp) - oldsize);
1537 }
1538 
/*
 * Expand the source path sp one component per call, recursing for the
 * remaining components. The path found so far is kept in xs (with *xpp
 * pointing to its end); every completed path is duplicated and appended
 * to wp. A NULL sp marks the end of the source path.
 *
 * While a component is being processed, the directory separator that
 * follows it in sp is overwritten with NUL; it is restored before the
 * function returns.
 */
static void
globit(XString *xs,	/* dest string */
    char **xpp,		/* ptr to dest end */
    char *sp,		/* source path */
    XPtrV *wp,		/* output list */
    int check)		/* GF_* flags */
{
	char *np;		/* next source component */
	char *xp = *xpp;
	char *se;		/* end of the current source component */
	char odirsep;		/* separator character replaced by NUL */

	/* This to allow long expansions to be interrupted */
	intrcheck();

	if (sp == NULL) {
		/* end of source path */
		/*
		 * We only need to check if the file exists if a pattern
		 * is followed by a non-pattern (eg, foo*x/bar; no check
		 * is needed for foo* since the match must exist) or if
		 * any patterns were expanded and the markdirs option is set.
		 * Symlinks make things a bit tricky...
		 */
		if ((check & GF_EXCHECK) ||
		    ((check & GF_MARKDIR) && (check & GF_GLOBBED))) {
/* stat() the path at most once, caching the outcome in stat_done */
#define stat_check()	(stat_done ? stat_done : (stat_done = \
			    stat(Xstring(*xs, xp), &statb) < 0 ? -1 : 1))
			struct stat lstatb, statb;
			/* -1: failed, 1 ok, 0 not yet done */
			int stat_done = 0;

			/* path does not exist: produce no entry */
			if (mksh_lstat(Xstring(*xs, xp), &lstatb) < 0)
				return;
			/*
			 * special case for systems which strip trailing
			 * slashes from regular files (eg, /etc/passwd/).
			 * SunOS 4.1.3 does this...
			 */
			if ((check & GF_EXCHECK) && xp > Xstring(*xs, xp) &&
			    mksh_cdirsep(xp[-1]) && !S_ISDIR(lstatb.st_mode) &&
			    (!S_ISLNK(lstatb.st_mode) ||
			    stat_check() < 0 || !S_ISDIR(statb.st_mode)))
				return;
			/*
			 * Possibly tack on a trailing / if there isn't already
			 * one and if the file is a directory or a symlink to a
			 * directory
			 */
			if (((check & GF_MARKDIR) && (check & GF_GLOBBED)) &&
			    xp > Xstring(*xs, xp) && !mksh_cdirsep(xp[-1]) &&
			    (S_ISDIR(lstatb.st_mode) ||
			    (S_ISLNK(lstatb.st_mode) && stat_check() > 0 &&
			    S_ISDIR(statb.st_mode)))) {
				*xp++ = '/';
				*xp = '\0';
			}
		}
		/* store a copy of the finished path in the output list */
		strndupx(np, Xstring(*xs, xp), Xlength(*xs, xp), ATEMP);
		XPput(*wp, np);
		return;
	}

	/* separate this component from what is already in the dest string */
	if (xp > Xstring(*xs, xp))
		*xp++ = '/';
	/* copy leading directory separators of the source verbatim */
	while (mksh_cdirsep(*sp)) {
		Xcheck(*xs, xp);
		*xp++ = *sp++;
	}
	np = mksh_sdirsep(sp);
	if (np != NULL) {
		se = np;
		/* don't assume '/', can be multiple kinds */
		odirsep = *np;
		/* terminate the current component; undone at the end */
		*np++ = '\0';
	} else {
		odirsep = '\0'; /* keep gcc quiet */
		se = sp + strlen(sp);
	}


	/*
	 * Check if sp needs globbing - done to avoid pattern checks for strings
	 * containing MAGIC characters, open [s without the matching close ],
	 * etc. (otherwise opendir() will be called which may fail because the
	 * directory isn't readable - if no globbing is needed, only execute
	 * permission should be required (as per POSIX)).
	 */
	if (!has_globbing(sp, se)) {
		/* literal component: append it with MAGIC removed, go on */
		XcheckN(*xs, xp, se - sp + 1);
		debunk(xp, sp, Xnleft(*xs, xp));
		xp += strlen(xp);
		*xpp = xp;
		globit(xs, xpp, np, wp, check);
	} else {
		DIR *dirp;
		struct dirent *d;
		char *name;
		size_t len, prefix_len;

		/* xp = *xpp;	copy_non_glob() may have re-alloc'd xs */
		*xp = '\0';
		prefix_len = Xlength(*xs, xp);
		/* an empty prefix means: read the directory named by Tdot */
		dirp = opendir(prefix_len ? Xstring(*xs, xp) : Tdot);
		if (dirp == NULL)
			goto Nodir;
		while ((d = readdir(dirp)) != NULL) {
			name = d->d_name;
			if (name[0] == '.' &&
			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
				/* always ignore . and .. */
				continue;
			/*
			 * names starting with a dot are only considered if
			 * the pattern starts with a dot as well
			 */
			if ((*name == '.' && *sp != '.') ||
			    !gmatchx(name, sp, true))
				continue;

			/* append the entry name, including its NUL */
			len = strlen(d->d_name) + 1;
			XcheckN(*xs, xp, len);
			memcpy(xp, name, len);
			*xpp = xp + len - 1;
			globit(xs, xpp, np, wp,
				(check & GF_MARKDIR) | GF_GLOBBED
				| (np ? GF_EXCHECK : GF_NONE));
			/*
			 * recompute xp from the start of the string, as
			 * the recursion may presumably have reallocated it
			 */
			xp = Xstring(*xs, xp) + prefix_len;
		}
		closedir(dirp);
 Nodir:
		;
	}

	/* put back the separator overwritten above */
	if (np != NULL)
		*--np = odirsep;
}
1672 
1673 /* remove MAGIC from string */
1674 char *
debunk(char * dp,const char * sp,size_t dlen)1675 debunk(char *dp, const char *sp, size_t dlen)
1676 {
1677 	char *d;
1678 	const char *s;
1679 
1680 	if ((s = cstrchr(sp, MAGIC))) {
1681 		if (s - sp >= (ssize_t)dlen)
1682 			return (dp);
1683 		memmove(dp, sp, s - sp);
1684 		for (d = dp + (s - sp); *s && (d - dp < (ssize_t)dlen); s++)
1685 			if (!ISMAGIC(*s) || !(*++s & 0x80) ||
1686 			    !vstrchr("*+?@! ", *s & 0x7f))
1687 				*d++ = *s;
1688 			else {
1689 				/* extended pattern operators: *+?@! */
1690 				if ((*s & 0x7f) != ' ')
1691 					*d++ = *s & 0x7f;
1692 				if (d - dp < (ssize_t)dlen)
1693 					*d++ = '(';
1694 			}
1695 		*d = '\0';
1696 	} else if (dp != sp)
1697 		strlcpy(dp, sp, dlen);
1698 	return (dp);
1699 }
1700 
1701 /*
1702  * Check if p is an unquoted name, possibly followed by a / or :. If so
1703  * puts the expanded version in *dcp,dp and returns a pointer in p just
1704  * past the name, otherwise returns 0.
1705  */
1706 static const char *
maybe_expand_tilde(const char * p,XString * dsp,char ** dpp,bool isassign)1707 maybe_expand_tilde(const char *p, XString *dsp, char **dpp, bool isassign)
1708 {
1709 	XString ts;
1710 	char *dp = *dpp;
1711 	char *tp;
1712 	const char *r;
1713 
1714 	Xinit(ts, tp, 16, ATEMP);
1715 	/* : only for DOASNTILDE form */
1716 	while (p[0] == CHAR && !mksh_cdirsep(p[1]) &&
1717 	    (!isassign || p[1] != ':')) {
1718 		Xcheck(ts, tp);
1719 		*tp++ = p[1];
1720 		p += 2;
1721 	}
1722 	*tp = '\0';
1723 	r = (p[0] == EOS || p[0] == CHAR || p[0] == CSUBST) ?
1724 	    do_tilde(Xstring(ts, tp)) : NULL;
1725 	Xfree(ts, tp);
1726 	if (r) {
1727 		while (*r) {
1728 			Xcheck(*dsp, dp);
1729 			if (ISMAGIC(*r))
1730 				*dp++ = MAGIC;
1731 			*dp++ = *r++;
1732 		}
1733 		*dpp = dp;
1734 		r = p;
1735 	}
1736 	return (r);
1737 }
1738 
1739 /*
1740  * tilde expansion
1741  *
1742  * based on a version by Arnold Robbins
1743  */
1744 char *
do_tilde(char * cp)1745 do_tilde(char *cp)
1746 {
1747 	char *dp = null;
1748 #ifndef MKSH_NOPWNAM
1749 	bool do_simplify = true;
1750 #endif
1751 
1752 	if (cp[0] == '\0')
1753 		dp = str_val(global("HOME"));
1754 	else if (cp[0] == '+' && cp[1] == '\0')
1755 		dp = str_val(global(TPWD));
1756 	else if (ksh_isdash(cp))
1757 		dp = str_val(global(TOLDPWD));
1758 #ifndef MKSH_NOPWNAM
1759 	else {
1760 		dp = homedir(cp);
1761 		do_simplify = false;
1762 	}
1763 #endif
1764 
1765 	/* if parameters aren't set, don't expand ~ */
1766 	if (dp == NULL || dp == null)
1767 		return (NULL);
1768 
1769 	/* simplify parameters as if cwd upon entry */
1770 #ifndef MKSH_NOPWNAM
1771 	if (do_simplify)
1772 #endif
1773 	  {
1774 		strdupx(dp, dp, ATEMP);
1775 		simplify_path(dp);
1776 	}
1777 	return (dp);
1778 }
1779 
1780 #ifndef MKSH_NOPWNAM
1781 /*
1782  * map userid to user's home directory.
1783  * note that 4.3's getpw adds more than 6K to the shell,
1784  * and the YP version probably adds much more.
1785  * we might consider our own version of getpwnam() to keep the size down.
1786  */
1787 static char *
homedir(char * name)1788 homedir(char *name)
1789 {
1790 	struct tbl *ap;
1791 
1792 	ap = ktenter(&homedirs, name, hash(name));
1793 	if (!(ap->flag & ISSET)) {
1794 		struct passwd *pw;
1795 
1796 		pw = getpwnam(name);
1797 		if (pw == NULL)
1798 			return (NULL);
1799 		strdupx(ap->val.s, pw->pw_dir, APERM);
1800 		ap->flag |= DEFINED|ISSET|ALLOC;
1801 	}
1802 	return (ap->val.s);
1803 }
1804 #endif
1805 
1806 static void
alt_expand(XPtrV * wp,char * start,char * exp_start,char * end,int fdo)1807 alt_expand(XPtrV *wp, char *start, char *exp_start, char *end, int fdo)
1808 {
1809 	unsigned int count = 0;
1810 	char *brace_start, *brace_end, *comma = NULL;
1811 	char *field_start;
1812 	char *p = exp_start;
1813 
1814 	/* search for open brace */
1815 	while ((p = strchr(p, MAGIC)) && p[1] != '{' /*}*/)
1816 		p += 2;
1817 	brace_start = p;
1818 
1819 	/* find matching close brace, if any */
1820 	if (p) {
1821 		comma = NULL;
1822 		count = 1;
1823 		p += 2;
1824 		while (*p && count) {
1825 			if (ISMAGIC(*p++)) {
1826 				if (*p == '{' /*}*/)
1827 					++count;
1828 				else if (*p == /*{*/ '}')
1829 					--count;
1830 				else if (*p == ',' && count == 1)
1831 					comma = p;
1832 				++p;
1833 			}
1834 		}
1835 	}
1836 	/* no valid expansions... */
1837 	if (!p || count != 0) {
1838 		/*
1839 		 * Note that given a{{b,c} we do not expand anything (this is
1840 		 * what AT&T ksh does. This may be changed to do the {b,c}
1841 		 * expansion. }
1842 		 */
1843 		if (fdo & DOGLOB)
1844 			glob(start, wp, tobool(fdo & DOMARKDIRS));
1845 		else
1846 			XPput(*wp, debunk(start, start, end - start));
1847 		return;
1848 	}
1849 	brace_end = p;
1850 	if (!comma) {
1851 		alt_expand(wp, start, brace_end, end, fdo);
1852 		return;
1853 	}
1854 
1855 	/* expand expression */
1856 	field_start = brace_start + 2;
1857 	count = 1;
1858 	for (p = brace_start + 2; p != brace_end; p++) {
1859 		if (ISMAGIC(*p)) {
1860 			if (*++p == '{' /*}*/)
1861 				++count;
1862 			else if ((*p == /*{*/ '}' && --count == 0) ||
1863 			    (*p == ',' && count == 1)) {
1864 				char *news;
1865 				int l1, l2, l3;
1866 
1867 				/*
1868 				 * addition safe since these operate on
1869 				 * one string (separate substrings)
1870 				 */
1871 				l1 = brace_start - start;
1872 				l2 = (p - 1) - field_start;
1873 				l3 = end - brace_end;
1874 				news = alloc(l1 + l2 + l3 + 1, ATEMP);
1875 				memcpy(news, start, l1);
1876 				memcpy(news + l1, field_start, l2);
1877 				memcpy(news + l1 + l2, brace_end, l3);
1878 				news[l1 + l2 + l3] = '\0';
1879 				alt_expand(wp, news, news + l1,
1880 				    news + l1 + l2 + l3, fdo);
1881 				field_start = p + 1;
1882 			}
1883 		}
1884 	}
1885 	return;
1886 }
1887 
1888 /* helper function due to setjmp/longjmp woes */
1889 static char *
valsub(struct op * t,Area * ap)1890 valsub(struct op *t, Area *ap)
1891 {
1892 	char * volatile cp = NULL;
1893 	struct tbl * volatile vp = NULL;
1894 
1895 	newenv(E_FUNC);
1896 	newblock();
1897 	if (ap)
1898 		vp = local("REPLY", false);
1899 	if (!kshsetjmp(e->jbuf))
1900 		execute(t, XXCOM | XERROK, NULL);
1901 	if (vp)
1902 		strdupx(cp, str_val(vp), ap);
1903 	quitenv(NULL);
1904 
1905 	return (cp);
1906 }
1907