1 /*  GNU SED, a batch stream editor.
2     Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006,2008
3     Free Software Foundation, Inc.
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 3, or (at your option)
8     any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 
19 /* compile.c: translate sed source into internal form */
20 
21 #include "sed.h"
22 #include <stdio.h>
23 #include <ctype.h>
24 
25 #ifdef HAVE_STRINGS_H
26 # include <strings.h>
27 # ifdef HAVE_MEMORY_H
28 #  include <memory.h>
29 # endif
30 #else
31 # include <string.h>
32 #endif /* HAVE_STRINGS_H */
33 
34 #ifdef HAVE_STDLIB_H
35 # include <stdlib.h>
36 #endif
37 #ifndef EXIT_FAILURE
38 # define EXIT_FAILURE 1
39 #endif
40 
41 #ifdef HAVE_SYS_TYPES_H
42 # include <sys/types.h>
43 #endif
44 
45 #include <obstack.h>
46 
47 
/* Presumably the number of entries in the `y' command's translation map
   (its use is not visible in this part of the file -- TODO confirm).  */
#define YMAP_LENGTH		256 /*XXX shouldn't this be (UCHAR_MAX+1)?*/
/* Number of `struct sed_cmd' slots added to a command vector each time
   it fills up; see next_cmd_entry().  */
#define VECTOR_ALLOC_INCREMENT	40

/* let's not confuse text editors that have only dumb bracket-matching... */
#define OPEN_BRACKET	'['
#define CLOSE_BRACKET	']'
#define OPEN_BRACE	'{'
#define CLOSE_BRACE	'}'
56 
/* Describes where the script text currently being compiled comes from:
   either an in-memory string (base/cur/end) or an open script file.
   inchar() and savchar() are the readers of this structure.  */
struct prog_info {
  /* When we're reading a script command from a string, `prog.base'
     points to the first character in the string, 'prog.cur' points
     to the current character in the string, and 'prog.end' points
     to the end of the string.  This allows us to compile script
     strings that contain nulls. */
  const unsigned char *base;
  const unsigned char *cur;
  const unsigned char *end;

  /* This is the current script file.  If it is NULL, we are reading
     from a string stored at `prog.cur' instead.  If both `prog.file'
     and `prog.cur' are NULL, we're in trouble! */
  FILE *file;
};
72 
/* Information used to give out useful and informative error messages.
   bad_prog() reports `name' and `line' when `name' is set, and
   `string_expr_count' (plus the offset into the string) otherwise.  */
struct error_info {
  /* This is the name of the current script file. */
  const char *name;

  /* This is the number of the current script line that we're compiling.
     inchar() increments it on every newline read; savchar() decrements
     it again when a newline is pushed back. */
  countT line;

  /* This is the index of the "-e" expressions on the command line. */
  countT string_expr_count;
};
84 
85 
/* Label structure used to resolve GOTO's, labels, and block beginnings.
   Nodes are created by setup_label() and popped by release_label();
   each list is used as a stack linked through `next'.  */
struct sed_label {
  countT v_index;		/* index of vector element being referenced */
  char *name;			/* NUL-terminated name of the label */
  struct error_info err_info;	/* track where `{}' blocks start */
  struct sed_label *next;	/* linked list (stack) */
};
93 
/* Pairs an output descriptor for a well-known device name with the
   variable that holds the stdio stream to use for it.  get_openfile()
   copies `*pfp' into `outf.fp' when the name is requested.  */
struct special_files {
  struct output outf;
  FILE **pfp;
};

/* Run-time copies of stdin, stdout and stderr.  get_openfile() assigns
   them because the std* names are sometimes not constants and so cannot
   appear in the initializer below.  */
FILE *my_stdin, *my_stdout, *my_stderr;
/* Table of the special file names; the entry with a NULL name ends it.  */
struct special_files special_files[] = {
  { { "/dev/stdin", false, NULL, NULL }, &my_stdin },
  { { "/dev/stdout", false, NULL, NULL }, &my_stdout },
  { { "/dev/stderr", false, NULL, NULL }, &my_stderr },
  { { NULL, false, NULL, NULL }, NULL }
};
106 
107 
/* Where we are in the processing of the input.  `prog' is the source of
   script characters, `cur_input' the position reported by bad_prog().  */
static struct prog_info prog;
static struct error_info cur_input;

/* Information about labels and jumps-to-labels.  This is used to do
   the required backpatching after we have compiled all the scripts. */
static struct sed_label *jumps = NULL;
static struct sed_label *labels = NULL;

/* We wish to detect #n magic only in the first input argument;
   this flag tracks when we have consumed the first file of input. */
static bool first_script = true;

/* Allow for scripts like "sed -e 'i\' -e foo":
   `pending_text' accumulates a text argument that is not finished yet,
   and `old_text_buf' remembers which text_buf it belongs to so that
   read_text() can complete it on a later call.  */
static struct buffer *pending_text = NULL;
static struct text_buf *old_text_buf = NULL;

/* Information about block start positions.  This is used to backpatch
   block end positions. */
static struct sed_label *blocks = NULL;

/* Use an obstack for compilation.  It is initialized by compile_program()
   when it creates a fresh command vector.  */
static struct obstack obs;
131 
/* Various error messages we may want to print.

   All messages are packed into one array, separated by NUL bytes.  Each
   macro below yields the address of one message: it is the address of
   the previous message plus the size (including the terminating NUL) of
   the previous message's text.  The string given to sizeof() must
   therefore match the corresponding text in `errors' byte for byte, and
   the macros must stay in the same order as the messages.
   N_() presumably only marks the literal for translation extraction
   (its definition is not visible here -- TODO confirm).  */
static const char errors[] =
  "multiple `!'s\0"
  "unexpected `,'\0"
  "invalid usage of +N or ~N as first address\0"
  "unmatched `{'\0"
  "unexpected `}'\0"
  "extra characters after command\0"
  "expected \\ after `a', `c' or `i'\0"
  "`}' doesn't want any addresses\0"
  ": doesn't want any addresses\0"
  "comments don't accept any addresses\0"
  "missing command\0"
  "command only uses one address\0"
  "unterminated address regex\0"
  "unterminated `s' command\0"
  "unterminated `y' command\0"
  "unknown option to `s'\0"
  "multiple `p' options to `s' command\0"
  "multiple `g' options to `s' command\0"
  "multiple number options to `s' command\0"
  "number option to `s' command may not be zero\0"
  "strings for `y' command are different lengths\0"
  "delimiter character is not a single-byte character\0"
  "expected newer version of sed\0"
  "invalid usage of line address 0\0"
  "unknown command: `%c'";

#define BAD_BANG (errors)
#define BAD_COMMA (BAD_BANG + sizeof(N_("multiple `!'s")))
#define BAD_STEP (BAD_COMMA + sizeof(N_("unexpected `,'")))
#define EXCESS_OPEN_BRACE (BAD_STEP + sizeof(N_("invalid usage of +N or ~N as first address")))
#define EXCESS_CLOSE_BRACE (EXCESS_OPEN_BRACE + sizeof(N_("unmatched `{'")))
#define EXCESS_JUNK (EXCESS_CLOSE_BRACE + sizeof(N_("unexpected `}'")))
#define EXPECTED_SLASH (EXCESS_JUNK + sizeof(N_("extra characters after command")))
#define NO_CLOSE_BRACE_ADDR (EXPECTED_SLASH + sizeof(N_("expected \\ after `a', `c' or `i'")))
#define NO_COLON_ADDR (NO_CLOSE_BRACE_ADDR + sizeof(N_("`}' doesn't want any addresses")))
#define NO_SHARP_ADDR (NO_COLON_ADDR + sizeof(N_(": doesn't want any addresses")))
#define NO_COMMAND (NO_SHARP_ADDR + sizeof(N_("comments don't accept any addresses")))
#define ONE_ADDR (NO_COMMAND + sizeof(N_("missing command")))
#define UNTERM_ADDR_RE (ONE_ADDR + sizeof(N_("command only uses one address")))
#define UNTERM_S_CMD (UNTERM_ADDR_RE + sizeof(N_("unterminated address regex")))
#define UNTERM_Y_CMD (UNTERM_S_CMD + sizeof(N_("unterminated `s' command")))
#define UNKNOWN_S_OPT (UNTERM_Y_CMD + sizeof(N_("unterminated `y' command")))
#define EXCESS_P_OPT (UNKNOWN_S_OPT + sizeof(N_("unknown option to `s'")))
#define EXCESS_G_OPT (EXCESS_P_OPT + sizeof(N_("multiple `p' options to `s' command")))
#define EXCESS_N_OPT (EXCESS_G_OPT + sizeof(N_("multiple `g' options to `s' command")))
#define ZERO_N_OPT (EXCESS_N_OPT + sizeof(N_("multiple number options to `s' command")))
#define Y_CMD_LEN (ZERO_N_OPT + sizeof(N_("number option to `s' command may not be zero")))
#define BAD_DELIM (Y_CMD_LEN + sizeof(N_("strings for `y' command are different lengths")))
#define ANCIENT_VERSION (BAD_DELIM + sizeof(N_("delimiter character is not a single-byte character")))
#define INVALID_LINE_0 (ANCIENT_VERSION + sizeof(N_("expected newer version of sed")))
#define UNKNOWN_CMD (INVALID_LINE_0 + sizeof(N_("invalid usage of line address 0")))
/* One past the last message; marks the end of the table.  */
#define END_ERRORS (UNKNOWN_CMD + sizeof(N_("unknown command: `%c'")))
186 
/* Heads of the lists of files already opened by get_openfile(), linked
   through `struct output.link'.  `file_write' is the list used for the
   `w' option of the `s' command; `file_read' is presumably its
   counterpart for files opened for reading (no use is visible in this
   part of the file -- TODO confirm).  */
static struct output *file_read = NULL;
static struct output *file_write = NULL;
189 
190 
191 /* Complain about an unknown command and exit. */
192 void
bad_command(ch)193 bad_command(ch)
194   char ch;
195 {
196   const char *msg = _(UNKNOWN_CMD);
197   char *unknown_cmd = xmalloc(strlen(msg));
198   sprintf(unknown_cmd, msg, ch);
199   bad_prog(unknown_cmd);
200 }
201 
202 /* Complain about a programming error and exit. */
203 void
bad_prog(why)204 bad_prog(why)
205   const char *why;
206 {
207   if (cur_input.name)
208     fprintf(stderr, _("%s: file %s line %lu: %s\n"),
209 	    myname, cur_input.name, CAST(unsigned long)cur_input.line, why);
210   else
211     fprintf(stderr, _("%s: -e expression #%lu, char %lu: %s\n"),
212 	    myname,
213 	    CAST(unsigned long)cur_input.string_expr_count,
214 	    CAST(unsigned long)(prog.cur-prog.base),
215 	    why);
216   exit(EXIT_FAILURE);
217 }
218 
219 
220 /* Read the next character from the program.  Return EOF if there isn't
221    anything to read.  Keep cur_input.line up to date, so error messages
222    can be meaningful. */
223 static int inchar P_((void));
224 static int
inchar()225 inchar()
226 {
227   int ch = EOF;
228 
229   if (prog.cur)
230     {
231       if (prog.cur < prog.end)
232 	ch = *prog.cur++;
233     }
234   else if (prog.file)
235     {
236       if (!feof(prog.file))
237 	ch = getc(prog.file);
238     }
239   if (ch == '\n')
240     ++cur_input.line;
241   return ch;
242 }
243 
244 /* unget `ch' so the next call to inchar will return it.   */
245 static void savchar P_((int ch));
246 static void
savchar(ch)247 savchar(ch)
248   int ch;
249 {
250   if (ch == EOF)
251     return;
252   if (ch == '\n' && cur_input.line > 0)
253     --cur_input.line;
254   if (prog.cur)
255     {
256       if (prog.cur <= prog.base || *--prog.cur != ch)
257 	panic("Called savchar() with unexpected pushback (%x)",
258 	      CAST(unsigned char)ch);
259     }
260   else
261     ungetc(ch, prog.file);
262 }
263 
264 /* Read the next non-blank character from the program.  */
265 static int in_nonblank P_((void));
266 static int
in_nonblank()267 in_nonblank()
268 {
269   int ch;
270   do
271     ch = inchar();
272     while (ISBLANK(ch));
273   return ch;
274 }
275 
276 /* Read an integer value from the program.  */
277 static countT in_integer P_((int ch));
278 static countT
in_integer(ch)279 in_integer(ch)
280   int ch;
281 {
282   countT num = 0;
283 
284   while (ISDIGIT(ch))
285     {
286       num = num * 10 + ch - '0';
287       ch = inchar();
288     }
289   savchar(ch);
290   return num;
291 }
292 
293 static int add_then_next P_((struct buffer *b, int ch));
294 static int
add_then_next(b,ch)295 add_then_next(b, ch)
296   struct buffer *b;
297   int ch;
298 {
299   add1_buffer(b, ch);
300   return inchar();
301 }
302 
303 static char * convert_number P_((char *, char *, const char *, int, int, int));
/* Convert a number written in BASE (at most 16) found at the start of
   the bytes [BUF, BUFEND).  At most MAXDIGITS digits are consumed, and
   scanning stops early at the first byte that is not a valid digit of
   BASE.  *RESULT receives the value converted to `char', or
   DEFAULT_CHAR when no digit was found at all.  Returns a pointer to
   the first byte that was not consumed.  */
static char *
convert_number(result, buf, bufend, base, maxdigits, default_char)
  char *result;
  char *buf;
  const char *bufend;
  int base;
  int maxdigits;
  int default_char;
{
  char *cursor = buf;
  int value = 0;

  while (cursor < bufend && maxdigits-- > 0)
    {
      int c = *cursor;
      int digit;

      if (c >= '0' && c <= '9')
	digit = c - '0';
      else
	switch (c)
	  {
	  case 'a': case 'A': digit = 10; break;
	  case 'b': case 'B': digit = 11; break;
	  case 'c': case 'C': digit = 12; break;
	  case 'd': case 'D': digit = 13; break;
	  case 'e': case 'E': digit = 14; break;
	  case 'f': case 'F': digit = 15; break;
	  default: digit = -1; break;
	  }

      /* Not a digit at all, or not a digit of this base.  */
      if (digit < 0 || digit >= base)
	break;

      value = value * base + digit;
      ++cursor;
    }

  *result = (cursor == buf) ? default_char : value;
  return cursor;
}
348 
349 
350 /* Read in a filename for a `r', `w', or `s///w' command. */
351 static struct buffer *read_filename P_((void));
352 static struct buffer *
read_filename()353 read_filename()
354 {
355   struct buffer *b;
356   int ch;
357 
358   b = init_buffer();
359   ch = in_nonblank();
360   while (ch != EOF && ch != '\n')
361     {
362 #if 0 /*XXX ZZZ 1998-09-12 kpp: added, then had second thoughts*/
363       if (posixicity == POSIXLY_EXTENDED)
364 	if (ch == ';' || ch == '#')
365 	  {
366 	    savchar(ch);
367 	    break;
368 	  }
369 #endif
370       ch = add_then_next(b, ch);
371     }
372   add1_buffer(b, '\0');
373   return b;
374 }
375 
376 static struct output *get_openfile P_((struct output **file_ptrs, const char *mode, int fail));
377 static struct output *
get_openfile(file_ptrs,mode,fail)378 get_openfile(file_ptrs, mode, fail)
379      struct output **file_ptrs;
380      const char *mode;
381      int fail;
382 {
383   struct buffer *b;
384   char *file_name;
385   struct output *p;
386 
387   b = read_filename();
388   file_name = get_buffer(b);
389   for (p=*file_ptrs; p; p=p->link)
390     if (strcmp(p->name, file_name) == 0)
391       break;
392 
393   if (posixicity == POSIXLY_EXTENDED)
394     {
395       /* Check whether it is a special file (stdin, stdout or stderr) */
396       struct special_files *special = special_files;
397 
398       /* std* sometimes are not constants, so they
399          cannot be used in the initializer for special_files */
400       my_stdin = stdin; my_stdout = stdout; my_stderr = stderr;
401       for (special = special_files; special->outf.name; special++)
402         if (strcmp(special->outf.name, file_name) == 0)
403           {
404 	    special->outf.fp = *special->pfp;
405 	    free_buffer (b);
406 	    return &special->outf;
407           }
408     }
409 
410   if (!p)
411     {
412       p = OB_MALLOC(&obs, 1, struct output);
413       p->name = ck_strdup(file_name);
414       p->fp = ck_fopen(p->name, mode, fail);
415       p->missing_newline = false;
416       p->link = *file_ptrs;
417       *file_ptrs = p;
418     }
419   free_buffer(b);
420   return p;
421 }
422 
423 
424 static struct sed_cmd *next_cmd_entry P_((struct vector **vectorp));
425 static struct sed_cmd *
next_cmd_entry(vectorp)426 next_cmd_entry(vectorp)
427   struct vector **vectorp;
428 {
429   struct sed_cmd *cmd;
430   struct vector *v;
431 
432   v = *vectorp;
433   if (v->v_length == v->v_allocated)
434     {
435       v->v_allocated += VECTOR_ALLOC_INCREMENT;
436       v->v = REALLOC(v->v, v->v_allocated, struct sed_cmd);
437     }
438 
439   cmd = v->v + v->v_length;
440   cmd->a1 = NULL;
441   cmd->a2 = NULL;
442   cmd->range_state = RANGE_INACTIVE;
443   cmd->addr_bang = false;
444   cmd->cmd = '\0';	/* something invalid, to catch bugs early */
445 
446   *vectorp  = v;
447   return cmd;
448 }
449 
/* Copy the inside of a bracket expression from the program into B.
   The caller has already consumed (and stored) the opening bracket.
   A leading `^' and a `]' in first position are copied literally.
   Scanning stops at the `]' that closes the expression, at a newline,
   or at EOF; that character is returned and is NOT added to B.
   CUR_STAT is the multibyte conversion state handed to BRLEN; a
   character for which BRLEN does not yield 1 is treated as part of a
   multibyte sequence and gets no special meaning (BRLEN is defined
   elsewhere -- presumably it gives the byte length of the character).  */
static int snarf_char_class P_((struct buffer *b, mbstate_t *cur_stat));
static int
snarf_char_class(b, cur_stat)
  struct buffer *b;
  mbstate_t *cur_stat;
{
  int ch;
  int state = 0;
  int delim;		/* only meaningful once state 2 has been entered */
  bool pending_mb = 0;

  ch = inchar();
  if (ch == '^')
    ch = add_then_next(b, ch);
  if (ch == CLOSE_BRACKET)
    ch = add_then_next(b, ch);

  /* States are:
	0 outside a collation element, character class or collation class
	1 after the bracket
	2 after the opening ./:/=
	3 after the closing ./:/= */

  /* Note that every `continue' below runs the loop increment, i.e. it
     stores CH in B and reads the next character, but skips the state
     reset at the bottom of the loop body.  */
  for (;; ch = add_then_next (b, ch))
    {
      pending_mb = BRLEN (ch, cur_stat) != 1;

      switch (ch)
	{
	case EOF:
	case '\n':
	  /* Unterminated bracket expression.  */
	  return ch;

	case '.':
	case ':':
	case '=':
	  if (pending_mb)
	    continue;

	  if (state == 1)
	    {
	      /* `[.', `[:' or `[=' opens a nested element; remember
		 which delimiter has to close it.  */
	      delim = ch;
	      state = 2;
	    }
	  else if (state == 2 && ch == delim)
	    state = 3;
	  else
	    break;

	  continue;

	case OPEN_BRACKET:
	  if (pending_mb)
	    continue;

	  if (state == 0)
	    state = 1;
	  continue;

	case CLOSE_BRACKET:
	  if (pending_mb)
	    continue;

	  /* Outside a nested element this bracket ends the expression.  */
	  if (state == 0 || state == 1)
	    return ch;
	  else if (state == 3)
	    state = 0;

	  break;

	default:
	  break;
	}

      /* Getting a character different from .=: whilst in state 1
         goes back to state 0, getting a character different from ]
         whilst in state 3 goes back to state 2.  */
      state &= ~1;
    }
}
530 
531 static struct buffer *match_slash P_((int slash, int regex));
532 static struct buffer *
match_slash(slash,regex)533 match_slash(slash, regex)
534   int slash;
535   int regex;
536 {
537   struct buffer *b;
538   int ch;
539   mbstate_t cur_stat;
540 
541   memset (&cur_stat, 0, sizeof (mbstate_t));
542 
543   /* We allow only 1 byte characters for a slash.  */
544   if (BRLEN (slash, &cur_stat) == -2)
545     bad_prog (BAD_DELIM);
546 
547   memset (&cur_stat, 0, sizeof (mbstate_t));
548 
549   b = init_buffer();
550   while ((ch = inchar()) != EOF && ch != '\n')
551     {
552       bool pending_mb = !MBSINIT (&cur_stat);
553       if (BRLEN (ch, &cur_stat) == 1 && !pending_mb)
554 	{
555 	  if (ch == slash)
556 	    return b;
557 	  else if (ch == '\\')
558 	    {
559 	      ch = inchar();
560 	      if (ch == EOF)
561 	        break;
562 #ifndef REG_PERL
563 	      else if (ch == 'n' && regex)
564 	        ch = '\n';
565 #endif
566 	      else if (ch != '\n' && (ch != slash || (!regex && ch == '&')))
567 	        add1_buffer(b, '\\');
568 	    }
569           else if (ch == OPEN_BRACKET && regex)
570 	    {
571 	      add1_buffer(b, ch);
572 	      ch = snarf_char_class(b, &cur_stat);
573 	      if (ch != CLOSE_BRACKET)
574 	        break;
575 	    }
576 	}
577 
578       add1_buffer(b, ch);
579     }
580 
581   if (ch == '\n')
582     savchar(ch);	/* for proper line number in error report */
583   free_buffer(b);
584   return NULL;
585 }
586 
587 static int mark_subst_opts P_((struct subst *cmd));
588 static int
mark_subst_opts(cmd)589 mark_subst_opts(cmd)
590   struct subst *cmd;
591 {
592   int flags = 0;
593   int ch;
594 
595   cmd->global = false;
596   cmd->print = false;
597   cmd->eval = false;
598   cmd->numb = 0;
599   cmd->outf = NULL;
600 
601   for (;;)
602     switch ( (ch = in_nonblank()) )
603       {
604       case 'i':	/* GNU extension */
605       case 'I':	/* GNU extension */
606 	if (posixicity == POSIXLY_BASIC)
607 	  bad_prog(_(UNKNOWN_S_OPT));
608 	flags |= REG_ICASE;
609 	break;
610 
611 #ifdef REG_PERL
612       case 's':	/* GNU extension */
613       case 'S':	/* GNU extension */
614 	if (posixicity == POSIXLY_BASIC)
615 	  bad_prog(_(UNKNOWN_S_OPT));
616 	if (extended_regexp_flags & REG_PERL)
617 	  flags |= REG_DOTALL;
618 	break;
619 
620       case 'x':	/* GNU extension */
621       case 'X':	/* GNU extension */
622 	if (posixicity == POSIXLY_BASIC)
623 	  bad_prog(_(UNKNOWN_S_OPT));
624 	if (extended_regexp_flags & REG_PERL)
625 	  flags |= REG_EXTENDED;
626 	break;
627 #endif
628 
629       case 'm':	/* GNU extension */
630       case 'M':	/* GNU extension */
631 	if (posixicity == POSIXLY_BASIC)
632 	  bad_prog(_(UNKNOWN_S_OPT));
633 	flags |= REG_NEWLINE;
634 	break;
635 
636       case 'e':
637 	cmd->eval = true;
638 	break;
639 
640       case 'p':
641 	if (cmd->print)
642 	  bad_prog(_(EXCESS_P_OPT));
643 	cmd->print |= (1 << cmd->eval); /* 1=before eval, 2=after */
644 	break;
645 
646       case 'g':
647 	if (cmd->global)
648 	  bad_prog(_(EXCESS_G_OPT));
649 	cmd->global = true;
650 	break;
651 
652       case 'w':
653 	cmd->outf = get_openfile(&file_write, "w", true);
654 	return flags;
655 
656       case '0': case '1': case '2': case '3': case '4':
657       case '5': case '6': case '7': case '8': case '9':
658 	if (cmd->numb)
659 	  bad_prog(_(EXCESS_N_OPT));
660 	cmd->numb = in_integer(ch);
661 	if (!cmd->numb)
662 	  bad_prog(_(ZERO_N_OPT));
663 	break;
664 
665       case CLOSE_BRACE:
666       case '#':
667 	savchar(ch);
668 	/* Fall Through */
669       case EOF:
670       case '\n':
671       case ';':
672 	return flags;
673 
674       case '\r':
675 	if (inchar() == '\n')
676 	  return flags;
677 	/* FALLTHROUGH */
678 
679       default:
680 	bad_prog(_(UNKNOWN_S_OPT));
681 	/*NOTREACHED*/
682       }
683 }
684 
685 
686 /* read in a label for a `:', `b', or `t' command */
687 static char *read_label P_((void));
688 static char *
read_label()689 read_label()
690 {
691   struct buffer *b;
692   int ch;
693   char *ret;
694 
695   b = init_buffer();
696   ch = in_nonblank();
697 
698   while (ch != EOF && ch != '\n'
699 	 && !ISBLANK(ch) && ch != ';' && ch != CLOSE_BRACE && ch != '#')
700     ch = add_then_next (b, ch);
701 
702   savchar(ch);
703   add1_buffer(b, '\0');
704   ret = ck_strdup(get_buffer(b));
705   free_buffer(b);
706   return ret;
707 }
708 
709 /* Store a label (or label reference) created by a `:', `b', or `t'
710    command so that the jump to/from the label can be backpatched after
711    compilation is complete, or a reference created by a `{' to be
712    backpatched when the corresponding `}' is found.  */
713 static struct sed_label *setup_label
714   P_((struct sed_label *, countT, char *, const struct error_info *));
715 static struct sed_label *
setup_label(list,idx,name,err_info)716 setup_label(list, idx, name, err_info)
717   struct sed_label *list;
718   countT idx;
719   char *name;
720   const struct error_info *err_info;
721 {
722   struct sed_label *ret = OB_MALLOC(&obs, 1, struct sed_label);
723   ret->v_index = idx;
724   ret->name = name;
725   if (err_info)
726     MEMCPY(&ret->err_info, err_info, sizeof (ret->err_info));
727   ret->next = list;
728   return ret;
729 }
730 
731 static struct sed_label *release_label P_((struct sed_label *list_head));
732 static struct sed_label *
release_label(list_head)733 release_label(list_head)
734   struct sed_label *list_head;
735 {
736   struct sed_label *ret;
737 
738   if (!list_head)
739     return NULL;
740   ret = list_head->next;
741 
742   FREE(list_head->name);
743 
744 #if 0
745   /* We use obstacks */
746   FREE(list_head);
747 #endif
748   return ret;
749 }
750 
751 static struct replacement *new_replacement P_((char *, size_t,
752 					       enum replacement_types));
753 static struct replacement *
new_replacement(text,length,type)754 new_replacement(text, length, type)
755   char *text;
756   size_t length;
757   enum replacement_types type;
758 {
759   struct replacement *r = OB_MALLOC(&obs, 1, struct replacement);
760 
761   r->prefix = text;
762   r->prefix_length = length;
763   r->subst_id = -1;
764   r->repl_type = type;
765 
766   /* r-> next = NULL; */
767   return r;
768 }
769 
/* Compile the replacement part of an `s' command into SUB.
   TEXT/LENGTH is the raw replacement.  It is duplicated, passed through
   normalize_text(), and split into a chain of `struct replacement'
   pieces stored in SUB->replacement.  Each piece has a literal prefix
   (pointing into the duplicated text), a case conversion type, and a
   subst_id: 0 for `&', 1..9 for a `\N' back-reference, and -1 when the
   piece has no reference.  SUB->max_id receives the highest
   back-reference number seen.  SUB->replacement is NULL when the
   replacement is empty.  */
static void setup_replacement P_((struct subst *, const char *, size_t));
static void
setup_replacement(sub, text, length)
     struct subst *sub;
     const char *text;
     size_t length;
{
  char *base;		/* start of the literal text not yet emitted */
  char *p;
  char *text_end;
  /* repl_type applies to the next piece; save_type is what repl_type
     reverts to after that piece (this is what makes \l and \u affect
     one piece only).  */
  enum replacement_types repl_type = REPL_ASIS, save_type = REPL_ASIS;
  struct replacement root;	/* dummy head; only root.next is used */
  struct replacement *tail;

  sub->max_id = 0;
  base = MEMDUP(text, length, char);
  length = normalize_text(base, length, TEXT_REPLACEMENT);

  text_end = base + length;
  tail = &root;

  for (p=base; p<text_end; ++p)
    {
      if (*p == '\\')
	{
	  /* Preceding the backslash may be some literal text: */
	  tail = tail->next =
	    new_replacement(base, CAST(size_t)(p - base), repl_type);

	  repl_type = save_type;

	  /* Skip the backslash and look for a numeric back-reference,
	     or a case-munging escape if not in POSIX mode: */
	  ++p;
	  if (p == text_end)
	    /* A trailing backslash is kept as literal text.  */
	    ++tail->prefix_length;

	  else if (posixicity == POSIXLY_BASIC && !ISDIGIT (*p))
	    {
	      /* Overwrite the backslash with the escaped character and
		 extend the prefix by one so that it covers it.  */
	      p[-1] = *p;
	      ++tail->prefix_length;
	    }

	  else
	    switch (*p)
	      {
	      case '0': case '1': case '2': case '3': case '4':
	      case '5': case '6': case '7': case '8': case '9':
		tail->subst_id = *p - '0';
		if (sub->max_id < tail->subst_id)
		  sub->max_id = tail->subst_id;
		break;

	      case 'L':
		repl_type = REPL_LOWERCASE;
		save_type = REPL_LOWERCASE;
		break;

	      case 'U':
		repl_type = REPL_UPPERCASE;
		save_type = REPL_UPPERCASE;
		break;

	      case 'E':
		repl_type = REPL_ASIS;
		save_type = REPL_ASIS;
		break;

	      case 'l':
		save_type = repl_type;
		repl_type |= REPL_LOWERCASE_FIRST;
		break;

	      case 'u':
		save_type = repl_type;
		repl_type |= REPL_UPPERCASE_FIRST;
		break;

	      default:
		/* Any other escaped character stands for itself; same
		   in-place trick as above.  */
		p[-1] = *p;
		++tail->prefix_length;
	      }

	  base = p + 1;
	}
      else if (*p == '&')
	{
	  /* Preceding the ampersand may be some literal text: */
	  tail = tail->next =
	    new_replacement(base, CAST(size_t)(p - base), repl_type);

	  repl_type = save_type;
	  tail->subst_id = 0;
	  base = p + 1;
	}
  }
  /* There may be some trailing literal text: */
  if (base < text_end)
    tail = tail->next =
      new_replacement(base, CAST(size_t)(text_end - base), repl_type);

  /* Terminate the chain.  With an empty replacement TAIL is still
     &root, so this makes root.next NULL.  */
  tail->next = NULL;
  sub->replacement = root.next;
}
874 
875 static void read_text P_((struct text_buf *buf, int leadin_ch));
876 static void
read_text(buf,leadin_ch)877 read_text(buf, leadin_ch)
878   struct text_buf *buf;
879   int leadin_ch;
880 {
881   int ch;
882 
883   /* Should we start afresh (as opposed to continue a partial text)? */
884   if (buf)
885     {
886       if (pending_text)
887 	free_buffer(pending_text);
888       pending_text = init_buffer();
889       buf->text = NULL;
890       buf->text_length = 0;
891       old_text_buf = buf;
892     }
893   /* assert(old_text_buf != NULL); */
894 
895   if (leadin_ch == EOF)
896     return;
897 
898   if (leadin_ch != '\n')
899     add1_buffer(pending_text, leadin_ch);
900 
901   ch = inchar();
902   while (ch != EOF && ch != '\n')
903     {
904       if (ch == '\\')
905 	{
906 	  ch = inchar();
907 	  if (ch != EOF)
908 	    add1_buffer (pending_text, '\\');
909 	}
910 
911       if (ch == EOF)
912 	{
913 	  add1_buffer (pending_text, '\n');
914 	  return;
915 	}
916 
917       ch = add_then_next (pending_text, ch);
918     }
919 
920   add1_buffer(pending_text, '\n');
921   if (!buf)
922     buf = old_text_buf;
923   buf->text_length = normalize_text (get_buffer (pending_text),
924 				     size_buffer (pending_text), TEXT_BUFFER);
925   buf->text = MEMDUP(get_buffer(pending_text), buf->text_length, char);
926   free_buffer(pending_text);
927   pending_text = NULL;
928 }
929 
930 
931 /* Try to read an address for a sed command.  If it succeeds,
932    return non-zero and store the resulting address in `*addr'.
933    If the input doesn't look like an address read nothing
934    and return zero.  */
935 static bool compile_address P_((struct addr *addr, int ch));
936 static bool
compile_address(addr,ch)937 compile_address(addr, ch)
938   struct addr *addr;
939   int ch;
940 {
941   addr->addr_type = ADDR_IS_NULL;
942   addr->addr_step = 0;
943   addr->addr_number = ~(countT)0;  /* extremely unlikely to ever match */
944   addr->addr_regex = NULL;
945 
946   if (ch == '/' || ch == '\\')
947     {
948       int flags = 0;
949       struct buffer *b;
950       addr->addr_type = ADDR_IS_REGEX;
951       if (ch == '\\')
952 	ch = inchar();
953       if ( !(b = match_slash(ch, true)) )
954 	bad_prog(_(UNTERM_ADDR_RE));
955 
956       for(;;)
957 	{
958 	  ch = in_nonblank();
959 	  if (posixicity == POSIXLY_BASIC)
960 	    goto posix_address_modifier;
961           switch(ch)
962 	    {
963 	    case 'I':	/* GNU extension */
964 	      flags |= REG_ICASE;
965 	      break;
966 
967 #ifdef REG_PERL
968 	    case 'S':	/* GNU extension */
969 	      if (extended_regexp_flags & REG_PERL)
970 		flags |= REG_DOTALL;
971 	      break;
972 
973 	    case 'X':	/* GNU extension */
974 	      if (extended_regexp_flags & REG_PERL)
975 		flags |= REG_EXTENDED;
976 	      break;
977 #endif
978 
979 	    case 'M':	/* GNU extension */
980 	      flags |= REG_NEWLINE;
981 	      break;
982 
983 	    default:
984 	    posix_address_modifier:
985 	      savchar (ch);
986 	      addr->addr_regex = compile_regex (b, flags, 0);
987 	      free_buffer(b);
988 	      return true;
989 	    }
990 	}
991     }
992   else if (ISDIGIT(ch))
993     {
994       addr->addr_number = in_integer(ch);
995       addr->addr_type = ADDR_IS_NUM;
996       ch = in_nonblank();
997       if (ch != '~' || posixicity == POSIXLY_BASIC)
998 	{
999 	  savchar(ch);
1000 	}
1001       else
1002 	{
1003 	  countT step = in_integer(in_nonblank());
1004 	  if (step > 0)
1005 	    {
1006 	      addr->addr_step = step;
1007 	      addr->addr_type = ADDR_IS_NUM_MOD;
1008 	    }
1009 	}
1010     }
1011   else if ((ch == '+' || ch == '~') && posixicity != POSIXLY_BASIC)
1012     {
1013       addr->addr_step = in_integer(in_nonblank());
1014       if (addr->addr_step==0)
1015 	; /* default to ADDR_IS_NULL; forces matching to stop on next line */
1016       else if (ch == '+')
1017 	addr->addr_type = ADDR_IS_STEP;
1018       else
1019 	addr->addr_type = ADDR_IS_STEP_MOD;
1020     }
1021   else if (ch == '$')
1022     {
1023       addr->addr_type = ADDR_IS_LAST;
1024     }
1025   else
1026     return false;
1027 
1028   return true;
1029 }
1030 
1031 /* Read a program (or a subprogram within `{' `}' pairs) in and store
1032    the compiled form in `*vector'.  Return a pointer to the new vector.  */
1033 static struct vector *compile_program P_((struct vector *));
1034 static struct vector *
compile_program(vector)1035 compile_program(vector)
1036   struct vector *vector;
1037 {
1038   struct sed_cmd *cur_cmd;
1039   struct buffer *b;
1040   int ch;
1041 
1042   if (!vector)
1043     {
1044       vector = MALLOC(1, struct vector);
1045       vector->v = NULL;
1046       vector->v_allocated = 0;
1047       vector->v_length = 0;
1048 
1049       obstack_init (&obs);
1050     }
1051   if (pending_text)
1052     read_text(NULL, '\n');
1053 
1054   for (;;)
1055     {
1056       struct addr a;
1057 
1058       while ((ch=inchar()) == ';' || ISSPACE(ch))
1059 	;
1060       if (ch == EOF)
1061 	break;
1062 
1063       cur_cmd = next_cmd_entry(&vector);
1064       if (compile_address(&a, ch))
1065 	{
1066 	  if (a.addr_type == ADDR_IS_STEP
1067 	      || a.addr_type == ADDR_IS_STEP_MOD)
1068 	    bad_prog(_(BAD_STEP));
1069 
1070 	  cur_cmd->a1 = MEMDUP(&a, 1, struct addr);
1071 	  ch = in_nonblank();
1072 	  if (ch == ',')
1073 	    {
1074 	      if (!compile_address(&a, in_nonblank()))
1075 		bad_prog(_(BAD_COMMA));
1076 
1077 	      cur_cmd->a2 = MEMDUP(&a, 1, struct addr);
1078 	      ch = in_nonblank();
1079 	    }
1080 
1081 	  if ((cur_cmd->a1->addr_type == ADDR_IS_NUM
1082 	       && cur_cmd->a1->addr_number == 0)
1083 	      && ((!cur_cmd->a2 || cur_cmd->a2->addr_type != ADDR_IS_REGEX)
1084 		  || posixicity == POSIXLY_BASIC))
1085 	    bad_prog(_(INVALID_LINE_0));
1086 	}
1087       if (ch == '!')
1088 	{
1089 	  cur_cmd->addr_bang = true;
1090 	  ch = in_nonblank();
1091 	  if (ch == '!')
1092 	    bad_prog(_(BAD_BANG));
1093 	}
1094 
1095       /* Do not accept extended commands in --posix mode.  Also,
1096 	 a few commands only accept one address in that mode.  */
1097       if (posixicity == POSIXLY_BASIC)
1098 	switch (ch)
1099 	  {
1100 	    case 'e': case 'v': case 'z': case 'L':
1101 	    case 'Q': case 'T': case 'R': case 'W':
1102 	      bad_command(ch);
1103 
1104 	    case 'a': case 'i': case 'l':
1105 	    case '=': case 'r':
1106 	      if (cur_cmd->a2)
1107 	        bad_prog(_(ONE_ADDR));
1108 	  }
1109 
1110       cur_cmd->cmd = ch;
1111       switch (ch)
1112 	{
1113 	case '#':
1114 	  if (cur_cmd->a1)
1115 	    bad_prog(_(NO_SHARP_ADDR));
1116 	  ch = inchar();
1117 	  if (ch=='n' && first_script && cur_input.line < 2)
1118 	    if (   (prog.base && prog.cur==2+prog.base)
1119 		|| (prog.file && !prog.base && 2==ftell(prog.file)))
1120 	      no_default_output = true;
1121 	  while (ch != EOF && ch != '\n')
1122 	    ch = inchar();
1123 	  continue;	/* restart the for (;;) loop */
1124 
1125 	case 'v':
1126 	  /* This is an extension.  Programs needing GNU sed might start
1127 	   * with a `v' command so that other seds will stop.
1128 	   * We compare the version and ignore POSIXLY_CORRECT.
1129 	   */
1130 	  {
1131 	    char *version = read_label ();
1132 	    char *compared_version;
1133 	    compared_version = (*version == '\0') ? "4.0" : version;
1134 	    if (strverscmp (compared_version, SED_FEATURE_VERSION) > 0)
1135 	      bad_prog(_(ANCIENT_VERSION));
1136 
1137 	    free (version);
1138 	    posixicity = POSIXLY_EXTENDED;
1139 	  }
1140 	  continue;
1141 
1142 	case '{':
1143 	  blocks = setup_label(blocks, vector->v_length, NULL, &cur_input);
1144 	  cur_cmd->addr_bang = !cur_cmd->addr_bang;
1145 	  break;
1146 
1147 	case '}':
1148 	  if (!blocks)
1149 	    bad_prog(_(EXCESS_CLOSE_BRACE));
1150 	  if (cur_cmd->a1)
1151 	    bad_prog(_(NO_CLOSE_BRACE_ADDR));
1152 	  ch = in_nonblank();
1153 	  if (ch == CLOSE_BRACE || ch == '#')
1154 	    savchar(ch);
1155 	  else if (ch != EOF && ch != '\n' && ch != ';')
1156 	    bad_prog(_(EXCESS_JUNK));
1157 
1158 	  vector->v[blocks->v_index].x.jump_index = vector->v_length;
1159 	  blocks = release_label(blocks);	/* done with this entry */
1160 	  break;
1161 
1162 	case 'e':
1163 	  ch = in_nonblank();
1164 	  if (ch == EOF || ch == '\n')
1165 	    {
1166 	      cur_cmd->x.cmd_txt.text_length = 0;
1167 	      break;
1168 	    }
1169 	  else
1170 	    goto read_text_to_slash;
1171 
1172 	case 'a':
1173 	case 'i':
1174 	case 'c':
1175 	  ch = in_nonblank();
1176 
1177 	read_text_to_slash:
1178 	  if (ch == EOF)
1179 	    bad_prog(_(EXPECTED_SLASH));
1180 
1181 	  if (ch == '\\')
1182 	    ch = inchar();
1183 	  else
1184 	    {
1185 	      if (posixicity == POSIXLY_BASIC)
1186 		bad_prog(_(EXPECTED_SLASH));
1187 	      savchar(ch);
1188 	      ch = '\n';
1189 	    }
1190 
1191 	  read_text(&cur_cmd->x.cmd_txt, ch);
1192 	  break;
1193 
1194 	case ':':
1195 	  if (cur_cmd->a1)
1196 	    bad_prog(_(NO_COLON_ADDR));
1197 	  labels = setup_label(labels, vector->v_length, read_label(), NULL);
1198 	  break;
1199 
1200 	case 'T':
1201 	case 'b':
1202 	case 't':
1203 	  jumps = setup_label(jumps, vector->v_length, read_label(), NULL);
1204 	  break;
1205 
1206 	case 'Q':
1207 	case 'q':
1208 	  if (cur_cmd->a2)
1209 	    bad_prog(_(ONE_ADDR));
1210 	  /* Fall through */
1211 
1212 	case 'L':
1213 	case 'l':
1214 	  ch = in_nonblank();
1215 	  if (ISDIGIT(ch) && posixicity != POSIXLY_BASIC)
1216 	    {
1217 	      cur_cmd->x.int_arg = in_integer(ch);
1218 	      ch = in_nonblank();
1219 	    }
1220 	  else
1221 	    cur_cmd->x.int_arg = -1;
1222 
1223 	  if (ch == CLOSE_BRACE || ch == '#')
1224 	    savchar(ch);
1225 	  else if (ch != EOF && ch != '\n' && ch != ';')
1226 	    bad_prog(_(EXCESS_JUNK));
1227 
1228 	  break;
1229 
1230 	case '=':
1231 	case 'd':
1232 	case 'D':
1233 	case 'g':
1234 	case 'G':
1235 	case 'h':
1236 	case 'H':
1237 	case 'n':
1238 	case 'N':
1239 	case 'p':
1240 	case 'P':
1241 	case 'z':
1242 	case 'x':
1243 	  ch = in_nonblank();
1244 	  if (ch == CLOSE_BRACE || ch == '#')
1245 	    savchar(ch);
1246 	  else if (ch != EOF && ch != '\n' && ch != ';')
1247 	    bad_prog(_(EXCESS_JUNK));
1248 	  break;
1249 
1250 	case 'r':
1251 	  b = read_filename();
1252 	  cur_cmd->x.fname = ck_strdup(get_buffer(b));
1253 	  free_buffer(b);
1254 	  break;
1255 
1256         case 'R':
1257 	  cur_cmd->x.fp = get_openfile(&file_read, read_mode, false)->fp;
1258 	  break;
1259 
1260         case 'W':
1261 	case 'w':
1262 	  cur_cmd->x.outf = get_openfile(&file_write, "w", true);
1263 	  break;
1264 
1265 	case 's':
1266 	  {
1267 	    struct buffer *b2;
1268 	    int flags;
1269 	    int slash;
1270 
1271 	    slash = inchar();
1272 	    if ( !(b  = match_slash(slash, true)) )
1273 	      bad_prog(_(UNTERM_S_CMD));
1274 	    if ( !(b2 = match_slash(slash, false)) )
1275 	      bad_prog(_(UNTERM_S_CMD));
1276 
1277 	    cur_cmd->x.cmd_subst = OB_MALLOC(&obs, 1, struct subst);
1278 	    setup_replacement(cur_cmd->x.cmd_subst,
1279 			      get_buffer(b2), size_buffer(b2));
1280 	    free_buffer(b2);
1281 
1282 	    flags = mark_subst_opts(cur_cmd->x.cmd_subst);
1283 	    cur_cmd->x.cmd_subst->regx =
1284 	      compile_regex(b, flags, cur_cmd->x.cmd_subst->max_id + 1);
1285 	    free_buffer(b);
1286 	  }
1287 	  break;
1288 
1289 	case 'y':
1290 	  {
1291 	    size_t len, dest_len;
1292 	    int slash;
1293 	    struct buffer *b2;
1294             char *src_buf, *dest_buf;
1295 
1296 	    slash = inchar();
1297 	    if ( !(b = match_slash(slash, false)) )
1298 	      bad_prog(_(UNTERM_Y_CMD));
1299             src_buf = get_buffer(b);
1300 	    len = normalize_text(src_buf, size_buffer (b), TEXT_BUFFER);
1301 
1302             if ( !(b2 = match_slash(slash, false)) )
1303  	      bad_prog(_(UNTERM_Y_CMD));
1304             dest_buf = get_buffer(b2);
1305 	    dest_len = normalize_text(dest_buf, size_buffer (b2), TEXT_BUFFER);
1306 
1307             if (mb_cur_max > 1)
1308 	      {
1309                 int i, j, idx, src_char_num;
1310                 size_t *src_lens = MALLOC(len, size_t);
1311                 char **trans_pairs;
1312                 size_t mbclen;
1313                 mbstate_t cur_stat;
1314 
1315                 /* Enumerate how many character the source buffer has.  */
1316                 memset(&cur_stat, 0, sizeof(mbstate_t));
1317                 for (i = 0, j = 0; i < len;)
1318                   {
1319                     mbclen = MBRLEN (src_buf + i, len - i, &cur_stat);
1320                     /* An invalid sequence, or a truncated multibyte character.
1321                        We treat it as a singlebyte character.  */
1322                     if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1323                         || mbclen == 0)
1324                       mbclen = 1;
1325                     src_lens[j++] = mbclen;
1326                     i += mbclen;
1327                   }
1328                 src_char_num = j;
1329 
1330                 memset(&cur_stat, 0, sizeof(mbstate_t));
1331                 idx = 0;
1332 
1333                 /* trans_pairs = {src(0), dest(0), src(1), dest(1), ..., NULL}
1334                      src(i) : pointer to i-th source character.
1335                      dest(i) : pointer to i-th destination character.
1336                      NULL : terminator */
1337                 trans_pairs = MALLOC(2 * src_char_num + 1, char*);
1338                 cur_cmd->x.translatemb = trans_pairs;
1339                 for (i = 0; i < src_char_num; i++)
1340                   {
1341                     if (idx >= dest_len)
1342                       bad_prog(_(Y_CMD_LEN));
1343 
1344                     /* Set the i-th source character.  */
1345                     trans_pairs[2 * i] = MALLOC(src_lens[i] + 1, char);
1346                     strncpy(trans_pairs[2 * i], src_buf, src_lens[i]);
1347                     trans_pairs[2 * i][src_lens[i]] = '\0';
1348                     src_buf += src_lens[i]; /* Forward to next character.  */
1349 
1350                     /* Fetch the i-th destination character.  */
1351                     mbclen = MBRLEN (dest_buf + idx, dest_len - idx, &cur_stat);
1352                     /* An invalid sequence, or a truncated multibyte character.
1353                        We treat it as a singlebyte character.  */
1354                     if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1355                         || mbclen == 0)
1356                       mbclen = 1;
1357 
1358                     /* Set the i-th destination character.  */
1359                     trans_pairs[2 * i + 1] = MALLOC(mbclen + 1, char);
1360                     strncpy(trans_pairs[2 * i + 1], dest_buf + idx, mbclen);
1361                     trans_pairs[2 * i + 1][mbclen] = '\0';
1362                     idx += mbclen; /* Forward to next character.  */
1363                   }
1364                 trans_pairs[2 * i] = NULL;
1365                 if (idx != dest_len)
1366                   bad_prog(_(Y_CMD_LEN));
1367               }
1368             else
1369               {
1370 	        unsigned char *translate =
1371 		  OB_MALLOC(&obs, YMAP_LENGTH, unsigned char);
1372                 unsigned char *ustring = CAST(unsigned char *)src_buf;
1373 
1374 		if (len != dest_len)
1375                   bad_prog(_(Y_CMD_LEN));
1376 
1377 	        for (len = 0; len < YMAP_LENGTH; len++)
1378 	          translate[len] = len;
1379 
1380                 while (dest_len--)
1381                   translate[*ustring++] = (unsigned char)*dest_buf++;
1382 
1383 	        cur_cmd->x.translate = translate;
1384 	      }
1385 
1386             if ((ch = in_nonblank()) != EOF && ch != '\n' && ch != ';')
1387               bad_prog(_(EXCESS_JUNK));
1388 
1389             free_buffer(b);
1390             free_buffer(b2);
1391 	  }
1392 	break;
1393 
1394 	case EOF:
1395 	  bad_prog(_(NO_COMMAND));
1396 	  /*NOTREACHED*/
1397 
1398 	default:
1399 	  bad_command (ch);
1400 	  /*NOTREACHED*/
1401 	}
1402 
1403       /* this is buried down here so that "continue" statements will miss it */
1404       ++vector->v_length;
1405     }
1406   return vector;
1407 }
1408 
1409 
1410 /* deal with \X escapes */
1411 size_t
normalize_text(buf,len,buftype)1412 normalize_text(buf, len, buftype)
1413   char *buf;
1414   size_t len;
1415   enum text_types buftype;
1416 {
1417   const char *bufend = buf + len;
1418   char *p = buf;
1419   char *q = buf;
1420 
1421   /* This variable prevents normalizing text within bracket
1422      subexpressions when conforming to POSIX.  If 0, we
1423      are not within a bracket expression.  If -1, we are within a
1424      bracket expression but are not within [.FOO.], [=FOO=],
1425      or [:FOO:].  Otherwise, this is the '.', '=', or ':'
1426      respectively within these three types of subexpressions.  */
1427   int bracket_state = 0;
1428 
1429   int mbclen;
1430   mbstate_t cur_stat;
1431   memset(&cur_stat, 0, sizeof(mbstate_t));
1432 
1433   while (p < bufend)
1434     {
1435       int c;
1436       mbclen = MBRLEN (p, bufend - p, &cur_stat);
1437       if (mbclen != 1)
1438 	{
1439           /* An invalid sequence, or a truncated multibyte character.
1440              We treat it as a singlebyte character.  */
1441           if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
1442             mbclen = 1;
1443 
1444           memmove (q, p, mbclen);
1445           q += mbclen;
1446           p += mbclen;
1447 	  continue;
1448 	}
1449 
1450       if (*p == '\\' && p+1 < bufend && bracket_state == 0)
1451 	switch ( (c = *++p) )
1452 	  {
1453 #if defined __STDC__ && __STDC__-0
1454 	  case 'a': *q++ = '\a'; p++; continue;
1455 #else /* Not STDC; we'll just assume ASCII */
1456 	  case 'a': *q++ = '\007'; p++; continue;
1457 #endif
1458 	  /* case 'b': *q++ = '\b'; p++; continue; --- conflicts with \b RE */
1459 	  case 'f': *q++ = '\f'; p++; continue;
1460 	  case '\n': /*fall through */
1461 	  case 'n': *q++ = '\n'; p++; continue;
1462 	  case 'r': *q++ = '\r'; p++; continue;
1463 	  case 't': *q++ = '\t'; p++; continue;
1464 	  case 'v': *q++ = '\v'; p++; continue;
1465 
1466 	  case 'd': /* decimal byte */
1467 	    p = convert_number(q, p+1, bufend, 10, 3, 'd');
1468 	    q++;
1469 	    continue;
1470 
1471 	  case 'x': /* hexadecimal byte */
1472 	    p = convert_number(q, p+1, bufend, 16, 2, 'x');
1473 	    q++;
1474 	    continue;
1475 
1476 #ifdef REG_PERL
1477 	  case '0': case '1': case '2': case '3':
1478 	  case '4': case '5': case '6': case '7':
1479 	    if ((extended_regexp_flags & REG_PERL)
1480 		&& p+1 < bufend
1481 		&& p[1] >= '0' && p[1] <= '9')
1482 	      {
1483 		p = convert_number(q, p, bufend, 8, 3, *p);
1484 		q++;
1485 	      }
1486 	    else
1487 	      {
1488 		/* we just pass the \ up one level for interpretation */
1489 	        if (buftype != TEXT_BUFFER)
1490 		  *q++ = '\\';
1491 	      }
1492 
1493 	    continue;
1494 
1495 	  case 'o': /* octal byte */
1496 	    if (!(extended_regexp_flags & REG_PERL))
1497 	      {
1498 	        p = convert_number(q, p+1, bufend,  8, 3, 'o');
1499 		q++;
1500 	      }
1501 	    else
1502 	      {
1503 	        /* we just pass the \ up one level for interpretation */
1504 	        if (buftype != TEXT_BUFFER)
1505 		  *q++ = '\\';
1506 	      }
1507 
1508 	    continue;
1509 #else
1510 	  case 'o': /* octal byte */
1511 	    p = convert_number(q, p+1, bufend,  8, 3, 'o');
1512 	    q++;
1513 	    continue;
1514 #endif
1515 
1516 	  case 'c':
1517 	    if (++p < bufend)
1518 	      {
1519 		*q++ = toupper(*p) ^ 0x40;
1520 		p++;
1521 		continue;
1522 	      }
1523 	    else
1524 	      {
1525 	        /* we just pass the \ up one level for interpretation */
1526 	        if (buftype != TEXT_BUFFER)
1527 		  *q++ = '\\';
1528 	        continue;
1529 	      }
1530 
1531 	  default:
1532 	    /* we just pass the \ up one level for interpretation */
1533 	    if (buftype != TEXT_BUFFER)
1534 	      *q++ = '\\';
1535 	    break;
1536 	  }
1537       else if (buftype == TEXT_REGEX && posixicity != POSIXLY_EXTENDED)
1538         switch (*p)
1539           {
1540           case '[':
1541             if (!bracket_state)
1542               bracket_state = -1;
1543             break;
1544 
1545 	  case ':':
1546 	  case '.':
1547 	  case '=':
1548             if (bracket_state == -1 && p[-1] == '[')
1549               bracket_state = *p;
1550             break;
1551 
1552           case ']':
1553             if (bracket_state == 0)
1554 	      ;
1555             else if (bracket_state == -1)
1556               bracket_state = 0;
1557             else if (p[-2] != bracket_state && p[-1] == bracket_state)
1558               bracket_state = -1;
1559             break;
1560           }
1561 
1562       *q++ = *p++;
1563     }
1564     return (size_t)(q - buf);
1565 }
1566 
1567 
1568 /* `str' is a string (from the command line) that contains a sed command.
1569    Compile the command, and add it to the end of `cur_program'. */
1570 struct vector *
compile_string(cur_program,str,len)1571 compile_string(cur_program, str, len)
1572   struct vector *cur_program;
1573   char *str;
1574   size_t len;
1575 {
1576   static countT string_expr_count = 0;
1577   struct vector *ret;
1578 
1579   prog.file = NULL;
1580   prog.base = CAST(unsigned char *)str;
1581   prog.cur = prog.base;
1582   prog.end = prog.cur + len;
1583 
1584   cur_input.line = 0;
1585   cur_input.name = NULL;
1586   cur_input.string_expr_count = ++string_expr_count;
1587 
1588   ret = compile_program(cur_program);
1589   prog.base = NULL;
1590   prog.cur = NULL;
1591   prog.end = NULL;
1592 
1593   first_script = false;
1594   return ret;
1595 }
1596 
1597 /* `cmdfile' is the name of a file containing sed commands.
1598    Read them in and add them to the end of `cur_program'.
1599  */
1600 struct vector *
compile_file(cur_program,cmdfile)1601 compile_file(cur_program, cmdfile)
1602   struct vector *cur_program;
1603   const char *cmdfile;
1604 {
1605   struct vector *ret;
1606 
1607   prog.file = stdin;
1608   if (cmdfile[0] != '-' || cmdfile[1] != '\0')
1609     prog.file = ck_fopen(cmdfile, "rt", true);
1610 
1611   cur_input.line = 1;
1612   cur_input.name = cmdfile;
1613   cur_input.string_expr_count = 0;
1614 
1615   ret = compile_program(cur_program);
1616   if (prog.file != stdin)
1617     ck_fclose(prog.file);
1618   prog.file = NULL;
1619 
1620   first_script = false;
1621   return ret;
1622 }
1623 
1624 /* Make any checks which require the whole program to have been read.
1625    In particular: this backpatches the jump targets.
1626    Any cleanup which can be done after these checks is done here also.  */
1627 void
check_final_program(program)1628 check_final_program(program)
1629   struct vector *program;
1630 {
1631   struct sed_label *go;
1632   struct sed_label *lbl;
1633 
1634   /* do all "{"s have a corresponding "}"? */
1635   if (blocks)
1636     {
1637       /* update info for error reporting: */
1638       MEMCPY(&cur_input, &blocks->err_info, sizeof (cur_input));
1639       bad_prog(_(EXCESS_OPEN_BRACE));
1640     }
1641 
1642   /* was the final command an unterminated a/c/i command? */
1643   if (pending_text)
1644     {
1645       old_text_buf->text_length = size_buffer(pending_text);
1646       if (old_text_buf->text_length)
1647         old_text_buf->text = MEMDUP(get_buffer(pending_text),
1648 				    old_text_buf->text_length, char);
1649       free_buffer(pending_text);
1650       pending_text = NULL;
1651     }
1652 
1653   for (go = jumps; go; go = release_label(go))
1654     {
1655       for (lbl = labels; lbl; lbl = lbl->next)
1656 	if (strcmp(lbl->name, go->name) == 0)
1657 	  break;
1658       if (lbl)
1659 	{
1660 	  program->v[go->v_index].x.jump_index = lbl->v_index;
1661 	}
1662       else
1663 	{
1664 	  if (*go->name)
1665 	    panic(_("can't find label for jump to `%s'"), go->name);
1666 	  program->v[go->v_index].x.jump_index = program->v_length;
1667 	}
1668     }
1669   jumps = NULL;
1670 
1671   for (lbl = labels; lbl; lbl = release_label(lbl))
1672     ;
1673   labels = NULL;
1674 
1675   /* There is no longer a need to track file names: */
1676   {
1677     struct output *p;
1678 
1679     for (p=file_read; p; p=p->link)
1680       if (p->name)
1681 	{
1682 	  FREE(p->name);
1683 	  p->name = NULL;
1684 	}
1685 
1686     for (p=file_write; p; p=p->link)
1687       if (p->name)
1688 	{
1689 	  FREE(p->name);
1690 	  p->name = NULL;
1691 	}
1692   }
1693 }
1694 
1695 /* Rewind all resources which were allocated in this module. */
1696 void
rewind_read_files()1697 rewind_read_files()
1698 {
1699   struct output *p;
1700 
1701   for (p=file_read; p; p=p->link)
1702     if (p->fp)
1703       rewind(p->fp);
1704 }
1705 
1706 /* Release all resources which were allocated in this module. */
1707 void
finish_program(program)1708 finish_program(program)
1709   struct vector *program;
1710 {
1711   /* close all files... */
1712   {
1713     struct output *p, *q;
1714 
1715     for (p=file_read; p; p=q)
1716       {
1717 	if (p->fp)
1718 	  ck_fclose(p->fp);
1719 	q = p->link;
1720 #if 0
1721 	/* We use obstacks. */
1722 	FREE(p);
1723 #endif
1724       }
1725 
1726     for (p=file_write; p; p=q)
1727       {
1728 	if (p->fp)
1729 	  ck_fclose(p->fp);
1730 	q = p->link;
1731 #if 0
1732 	/* We use obstacks. */
1733 	FREE(p);
1734 #endif
1735       }
1736     file_read = file_write = NULL;
1737   }
1738 
1739 #ifdef DEBUG_LEAKS
1740   obstack_free (&obs, NULL);
1741 #endif /*DEBUG_LEAKS*/
1742 }
1743