1 /*  GNU SED, a batch stream editor.
2     Copyright (C) 1999, 2002, 2003, 2004, 2005, 2006
3     Free Software Foundation, Inc.
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 3, or (at your option)
8     any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 
19 #include "sed.h"
20 
21 #include <ctype.h>
22 #include <string.h>
23 #include <stdio.h>
24 #ifdef HAVE_STDLIB_H
25 # include <stdlib.h>
26 #endif
27 
/* N_() wraps a string literal without translating it: it expands to
   gettext_noop(String) when that macro is available and to the
   parenthesized string otherwise.  The messages wrapped with N_() below
   are passed through _() at the point where they are reported.  */
#ifdef gettext_noop
# define N_(String) gettext_noop(String)
#else
# define N_(String) (String)
#endif

/* NOTE(review): declared here but not referenced in the visible part of
   this file -- confirm whether it is still needed.  */
extern bool use_extended_syntax_p;

/* All error messages of this file packed into one array, separated by
   embedded NUL characters (the last one is terminated by the implicit
   NUL of the string literal).  */
static const char errors[] =
  "no previous regular expression\0"
  "cannot specify modifiers on empty regexp";

/* Pointers to the individual messages inside errors[].  Each offset is
   computed with sizeof on a copy of the preceding message, which counts
   its terminating NUL, so the literal text inside every N_() below must
   stay identical to the corresponding text in errors[].  END_ERRORS
   points one past the last message; it is not used in the visible part
   of this file.  */
#define NO_REGEX (errors)
#define BAD_MODIF (NO_REGEX + sizeof(N_("no previous regular expression")))
#define END_ERRORS (BAD_MODIF + sizeof(N_("cannot specify modifiers on empty regexp")))
43 
44 
45 
46 static void
compile_regex_1(new_regex,needed_sub)47 compile_regex_1 (new_regex, needed_sub)
48   struct regex *new_regex;
49   int needed_sub;
50 {
51 #ifdef REG_PERL
52   int errcode;
53   errcode = regncomp(&new_regex->pattern, new_regex->re, new_regex->sz,
54 		     (needed_sub ? 0 : REG_NOSUB)
55 		     | new_regex->flags
56 		     | extended_regexp_flags);
57 
58   if (errcode)
59     {
60       char errorbuf[200];
61       regerror(errcode, NULL, errorbuf, 200);
62       bad_prog(gettext(errorbuf));
63     }
64 #else
65   const char *error;
66   int syntax = ((extended_regexp_flags & REG_EXTENDED)
67 		 ? RE_SYNTAX_POSIX_EXTENDED
68                  : RE_SYNTAX_POSIX_BASIC);
69 
70   syntax &= ~RE_DOT_NOT_NULL;
71   syntax |= RE_NO_POSIX_BACKTRACKING;
72 
73   switch (posixicity)
74     {
75     case POSIXLY_EXTENDED:
76       syntax &= ~RE_UNMATCHED_RIGHT_PAREN_ORD;
77       break;
78     case POSIXLY_CORRECT:
79       syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD;
80       break;
81     case POSIXLY_BASIC:
82       syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD | RE_LIMITED_OPS | RE_NO_GNU_OPS;
83       break;
84     }
85 
86 #ifdef RE_ICASE
87   syntax |= (new_regex->flags & REG_ICASE) ? RE_ICASE : 0;
88 #endif
89 #ifdef RE_NO_SUB
90   syntax |= needed_sub ? 0 : RE_NO_SUB;
91 #endif
92 
93   new_regex->pattern.fastmap = malloc (1 << (sizeof (char) * 8));
94 
95   /* If REG_NEWLINE is set, newlines are treated differently.  */
96   if (new_regex->flags & REG_NEWLINE)
97     {
98       /* REG_NEWLINE implies neither . nor [^...] match newline.  */
99       syntax &= ~RE_DOT_NEWLINE;
100       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
101     }
102 
103   re_set_syntax (syntax);
104   error = re_compile_pattern (new_regex->re, new_regex->sz,
105 			      &new_regex->pattern);
106   new_regex->pattern.newline_anchor = (new_regex->flags & REG_NEWLINE) != 0;
107 
108   new_regex->pattern.translate = NULL;
109 #ifndef RE_ICASE
110   if (new_regex->flags & REG_ICASE)
111     {
112       static char translate[1 << (sizeof(char) * 8)];
113       int i;
114       for (i = 0; i < sizeof(translate) / sizeof(char); i++)
115 	translate[i] = tolower (i);
116 
117       new_regex->pattern.translate = translate;
118     }
119 #endif
120 
121   if (error)
122     bad_prog(error);
123 #endif
124 
125   /* Just to be sure, I mark this as not POSIXLY_CORRECT behavior */
126   if (needed_sub
127       && new_regex->pattern.re_nsub < needed_sub - 1
128       && posixicity == POSIXLY_EXTENDED)
129     {
130       char buf[200];
131       sprintf(buf, _("invalid reference \\%d on `s' command's RHS"),
132 	      needed_sub - 1);
133       bad_prog(buf);
134     }
135 }
136 
137 struct regex *
compile_regex(b,flags,needed_sub)138 compile_regex(b, flags, needed_sub)
139   struct buffer *b;
140   int flags;
141   int needed_sub;
142 {
143   struct regex *new_regex;
144   size_t re_len;
145 
146   /* // matches the last RE */
147   if (size_buffer(b) == 0)
148     {
149       if (flags > 0)
150 	bad_prog(_(BAD_MODIF));
151       return NULL;
152     }
153 
154   re_len = size_buffer(b);
155   new_regex = ck_malloc(sizeof (struct regex) + re_len - 1);
156   new_regex->flags = flags;
157   memcpy (new_regex->re, get_buffer(b), re_len);
158 
159 #ifdef REG_PERL
160   new_regex->sz = re_len;
161 #else
162   /* GNU regex does not process \t & co. */
163   new_regex->sz = normalize_text(new_regex->re, re_len, TEXT_REGEX);
164 #endif
165 
166   compile_regex_1 (new_regex, needed_sub);
167   return new_regex;
168 }
169 
#ifdef REG_PERL
/* Copy the NREGS match offsets in PMATCH into the start/end arrays of
   REGS, allocating or enlarging those arrays when they cannot hold
   NREGS + 1 entries.  All entries from index NREGS up to
   REGS->num_regs are set to -1.  */
static void
copy_regs (regs, pmatch, nregs)
     struct re_registers *regs;
     regmatch_t *pmatch;
     int nregs;
{
  int slot = 0;
  /* One extra element beyond the real registers is needed for the
     `-1' marker GNU code uses.  */
  int wanted = nregs + 1;

  if (regs->start == NULL)
    {
      /* Nothing allocated yet: create both arrays.  */
      regs->start = MALLOC (wanted, regoff_t);
      regs->end = MALLOC (wanted, regoff_t);
      regs->num_regs = wanted;
    }
  else if (regs->num_regs < wanted)
    {
      /* Already allocated but too small: enlarge both arrays.  */
      regs->start = REALLOC (regs->start, wanted, regoff_t);
      regs->end = REALLOC (regs->end, wanted, regoff_t);
      regs->num_regs = wanted;
    }

  /* Transfer the real registers...  */
  while (slot < nregs)
    {
      regs->start[slot] = pmatch[slot].rm_so;
      regs->end[slot] = pmatch[slot].rm_eo;
      ++slot;
    }

  /* ...and mark every remaining slot as unused.  */
  while (slot < regs->num_regs)
    {
      regs->end[slot] = -1;
      regs->start[slot] = -1;
      ++slot;
    }
}
#endif
207 
208 int
match_regex(regex,buf,buflen,buf_start_offset,regarray,regsize)209 match_regex(regex, buf, buflen, buf_start_offset, regarray, regsize)
210   struct regex *regex;
211   char *buf;
212   size_t buflen;
213   size_t buf_start_offset;
214   struct re_registers *regarray;
215   int regsize;
216 {
217   int ret;
218   static struct regex *regex_last;
219 #ifdef REG_PERL
220   regmatch_t rm[10], *regmatch = rm;
221   if (regsize > 10)
222     regmatch = (regmatch_t *) alloca (sizeof (regmatch_t) * regsize);
223 #endif
224 
225   /* printf ("Matching from %d/%d\n", buf_start_offset, buflen); */
226 
227   /* Keep track of the last regexp matched. */
228   if (!regex)
229     {
230       regex = regex_last;
231       if (!regex_last)
232 	bad_prog(_(NO_REGEX));
233     }
234   else
235     regex_last = regex;
236 
237 #ifdef REG_PERL
238   regmatch[0].rm_so = CAST(int)buf_start_offset;
239   regmatch[0].rm_eo = CAST(int)buflen;
240   ret = regexec (&regex->pattern, buf, regsize, regmatch, REG_STARTEND);
241 
242   if (regsize)
243     copy_regs (regarray, regmatch, regsize);
244 
245   return (ret == 0);
246 #else
247   if (regex->pattern.no_sub && regsize)
248     compile_regex_1 (regex, regsize);
249 
250   regex->pattern.regs_allocated = REGS_REALLOCATE;
251 
252   ret = re_search (&regex->pattern, buf, buflen, buf_start_offset,
253 		   buflen - buf_start_offset,
254 		   regsize ? regarray : NULL);
255 
256   return (ret > -1);
257 #endif
258 }
259 
260 
#ifdef DEBUG_LEAKS
/* Dispose of REGEX: release the compiled pattern with regfree(), then
   release the struct regex itself with FREE.  Only built when
   DEBUG_LEAKS is defined.  REGEX is used without a NULL check, so
   callers must pass a valid regex.  */
void
release_regex(regex)
  struct regex *regex;
{
  regfree(&regex->pattern);
  FREE(regex);
}
#endif /*DEBUG_LEAKS*/
270