#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <regex.h>
#include <assert.h>

#include "main.ih"
8 
/* Shared state of the test driver. */
char *progname;			/* argv[0], used in the usage message */
int debug = 0;			/* bumped once per -x; enables per-line echo in regress() */
int line = 0;			/* current input line number, for diagnostics */
int status = 0;			/* exit status; set to 1 when a check fails */

/* Defaults that options() starts from; replaced by -c / -e. */
int copts = REG_EXTENDED;	/* regcomp() flags */
int eopts = 0;			/* regexec() flags */
regoff_t startoff = 0;		/* -S: start offset used with REG_STARTEND */
regoff_t endoff = 0;		/* -E: distance from end of string used with REG_STARTEND */


/*
 * Helpers defined outside this file.  The prototypes mirror how they are
 * called here; NOTE(review): confirm against their definitions.
 */
extern int split(char *string, char *fields[], int nfields, char *sep);
extern void regprint(regex_t *r, FILE *d);
22 
23 /*
24  - main - do the simple case, hand off to regress() for regression
25  */
main(argc,argv)26 main(argc, argv)
27 int argc;
28 char *argv[];
29 {
30 	regex_t re;
31 #	define	NS	10
32 	regmatch_t subs[NS];
33 	char erbuf[100];
34 	int err;
35 	size_t len;
36 	int c;
37 	int errflg = 0;
38 	register int i;
39 	extern int optind;
40 	extern char *optarg;
41 
42 	progname = argv[0];
43 
44 	while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
45 		switch (c) {
46 		case 'c':	/* compile options */
47 			copts = options('c', optarg);
48 			break;
49 		case 'e':	/* execute options */
50 			eopts = options('e', optarg);
51 			break;
52 		case 'S':	/* start offset */
53 			startoff = (regoff_t)atoi(optarg);
54 			break;
55 		case 'E':	/* end offset */
56 			endoff = (regoff_t)atoi(optarg);
57 			break;
58 		case 'x':	/* Debugging. */
59 			debug++;
60 			break;
61 		case '?':
62 		default:
63 			errflg++;
64 			break;
65 		}
66 	if (errflg) {
67 		fprintf(stderr, "usage: %s ", progname);
68 		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
69 		exit(2);
70 	}
71 
72 	if (optind >= argc) {
73 		regress(stdin);
74 		exit(status);
75 	}
76 
77 	err = regcomp(&re, argv[optind++], copts);
78 	if (err) {
79 		len = regerror(err, &re, erbuf, sizeof(erbuf));
80 		fprintf(stderr, "error %s, %d/%d `%s'\n",
81 			eprint(err), len, sizeof(erbuf), erbuf);
82 		exit(status);
83 	}
84 	regprint(&re, stdout);
85 
86 	if (optind >= argc) {
87 		regfree(&re);
88 		exit(status);
89 	}
90 
91 	if (eopts&REG_STARTEND) {
92 		subs[0].rm_so = startoff;
93 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
94 	}
95 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
96 	if (err) {
97 		len = regerror(err, &re, erbuf, sizeof(erbuf));
98 		fprintf(stderr, "error %s, %d/%d `%s'\n",
99 			eprint(err), len, sizeof(erbuf), erbuf);
100 		exit(status);
101 	}
102 	if (!(copts&REG_NOSUB)) {
103 		len = (int)(subs[0].rm_eo - subs[0].rm_so);
104 		if (subs[0].rm_so != -1) {
105 			if (len != 0)
106 				printf("match `%.*s'\n", len,
107 					argv[optind] + subs[0].rm_so);
108 			else
109 				printf("match `'@%.1s\n",
110 					argv[optind] + subs[0].rm_so);
111 		}
112 		for (i = 1; i < NS; i++)
113 			if (subs[i].rm_so != -1)
114 				printf("(%d) `%.*s'\n", i,
115 					(int)(subs[i].rm_eo - subs[i].rm_so),
116 					argv[optind] + subs[i].rm_so);
117 	}
118 	exit(status);
119 }
120 
121 /*
122  - regress - main loop of regression test
123  == void regress(FILE *in);
124  */
125 void
regress(in)126 regress(in)
127 FILE *in;
128 {
129 	char inbuf[1000];
130 #	define	MAXF	10
131 	char *f[MAXF];
132 	int nf;
133 	int i;
134 	char erbuf[100];
135 	size_t ne;
136 	char *badpat = "invalid regular expression";
137 #	define	SHORT	10
138 	char *bpname = "REG_BADPAT";
139 	regex_t re;
140 
141 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
142 		line++;
143 		if (inbuf[0] == '#' || inbuf[0] == '\n')
144 			continue;			/* NOTE CONTINUE */
145 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
146 		if (debug)
147 			fprintf(stdout, "%d:\n", line);
148 		nf = split(inbuf, f, MAXF, "\t\t");
149 		if (nf < 3) {
150 			fprintf(stderr, "bad input, line %d\n", line);
151 			exit(1);
152 		}
153 		for (i = 0; i < nf; i++)
154 			if (strcmp(f[i], "\"\"") == 0)
155 				f[i] = "";
156 		if (nf <= 3)
157 			f[3] = NULL;
158 		if (nf <= 4)
159 			f[4] = NULL;
160 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
161 		if (opt('&', f[1]))	/* try with either type of RE */
162 			try(f[0], f[1], f[2], f[3], f[4],
163 					options('c', f[1]) &~ REG_EXTENDED);
164 	}
165 
166 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
167 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
168 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
169 							erbuf, badpat);
170 		status = 1;
171 	}
172 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
173 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
174 						ne != strlen(badpat)+1) {
175 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
176 						erbuf, SHORT-1, badpat);
177 		status = 1;
178 	}
179 	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
180 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
181 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
182 						erbuf, bpname);
183 		status = 1;
184 	}
185 	re.re_endp = bpname;
186 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
187 	if (atoi(erbuf) != (int)REG_BADPAT) {
188 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
189 						erbuf, (long)REG_BADPAT);
190 		status = 1;
191 	} else if (ne != strlen(erbuf)+1) {
192 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
193 						erbuf, (long)REG_BADPAT);
194 		status = 1;
195 	}
196 }
197 
198 /*
199  - try - try it, and report on problems
200  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
201  */
202 void
try(f0,f1,f2,f3,f4,opts)203 try(f0, f1, f2, f3, f4, opts)
204 char *f0;
205 char *f1;
206 char *f2;
207 char *f3;
208 char *f4;
209 int opts;			/* may not match f1 */
210 {
211 	regex_t re;
212 #	define	NSUBS	10
213 	regmatch_t subs[NSUBS];
214 #	define	NSHOULD	15
215 	char *should[NSHOULD];
216 	int nshould;
217 	char erbuf[100];
218 	int err;
219 	int len;
220 	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
221 	register int i;
222 	char *grump;
223 	char f0copy[1000];
224 	char f2copy[1000];
225 
226 	strcpy(f0copy, f0);
227 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
228 	fixstr(f0copy);
229 	err = regcomp(&re, f0copy, opts);
230 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
231 		/* unexpected error or wrong error */
232 		len = regerror(err, &re, erbuf, sizeof(erbuf));
233 		fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
234 					line, type, eprint(err), len,
235 					sizeof(erbuf), erbuf);
236 		status = 1;
237 	} else if (err == 0 && opt('C', f1)) {
238 		/* unexpected success */
239 		fprintf(stderr, "%d: %s should have given REG_%s\n",
240 						line, type, f2);
241 		status = 1;
242 		err = 1;	/* so we won't try regexec */
243 	}
244 
245 	if (err != 0) {
246 		regfree(&re);
247 		return;
248 	}
249 
250 	strcpy(f2copy, f2);
251 	fixstr(f2copy);
252 
253 	if (options('e', f1)&REG_STARTEND) {
254 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
255 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
256 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
257 		subs[0].rm_eo = strchr(f2, ')') - f2;
258 	}
259 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
260 
261 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
262 		/* unexpected error or wrong error */
263 		len = regerror(err, &re, erbuf, sizeof(erbuf));
264 		fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
265 					line, type, eprint(err), len,
266 					sizeof(erbuf), erbuf);
267 		status = 1;
268 	} else if (err != 0) {
269 		/* nothing more to check */
270 	} else if (f3 == NULL) {
271 		/* unexpected success */
272 		fprintf(stderr, "%d: %s exec should have failed\n",
273 						line, type);
274 		status = 1;
275 		err = 1;		/* just on principle */
276 	} else if (opts&REG_NOSUB) {
277 		/* nothing more to check */
278 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
279 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
280 		status = 1;
281 		err = 1;
282 	}
283 
284 	if (err != 0 || f4 == NULL) {
285 		regfree(&re);
286 		return;
287 	}
288 
289 	for (i = 1; i < NSHOULD; i++)
290 		should[i] = NULL;
291 	nshould = split(f4, should+1, NSHOULD-1, ",");
292 	if (nshould == 0) {
293 		nshould = 1;
294 		should[1] = "";
295 	}
296 	for (i = 1; i < NSUBS; i++) {
297 		grump = check(f2, subs[i], should[i]);
298 		if (grump != NULL) {
299 			fprintf(stderr, "%d: %s $%d %s\n", line,
300 							type, i, grump);
301 			status = 1;
302 			err = 1;
303 		}
304 	}
305 
306 	regfree(&re);
307 }
308 
309 /*
310  - options - pick options out of a regression-test string
311  == int options(int type, char *s);
312  */
313 int
options(type,s)314 options(type, s)
315 int type;			/* 'c' compile, 'e' exec */
316 char *s;
317 {
318 	register char *p;
319 	register int o = (type == 'c') ? copts : eopts;
320 	register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
321 
322 	for (p = s; *p != '\0'; p++)
323 		if (strchr(legal, *p) != NULL)
324 			switch (*p) {
325 			case 'b':
326 				o &= ~REG_EXTENDED;
327 				break;
328 			case 'i':
329 				o |= REG_ICASE;
330 				break;
331 			case 's':
332 				o |= REG_NOSUB;
333 				break;
334 			case 'n':
335 				o |= REG_NEWLINE;
336 				break;
337 			case 'm':
338 				o &= ~REG_EXTENDED;
339 				o |= REG_NOSPEC;
340 				break;
341 			case 'p':
342 				o |= REG_PEND;
343 				break;
344 			case '^':
345 				o |= REG_NOTBOL;
346 				break;
347 			case '$':
348 				o |= REG_NOTEOL;
349 				break;
350 			case '#':
351 				o |= REG_STARTEND;
352 				break;
353 			case 't':	/* trace */
354 				o |= REG_TRACE;
355 				break;
356 			case 'l':	/* force long representation */
357 				o |= REG_LARGE;
358 				break;
359 			case 'r':	/* force backref use */
360 				o |= REG_BACKR;
361 				break;
362 			}
363 	return(o);
364 }
365 
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 if character c occurs in s, 0 otherwise.  The search is done
 * with strchr(), so c == '\0' counts as present.
 */
int				/* predicate */
opt(int c, char *s)
{
	return(strchr(s, c) != NULL);
}
377 
/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 *
 * Rewrites the string in place: 'N' becomes newline, 'T' tab, 'S' space and
 * 'Z' a NUL byte.  The scan stops at the first NUL that was already in the
 * string, so it carries on past any NUL it has just written for a 'Z'.
 * A NULL p is accepted and ignored.
 */
void
fixstr(char *p)
{
	if (p == NULL)
		return;

	for (; *p != '\0'; p++)
		switch (*p) {
		case 'N':
			*p = '\n';
			break;
		case 'T':
			*p = '\t';
			break;
		case 'S':
			*p = ' ';
			break;
		case 'Z':
			*p = '\0';
			break;
		default:
			break;
		}
}
399 
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * Compares the match offsets in sub, taken as offsets into str, with the
 * expectation in should:
 *   NULL or "-"   no match expected
 *   "@text"       an empty match expected, located where text starts
 *                 ("@" alone: at the end of the string)
 *   anything else the exact matched text
 * Returns NULL when the match is as expected, otherwise a complaint.  The
 * complaint is a string literal or a static buffer that the next call
 * overwrites.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1)
		return((should == NULL) ? NULL : "did not match");

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
					"start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/* check for not supposed to match; must precede strlen(should) */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}

	/* check for wrong match */
	shlen = (int)strlen(should);
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
								len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
476 
477 /*
478  - eprint - convert error number to name
479  == static char *eprint(int err);
480  */
481 static char *
eprint(err)482 eprint(err)
483 int err;
484 {
485 	static char epbuf[100];
486 	size_t len;
487 
488 	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
489 	assert(len <= sizeof(epbuf));
490 	return(epbuf);
491 }
492 
493 /*
494  - efind - convert error name to number
495  == static int efind(char *name);
496  */
497 static int
efind(name)498 efind(name)
499 char *name;
500 {
501 	static char efbuf[100];
502 	size_t n;
503 	regex_t re;
504 
505 	sprintf(efbuf, "REG_%s", name);
506 	assert(strlen(efbuf) < sizeof(efbuf));
507 	re.re_endp = efbuf;
508 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
509 	return(atoi(efbuf));
510 }
511