1 /*
2  * testRegexp.c: simple module for testing regular expressions
3  *
4  * See Copyright for the status of this software.
5  *
6  * Daniel Veillard <veillard@redhat.com>
7  */
8 
9 #include "libxml.h"
10 #ifdef LIBXML_REGEXP_ENABLED
11 #include <string.h>
12 
13 #include <libxml/tree.h>
14 #include <libxml/xmlregexp.h>
15 
/* Set by -debug/--debug: main dumps extra information about the compiled
 * regexp (or parsed expression) before running the tests. */
static int debug = 0;
/* Set by -repeat/--repeat: testRegexp re-executes each match many more
 * times after reporting its result. */
static int repeat = 0;
18 
/*
 * testRegexp:
 * @comp:  a compiled regular expression
 * @value:  the string to match against @comp
 *
 * Executes @comp on @value once and prints "Ok" when xmlRegexpExec returns 1,
 * "Fail" when it returns 0, and "Error: <code>" for any other value.
 * When the global `repeat` flag is set, the match is then re-executed
 * 999999 more times with the results discarded.
 */
static void testRegexp(xmlRegexpPtr comp, const char *value) {
    const xmlChar *subject = (const xmlChar *) value;
    int status = xmlRegexpExec(comp, subject);

    switch (status) {
        case 1:
            printf("%s: Ok\n", value);
            break;
        case 0:
            printf("%s: Fail\n", value);
            break;
        default:
            printf("%s: Error: %d\n", value, status);
            break;
    }

    if (repeat != 0) {
        int remaining = 999999;

        /* Results are intentionally ignored; only the work matters here. */
        while (remaining-- > 0)
            xmlRegexpExec(comp, subject);
    }
}
35 
36 static void
testRegexpFile(const char * filename)37 testRegexpFile(const char *filename) {
38     xmlRegexpPtr comp = NULL;
39     FILE *input;
40     char expression[5000];
41     int len;
42 
43     input = fopen(filename, "r");
44     if (input == NULL) {
45         xmlGenericError(xmlGenericErrorContext,
46 		"Cannot open %s for reading\n", filename);
47 	return;
48     }
49     while (fgets(expression, 4500, input) != NULL) {
50 	len = strlen(expression);
51 	len--;
52 	while ((len >= 0) &&
53 	       ((expression[len] == '\n') || (expression[len] == '\t') ||
54 		(expression[len] == '\r') || (expression[len] == ' '))) len--;
55 	expression[len + 1] = 0;
56 	if (len >= 0) {
57 	    if (expression[0] == '#')
58 		continue;
59 	    if ((expression[0] == '=') && (expression[1] == '>')) {
60 		char *pattern = &expression[2];
61 
62 		if (comp != NULL) {
63 		    xmlRegFreeRegexp(comp);
64 		    comp = NULL;
65 		}
66 		printf("Regexp: %s\n", pattern) ;
67 		comp = xmlRegexpCompile((const xmlChar *) pattern);
68 		if (comp == NULL) {
69 		    printf("   failed to compile\n");
70 		    break;
71 		}
72 	    } else if (comp == NULL) {
73 		printf("Regexp: %s\n", expression) ;
74 		comp = xmlRegexpCompile((const xmlChar *) expression);
75 		if (comp == NULL) {
76 		    printf("   failed to compile\n");
77 		    break;
78 		}
79 	    } else if (comp != NULL) {
80 		testRegexp(comp, expression);
81 	    }
82 	}
83     }
84     fclose(input);
85     if (comp != NULL)
86 	xmlRegFreeRegexp(comp);
87 }
88 
89 #ifdef LIBXML_EXPR_ENABLED
90 static void
runFileTest(xmlExpCtxtPtr ctxt,const char * filename)91 runFileTest(xmlExpCtxtPtr ctxt, const char *filename) {
92     xmlExpNodePtr expr = NULL, sub;
93     FILE *input;
94     char expression[5000];
95     int len;
96 
97     input = fopen(filename, "r");
98     if (input == NULL) {
99         xmlGenericError(xmlGenericErrorContext,
100 		"Cannot open %s for reading\n", filename);
101 	return;
102     }
103     while (fgets(expression, 4500, input) != NULL) {
104 	len = strlen(expression);
105 	len--;
106 	while ((len >= 0) &&
107 	       ((expression[len] == '\n') || (expression[len] == '\t') ||
108 		(expression[len] == '\r') || (expression[len] == ' '))) len--;
109 	expression[len + 1] = 0;
110 	if (len >= 0) {
111 	    if (expression[0] == '#')
112 		continue;
113 	    if ((expression[0] == '=') && (expression[1] == '>')) {
114 		char *str = &expression[2];
115 
116 		if (expr != NULL) {
117 		    xmlExpFree(ctxt, expr);
118 		    if (xmlExpCtxtNbNodes(ctxt) != 0)
119 		        printf(" Parse/free of Expression leaked %d\n",
120 			       xmlExpCtxtNbNodes(ctxt));
121 		    expr = NULL;
122 		}
123 		printf("Expression: %s\n", str) ;
124 		expr = xmlExpParse(ctxt, str);
125 		if (expr == NULL) {
126 		    printf("   parsing Failed\n");
127 		    break;
128 		}
129 	    } else if (expr != NULL) {
130 	        int expect = -1;
131 		int nodes1, nodes2;
132 
133 		if (expression[0] == '0')
134 		    expect = 0;
135 		if (expression[0] == '1')
136 		    expect = 1;
137 		printf("Subexp: %s", expression + 2) ;
138 		nodes1 = xmlExpCtxtNbNodes(ctxt);
139 		sub = xmlExpParse(ctxt, expression + 2);
140 		if (sub == NULL) {
141 		    printf("   parsing Failed\n");
142 		    break;
143 		} else {
144 		    int ret;
145 
146 		    nodes2 = xmlExpCtxtNbNodes(ctxt);
147 		    ret = xmlExpSubsume(ctxt, expr, sub);
148 
149 		    if ((expect == 1) && (ret == 1)) {
150 			printf(" => accept, Ok\n");
151 		    } else if ((expect == 0) && (ret == 0)) {
152 		        printf(" => reject, Ok\n");
153 		    } else if ((expect == 1) && (ret == 0)) {
154 			printf(" => reject, Failed\n");
155 		    } else if ((expect == 0) && (ret == 1)) {
156 			printf(" => accept, Failed\n");
157 		    } else {
158 		        printf(" => fail internally\n");
159 		    }
160 		    if (xmlExpCtxtNbNodes(ctxt) > nodes2) {
161 		        printf(" Subsume leaked %d\n",
162 			       xmlExpCtxtNbNodes(ctxt) - nodes2);
163 			nodes1 += xmlExpCtxtNbNodes(ctxt) - nodes2;
164 		    }
165 		    xmlExpFree(ctxt, sub);
166 		    if (xmlExpCtxtNbNodes(ctxt) > nodes1) {
167 		        printf(" Parse/free leaked %d\n",
168 			       xmlExpCtxtNbNodes(ctxt) - nodes1);
169 		    }
170 		}
171 
172 	    }
173 	}
174     }
175     if (expr != NULL) {
176 	xmlExpFree(ctxt, expr);
177 	if (xmlExpCtxtNbNodes(ctxt) != 0)
178 	    printf(" Parse/free of Expression leaked %d\n",
179 		   xmlExpCtxtNbNodes(ctxt));
180     }
181     fclose(input);
182 }
183 
184 static void
testReduce(xmlExpCtxtPtr ctxt,xmlExpNodePtr expr,const char * tst)185 testReduce(xmlExpCtxtPtr ctxt, xmlExpNodePtr expr, const char *tst) {
186     xmlBufferPtr xmlExpBuf;
187     xmlExpNodePtr sub, deriv;
188     xmlExpBuf = xmlBufferCreate();
189 
190     sub = xmlExpParse(ctxt, tst);
191     if (sub == NULL) {
192         printf("Subset %s failed to parse\n", tst);
193 	return;
194     }
195     xmlExpDump(xmlExpBuf, sub);
196     printf("Subset parsed as: %s\n",
197            (const char *) xmlBufferContent(xmlExpBuf));
198     deriv = xmlExpExpDerive(ctxt, expr, sub);
199     if (deriv == NULL) {
200         printf("Derivation led to an internal error, report this !\n");
201 	return;
202     } else {
203         xmlBufferEmpty(xmlExpBuf);
204 	xmlExpDump(xmlExpBuf, deriv);
205 	if (xmlExpIsNillable(deriv))
206 	    printf("Resulting nillable derivation: %s\n",
207 	           (const char *) xmlBufferContent(xmlExpBuf));
208 	else
209 	    printf("Resulting derivation: %s\n",
210 	           (const char *) xmlBufferContent(xmlExpBuf));
211 	xmlExpFree(ctxt, deriv);
212     }
213     xmlExpFree(ctxt, sub);
214 }
215 
216 static void
exprDebug(xmlExpCtxtPtr ctxt,xmlExpNodePtr expr)217 exprDebug(xmlExpCtxtPtr ctxt, xmlExpNodePtr expr) {
218     xmlBufferPtr xmlExpBuf;
219     xmlExpNodePtr deriv;
220     const char *list[40];
221     int ret;
222 
223     xmlExpBuf = xmlBufferCreate();
224 
225     if (expr == NULL) {
226         printf("Failed to parse\n");
227 	return;
228     }
229     xmlExpDump(xmlExpBuf, expr);
230     printf("Parsed as: %s\n", (const char *) xmlBufferContent(xmlExpBuf));
231     printf("Max token input = %d\n", xmlExpMaxToken(expr));
232     if (xmlExpIsNillable(expr) == 1)
233 	printf("Is nillable\n");
234     ret = xmlExpGetLanguage(ctxt, expr, (const xmlChar **) &list[0], 40);
235     if (ret < 0)
236 	printf("Failed to get list: %d\n", ret);
237     else {
238 	int i;
239 
240 	printf("Language has %d strings, testing string derivations\n", ret);
241 	for (i = 0;i < ret;i++) {
242 	    deriv = xmlExpStringDerive(ctxt, expr, BAD_CAST list[i], -1);
243 	    if (deriv == NULL) {
244 		printf("  %s -> derivation failed\n", list[i]);
245 	    } else {
246 		xmlBufferEmpty(xmlExpBuf);
247 		xmlExpDump(xmlExpBuf, deriv);
248 		printf("  %s -> %s\n", list[i],
249 		       (const char *) xmlBufferContent(xmlExpBuf));
250 	    }
251 	    xmlExpFree(ctxt, deriv);
252 	}
253     }
254     xmlBufferFree(xmlExpBuf);
255 }
256 #endif
257 
/*
 * usage:
 * @name:  the program name to show in the synopsis line
 *
 * Writes the command-line help to stderr. The --expr line is only included
 * when LIBXML_EXPR_ENABLED is defined.
 */
static void usage(const char *name) {
    static const char flagsHelp[] =
        "Testing tool for libxml2 string and pattern regexps\n"
        "   --debug: switch on debugging\n"
        "   --repeat: loop on the operation\n"
#ifdef LIBXML_EXPR_ENABLED
        "   --expr: test xmlExp and not xmlRegexp\n"
#endif
        "   --input filename: use the given filename for regexp\n"
        "   --input filename: use the given filename for exp\n";

    fprintf(stderr, "Usage: %s [flags]\n", name);
    fprintf(stderr, "%s", flagsHelp);
}
269 
main(int argc,char ** argv)270 int main(int argc, char **argv) {
271     xmlRegexpPtr comp = NULL;
272 #ifdef LIBXML_EXPR_ENABLED
273     xmlExpNodePtr expr = NULL;
274     int use_exp = 0;
275     xmlExpCtxtPtr ctxt = NULL;
276 #endif
277     const char *pattern = NULL;
278     char *filename = NULL;
279     int i;
280 
281     xmlInitMemory();
282 
283     if (argc <= 1) {
284 	usage(argv[0]);
285 	return(1);
286     }
287     for (i = 1; i < argc ; i++) {
288 	if (!strcmp(argv[i], "-"))
289 	    break;
290 
291 	if (argv[i][0] != '-')
292 	    continue;
293 	if (!strcmp(argv[i], "--"))
294 	    break;
295 
296 	if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug"))) {
297 	    debug++;
298 	} else if ((!strcmp(argv[i], "-repeat")) ||
299 	         (!strcmp(argv[i], "--repeat"))) {
300 	    repeat++;
301 #ifdef LIBXML_EXPR_ENABLED
302 	} else if ((!strcmp(argv[i], "-expr")) ||
303 	         (!strcmp(argv[i], "--expr"))) {
304 	    use_exp++;
305 #endif
306 	} else if ((!strcmp(argv[i], "-i")) || (!strcmp(argv[i], "-f")) ||
307 		   (!strcmp(argv[i], "--input")))
308 	    filename = argv[++i];
309         else {
310 	    fprintf(stderr, "Unknown option %s\n", argv[i]);
311 	    usage(argv[0]);
312 	}
313     }
314 
315 #ifdef LIBXML_EXPR_ENABLED
316     if (use_exp)
317 	ctxt = xmlExpNewCtxt(0, NULL);
318 #endif
319 
320     if (filename != NULL) {
321 #ifdef LIBXML_EXPR_ENABLED
322         if (use_exp)
323 	    runFileTest(ctxt, filename);
324 	else
325 #endif
326 	    testRegexpFile(filename);
327     } else {
328         int  data = 0;
329 #ifdef LIBXML_EXPR_ENABLED
330 
331         if (use_exp) {
332 	    for (i = 1; i < argc ; i++) {
333 	        if (strcmp(argv[i], "--") == 0)
334 		    data = 1;
335 		else if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0) ||
336 		    (data == 1)) {
337 		    if (pattern == NULL) {
338 			pattern = argv[i];
339 			printf("Testing expr %s:\n", pattern);
340 			expr = xmlExpParse(ctxt, pattern);
341 			if (expr == NULL) {
342 			    printf("   failed to compile\n");
343 			    break;
344 			}
345 			if (debug) {
346 			    exprDebug(ctxt, expr);
347 			}
348 		    } else {
349 			testReduce(ctxt, expr, argv[i]);
350 		    }
351 		}
352 	    }
353 	    if (expr != NULL) {
354 		xmlExpFree(ctxt, expr);
355 		expr = NULL;
356 	    }
357 	} else
358 #endif
359         {
360 	    for (i = 1; i < argc ; i++) {
361 	        if (strcmp(argv[i], "--") == 0)
362 		    data = 1;
363 		else if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0) ||
364 		         (data == 1)) {
365 		    if (pattern == NULL) {
366 			pattern = argv[i];
367 			printf("Testing %s:\n", pattern);
368 			comp = xmlRegexpCompile((const xmlChar *) pattern);
369 			if (comp == NULL) {
370 			    printf("   failed to compile\n");
371 			    break;
372 			}
373 			if (debug)
374 			    xmlRegexpPrint(stdout, comp);
375 		    } else {
376 			testRegexp(comp, argv[i]);
377 		    }
378 		}
379 	    }
380 	    if (comp != NULL)
381 		xmlRegFreeRegexp(comp);
382         }
383     }
384 #ifdef LIBXML_EXPR_ENABLED
385     if (ctxt != NULL) {
386 	printf("Ops: %d nodes, %d cons\n",
387 	       xmlExpCtxtNbNodes(ctxt), xmlExpCtxtNbCons(ctxt));
388 	xmlExpFreeCtxt(ctxt);
389     }
390 #endif
391     xmlCleanupParser();
392     xmlMemoryDump();
393     return(0);
394 }
395 
396 #else
397 #include <stdio.h>
/*
 * Fallback entry point used when regexp support is compiled out: reports
 * that fact on stdout and exits successfully. Both parameters are really
 * used, so neither is marked as unused.
 */
int main(int argc, char **argv) {
    /* argv[0] may be absent; never hand NULL to "%s". */
    const char *progname =
        ((argc > 0) && (argv[0] != NULL)) ? argv[0] : "testRegexp";

    printf("%s : Regexp support not compiled in\n", progname);
    return(0);
}
402 #endif /* LIBXML_REGEXP_ENABLED */
403