1 /* 2 * testRegexp.c: simple module for testing regular expressions 3 * 4 * See Copyright for the status of this software. 5 * 6 * Daniel Veillard <veillard@redhat.com> 7 */ 8 9 #include "libxml.h" 10 #ifdef LIBXML_AUTOMATA_ENABLED 11 #include <string.h> 12 13 #include <libxml/tree.h> 14 #include <libxml/xmlautomata.h> 15 16 static int scanNumber(char **ptr) { 17 int ret = 0; 18 char *cur; 19 20 cur = *ptr; 21 while ((*cur >= '0') && (*cur <= '9')) { 22 ret = ret * 10 + (*cur - '0'); 23 cur++; 24 } 25 *ptr = cur; 26 return(ret); 27 } 28 29 static void 30 testRegexpFile(const char *filename) { 31 FILE *input; 32 char expr[5000]; 33 int len; 34 int ret; 35 int i; 36 xmlAutomataPtr am; 37 xmlAutomataStatePtr states[1000]; 38 xmlRegexpPtr regexp = NULL; 39 xmlRegExecCtxtPtr exec = NULL; 40 41 for (i = 0;i<1000;i++) 42 states[i] = NULL; 43 44 input = fopen(filename, "r"); 45 if (input == NULL) { 46 xmlGenericError(xmlGenericErrorContext, 47 "Cannot open %s for reading\n", filename); 48 return; 49 } 50 51 am = xmlNewAutomata(); 52 if (am == NULL) { 53 xmlGenericError(xmlGenericErrorContext, 54 "Cannot create automata\n"); 55 fclose(input); 56 return; 57 } 58 states[0] = xmlAutomataGetInitState(am); 59 if (states[0] == NULL) { 60 xmlGenericError(xmlGenericErrorContext, 61 "Cannot get start state\n"); 62 xmlFreeAutomata(am); 63 fclose(input); 64 return; 65 } 66 ret = 0; 67 68 while (fgets(expr, 4500, input) != NULL) { 69 if (expr[0] == '#') 70 continue; 71 len = strlen(expr); 72 len--; 73 while ((len >= 0) && 74 ((expr[len] == '\n') || (expr[len] == '\t') || 75 (expr[len] == '\r') || (expr[len] == ' '))) len--; 76 expr[len + 1] = 0; 77 if (len >= 0) { 78 if ((am != NULL) && (expr[0] == 't') && (expr[1] == ' ')) { 79 char *ptr = &expr[2]; 80 int from, to; 81 82 from = scanNumber(&ptr); 83 if (*ptr != ' ') { 84 xmlGenericError(xmlGenericErrorContext, 85 "Bad line %s\n", expr); 86 break; 87 } 88 if (states[from] == NULL) 89 states[from] = xmlAutomataNewState(am); 90 ptr++; 91 to = scanNumber(&ptr); 92 if (*ptr != ' ') { 93 xmlGenericError(xmlGenericErrorContext, 94 "Bad line %s\n", expr); 95 break; 96 } 97 if (states[to] == NULL) 98 states[to] = xmlAutomataNewState(am); 99 ptr++; 100 xmlAutomataNewTransition(am, states[from], states[to], 101 BAD_CAST ptr, NULL); 102 } else if ((am != NULL) && (expr[0] == 'e') && (expr[1] == ' ')) { 103 char *ptr = &expr[2]; 104 int from, to; 105 106 from = scanNumber(&ptr); 107 if (*ptr != ' ') { 108 xmlGenericError(xmlGenericErrorContext, 109 "Bad line %s\n", expr); 110 break; 111 } 112 if (states[from] == NULL) 113 states[from] = xmlAutomataNewState(am); 114 ptr++; 115 to = scanNumber(&ptr); 116 if (states[to] == NULL) 117 states[to] = xmlAutomataNewState(am); 118 xmlAutomataNewEpsilon(am, states[from], states[to]); 119 } else if ((am != NULL) && (expr[0] == 'f') && (expr[1] == ' ')) { 120 char *ptr = &expr[2]; 121 int state; 122 123 state = scanNumber(&ptr); 124 if (states[state] == NULL) { 125 xmlGenericError(xmlGenericErrorContext, 126 "Bad state %d : %s\n", state, expr); 127 break; 128 } 129 xmlAutomataSetFinalState(am, states[state]); 130 } else if ((am != NULL) && (expr[0] == 'c') && (expr[1] == ' ')) { 131 char *ptr = &expr[2]; 132 int from, to; 133 int min, max; 134 135 from = scanNumber(&ptr); 136 if (*ptr != ' ') { 137 xmlGenericError(xmlGenericErrorContext, 138 "Bad line %s\n", expr); 139 break; 140 } 141 if (states[from] == NULL) 142 states[from] = xmlAutomataNewState(am); 143 ptr++; 144 to = scanNumber(&ptr); 145 if (*ptr != ' ') { 146 xmlGenericError(xmlGenericErrorContext, 147 "Bad line %s\n", expr); 148 break; 149 } 150 if (states[to] == NULL) 151 states[to] = xmlAutomataNewState(am); 152 ptr++; 153 min = scanNumber(&ptr); 154 if (*ptr != ' ') { 155 xmlGenericError(xmlGenericErrorContext, 156 "Bad line %s\n", expr); 157 break; 158 } 159 ptr++; 160 max = scanNumber(&ptr); 161 if (*ptr != ' ') { 162 xmlGenericError(xmlGenericErrorContext, 163 "Bad line %s\n", expr); 164 break; 165 } 166 ptr++; 167 xmlAutomataNewCountTrans(am, states[from], states[to], 168 BAD_CAST ptr, min, max, NULL); 169 } else if ((am != NULL) && (expr[0] == '-') && (expr[1] == '-')) { 170 /* end of the automata */ 171 regexp = xmlAutomataCompile(am); 172 xmlFreeAutomata(am); 173 am = NULL; 174 if (regexp == NULL) { 175 xmlGenericError(xmlGenericErrorContext, 176 "Failed to compile the automata"); 177 break; 178 } 179 } else if ((expr[0] == '=') && (expr[1] == '>')) { 180 if (regexp == NULL) { 181 printf("=> failed not compiled\n"); 182 } else { 183 if (exec == NULL) 184 exec = xmlRegNewExecCtxt(regexp, NULL, NULL); 185 if (ret == 0) { 186 ret = xmlRegExecPushString(exec, NULL, NULL); 187 } 188 if (ret == 1) 189 printf("=> Passed\n"); 190 else if ((ret == 0) || (ret == -1)) 191 printf("=> Failed\n"); 192 else if (ret < 0) 193 printf("=> Error\n"); 194 xmlRegFreeExecCtxt(exec); 195 exec = NULL; 196 } 197 ret = 0; 198 } else if (regexp != NULL) { 199 if (exec == NULL) 200 exec = xmlRegNewExecCtxt(regexp, NULL, NULL); 201 ret = xmlRegExecPushString(exec, BAD_CAST expr, NULL); 202 } else { 203 xmlGenericError(xmlGenericErrorContext, 204 "Unexpected line %s\n", expr); 205 } 206 } 207 } 208 fclose(input); 209 if (regexp != NULL) 210 xmlRegFreeRegexp(regexp); 211 if (exec != NULL) 212 xmlRegFreeExecCtxt(exec); 213 if (am != NULL) 214 xmlFreeAutomata(am); 215 } 216 217 int main(int argc, char **argv) { 218 219 xmlInitMemory(); 220 221 if (argc == 1) { 222 int ret; 223 xmlAutomataPtr am; 224 xmlAutomataStatePtr start, cur; 225 xmlRegexpPtr regexp; 226 xmlRegExecCtxtPtr exec; 227 228 am = xmlNewAutomata(); 229 start = xmlAutomataGetInitState(am); 230 231 /* generate a[ba]*a */ 232 cur = xmlAutomataNewTransition(am, start, NULL, BAD_CAST"a", NULL); 233 xmlAutomataNewTransition(am, cur, cur, BAD_CAST"b", NULL); 234 xmlAutomataNewTransition(am, cur, cur, BAD_CAST"a", NULL); 235 cur = xmlAutomataNewCountTrans(am, cur, NULL, BAD_CAST"a", 2, 3, NULL); 236 xmlAutomataSetFinalState(am, cur); 237 238 /* compile it in a regexp and free the automata */ 239 regexp = xmlAutomataCompile(am); 240 xmlFreeAutomata(am); 241 242 /* test the regexp */ 243 xmlRegexpPrint(stdout, regexp); 244 exec = xmlRegNewExecCtxt(regexp, NULL, NULL); 245 ret = xmlRegExecPushString(exec, BAD_CAST"a", NULL); 246 if (ret == 1) 247 printf("final\n"); 248 else if (ret < 0) 249 printf("error\n"); 250 ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL); 251 if (ret == 1) 252 printf("final\n"); 253 else if (ret < 0) 254 printf("error\n"); 255 ret =xmlRegExecPushString(exec, BAD_CAST"b", NULL); 256 if (ret == 1) 257 printf("final\n"); 258 else if (ret < 0) 259 printf("error\n"); 260 ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL); 261 if (ret == 1) 262 printf("final\n"); 263 else if (ret < 0) 264 printf("error\n"); 265 ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL); 266 if (ret == 1) 267 printf("final\n"); 268 else if (ret < 0) 269 printf("error\n"); 270 ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL); 271 if (ret == 1) 272 printf("final\n"); 273 else if (ret < 0) 274 printf("error\n"); 275 ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL); 276 if (ret == 1) 277 printf("final\n"); 278 else if (ret < 0) 279 printf("error\n"); 280 if (ret == 0) { 281 ret = xmlRegExecPushString(exec, NULL, NULL); 282 if (ret == 1) 283 printf("final\n"); 284 else if (ret < 0) 285 printf("error\n"); 286 } 287 xmlRegFreeExecCtxt(exec); 288 289 /* free the regexp */ 290 xmlRegFreeRegexp(regexp); 291 } else { 292 int i; 293 294 for (i = 1;i < argc;i++) 295 testRegexpFile(argv[i]); 296 } 297 298 xmlCleanupParser(); 299 xmlMemoryDump(); 300 return(0); 301 } 302 303 #else 304 #include <stdio.h> 305 int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) { 306 printf("%s : Automata support not compiled in\n", argv[0]); 307 return(0); 308 } 309 #endif /* LIBXML_AUTOMATA_ENABLED */ 310