1 /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
2    See the file COPYING for copying permission.
3 
4    runtest.c : run the Expat test suite
5 */
6 
7 #ifdef HAVE_EXPAT_CONFIG_H
8 #include <expat_config.h>
9 #endif
10 
11 #include <assert.h>
12 #include <stdlib.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <stdint.h>
16 
17 #include "expat.h"
18 #include "chardata.h"
19 #include "minicheck.h"
20 
21 #if defined(__amigaos__) && defined(__USE_INLINE__)
22 #include <proto/expat.h>
23 #endif
24 
/* Length modifier spliced into the printf-style formats that print the
   line and column numbers reported by the parser; chosen at compile time
   by XML_LARGE_SIZE.
*/
#if defined(XML_LARGE_SIZE)
#  define XML_FMT_INT_MOD "ll"
#else
#  define XML_FMT_INT_MOD "l"
#endif
30 
31 static XML_Parser parser;
32 
33 
34 static void
basic_setup(void)35 basic_setup(void)
36 {
37     parser = XML_ParserCreate(NULL);
38     if (parser == NULL)
39         fail("Parser not created.");
40 }
41 
42 static void
basic_teardown(void)43 basic_teardown(void)
44 {
45     if (parser != NULL)
46         XML_ParserFree(parser);
47 }
48 
49 /* Generate a failure using the parser state to create an error message;
50    this should be used when the parser reports an error we weren't
51    expecting.
52 */
53 static void
_xml_failure(XML_Parser parser,const char * file,int line)54 _xml_failure(XML_Parser parser, const char *file, int line)
55 {
56     char buffer[1024];
57     enum XML_Error err = XML_GetErrorCode(parser);
58     sprintf(buffer,
59             "    %d: %s (line %" XML_FMT_INT_MOD "u, offset %"\
60                 XML_FMT_INT_MOD "u)\n    reported from %s, line %d\n",
61             err,
62             XML_ErrorString(err),
63             XML_GetCurrentLineNumber(parser),
64             XML_GetCurrentColumnNumber(parser),
65             file, line);
66     _fail_unless(0, file, line, buffer);
67 }
68 
69 #define xml_failure(parser) _xml_failure((parser), __FILE__, __LINE__)
70 
71 static void
_expect_failure(char * text,enum XML_Error errorCode,char * errorMessage,char * file,int lineno)72 _expect_failure(char *text, enum XML_Error errorCode, char *errorMessage,
73                 char *file, int lineno)
74 {
75     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK)
76         /* Hackish use of _fail_unless() macro, but let's us report
77            the right filename and line number. */
78         _fail_unless(0, file, lineno, errorMessage);
79     if (XML_GetErrorCode(parser) != errorCode)
80         _xml_failure(parser, file, lineno);
81 }
82 
83 #define expect_failure(text, errorCode, errorMessage) \
84         _expect_failure((text), (errorCode), (errorMessage), \
85                         __FILE__, __LINE__)
86 
87 /* Dummy handlers for when we need to set a handler to tickle a bug,
88    but it doesn't need to do anything.
89 */
90 
91 static void XMLCALL
dummy_start_doctype_handler(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)92 dummy_start_doctype_handler(void           *userData,
93                             const XML_Char *doctypeName,
94                             const XML_Char *sysid,
95                             const XML_Char *pubid,
96                             int            has_internal_subset)
97 {}
98 
99 static void XMLCALL
dummy_end_doctype_handler(void * userData)100 dummy_end_doctype_handler(void *userData)
101 {}
102 
103 static void XMLCALL
dummy_entity_decl_handler(void * userData,const XML_Char * entityName,int is_parameter_entity,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)104 dummy_entity_decl_handler(void           *userData,
105                           const XML_Char *entityName,
106                           int            is_parameter_entity,
107                           const XML_Char *value,
108                           int            value_length,
109                           const XML_Char *base,
110                           const XML_Char *systemId,
111                           const XML_Char *publicId,
112                           const XML_Char *notationName)
113 {}
114 
115 static void XMLCALL
dummy_notation_decl_handler(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)116 dummy_notation_decl_handler(void *userData,
117                             const XML_Char *notationName,
118                             const XML_Char *base,
119                             const XML_Char *systemId,
120                             const XML_Char *publicId)
121 {}
122 
123 static void XMLCALL
dummy_element_decl_handler(void * userData,const XML_Char * name,XML_Content * model)124 dummy_element_decl_handler(void *userData,
125                            const XML_Char *name,
126                            XML_Content *model)
127 {}
128 
129 static void XMLCALL
dummy_attlist_decl_handler(void * userData,const XML_Char * elname,const XML_Char * attname,const XML_Char * att_type,const XML_Char * dflt,int isrequired)130 dummy_attlist_decl_handler(void           *userData,
131                            const XML_Char *elname,
132                            const XML_Char *attname,
133                            const XML_Char *att_type,
134                            const XML_Char *dflt,
135                            int            isrequired)
136 {}
137 
138 static void XMLCALL
dummy_comment_handler(void * userData,const XML_Char * data)139 dummy_comment_handler(void *userData, const XML_Char *data)
140 {}
141 
142 static void XMLCALL
dummy_pi_handler(void * userData,const XML_Char * target,const XML_Char * data)143 dummy_pi_handler(void *userData, const XML_Char *target, const XML_Char *data)
144 {}
145 
146 static void XMLCALL
dummy_start_element(void * userData,const XML_Char * name,const XML_Char ** atts)147 dummy_start_element(void *userData,
148                     const XML_Char *name, const XML_Char **atts)
149 {}
150 
151 
152 /*
153  * Character & encoding tests.
154  */
155 
START_TEST(test_nul_byte)156 START_TEST(test_nul_byte)
157 {
158     char text[] = "<doc>\0</doc>";
159 
160     /* test that a NUL byte (in US-ASCII data) is an error */
161     if (XML_Parse(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_OK)
162         fail("Parser did not report error on NUL-byte.");
163     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
164         xml_failure(parser);
165 }
166 END_TEST
167 
168 
START_TEST(test_u0000_char)169 START_TEST(test_u0000_char)
170 {
171     /* test that a NUL byte (in US-ASCII data) is an error */
172     expect_failure("<doc>&#0;</doc>",
173                    XML_ERROR_BAD_CHAR_REF,
174                    "Parser did not report error on NUL-byte.");
175 }
176 END_TEST
177 
START_TEST(test_bom_utf8)178 START_TEST(test_bom_utf8)
179 {
180     /* This test is really just making sure we don't core on a UTF-8 BOM. */
181     char *text = "\357\273\277<e/>";
182 
183     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
184         xml_failure(parser);
185 }
186 END_TEST
187 
START_TEST(test_bom_utf16_be)188 START_TEST(test_bom_utf16_be)
189 {
190     char text[] = "\376\377\0<\0e\0/\0>";
191 
192     if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
193         xml_failure(parser);
194 }
195 END_TEST
196 
START_TEST(test_bom_utf16_le)197 START_TEST(test_bom_utf16_le)
198 {
199     char text[] = "\377\376<\0e\0/\0>\0";
200 
201     if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
202         xml_failure(parser);
203 }
204 END_TEST
205 
206 static void XMLCALL
accumulate_characters(void * userData,const XML_Char * s,int len)207 accumulate_characters(void *userData, const XML_Char *s, int len)
208 {
209     CharData_AppendXMLChars((CharData *)userData, s, len);
210 }
211 
212 static void XMLCALL
accumulate_attribute(void * userData,const XML_Char * name,const XML_Char ** atts)213 accumulate_attribute(void *userData, const XML_Char *name,
214                      const XML_Char **atts)
215 {
216     CharData *storage = (CharData *)userData;
217     if (storage->count < 0 && atts != NULL && atts[0] != NULL) {
218         /* "accumulate" the value of the first attribute we see */
219         CharData_AppendXMLChars(storage, atts[1], -1);
220     }
221 }
222 
223 
224 static void
_run_character_check(XML_Char * text,XML_Char * expected,const char * file,int line)225 _run_character_check(XML_Char *text, XML_Char *expected,
226                      const char *file, int line)
227 {
228     CharData storage;
229 
230     CharData_Init(&storage);
231     XML_SetUserData(parser, &storage);
232     XML_SetCharacterDataHandler(parser, accumulate_characters);
233     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
234         _xml_failure(parser, file, line);
235     CharData_CheckXMLChars(&storage, expected);
236 }
237 
238 #define run_character_check(text, expected) \
239         _run_character_check(text, expected, __FILE__, __LINE__)
240 
241 static void
_run_attribute_check(XML_Char * text,XML_Char * expected,const char * file,int line)242 _run_attribute_check(XML_Char *text, XML_Char *expected,
243                      const char *file, int line)
244 {
245     CharData storage;
246 
247     CharData_Init(&storage);
248     XML_SetUserData(parser, &storage);
249     XML_SetStartElementHandler(parser, accumulate_attribute);
250     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
251         _xml_failure(parser, file, line);
252     CharData_CheckXMLChars(&storage, expected);
253 }
254 
255 #define run_attribute_check(text, expected) \
256         _run_attribute_check(text, expected, __FILE__, __LINE__)
257 
258 /* Regression test for SF bug #491986. */
START_TEST(test_danish_latin1)259 START_TEST(test_danish_latin1)
260 {
261     char *text =
262         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
263         "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
264     run_character_check(text,
265              "J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
266 }
267 END_TEST
268 
269 
270 /* Regression test for SF bug #514281. */
START_TEST(test_french_charref_hexidecimal)271 START_TEST(test_french_charref_hexidecimal)
272 {
273     char *text =
274         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
275         "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
276     run_character_check(text,
277                         "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
278 }
279 END_TEST
280 
START_TEST(test_french_charref_decimal)281 START_TEST(test_french_charref_decimal)
282 {
283     char *text =
284         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
285         "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
286     run_character_check(text,
287                         "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
288 }
289 END_TEST
290 
START_TEST(test_french_latin1)291 START_TEST(test_french_latin1)
292 {
293     char *text =
294         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
295         "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
296     run_character_check(text,
297                         "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
298 }
299 END_TEST
300 
START_TEST(test_french_utf8)301 START_TEST(test_french_utf8)
302 {
303     char *text =
304         "<?xml version='1.0' encoding='utf-8'?>\n"
305         "<doc>\xC3\xA9</doc>";
306     run_character_check(text, "\xC3\xA9");
307 }
308 END_TEST
309 
310 /* Regression test for SF bug #600479.
311    XXX There should be a test that exercises all legal XML Unicode
312    characters as PCDATA and attribute value content, and XML Name
313    characters as part of element and attribute names.
314 */
START_TEST(test_utf8_false_rejection)315 START_TEST(test_utf8_false_rejection)
316 {
317     char *text = "<doc>\xEF\xBA\xBF</doc>";
318     run_character_check(text, "\xEF\xBA\xBF");
319 }
320 END_TEST
321 
322 /* Regression test for SF bug #477667.
323    This test assures that any 8-bit character followed by a 7-bit
324    character will not be mistakenly interpreted as a valid UTF-8
325    sequence.
326 */
START_TEST(test_illegal_utf8)327 START_TEST(test_illegal_utf8)
328 {
329     char text[100];
330     int i;
331 
332     for (i = 128; i <= 255; ++i) {
333         sprintf(text, "<e>%ccd</e>", i);
334         if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK) {
335             sprintf(text,
336                     "expected token error for '%c' (ordinal %d) in UTF-8 text",
337                     i, i);
338             fail(text);
339         }
340         else if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
341             xml_failure(parser);
342         /* Reset the parser since we use the same parser repeatedly. */
343         XML_ParserReset(parser, NULL);
344     }
345 }
346 END_TEST
347 
START_TEST(test_utf16)348 START_TEST(test_utf16)
349 {
350     /* <?xml version="1.0" encoding="UTF-16"?>
351        <doc a='123'>some text</doc>
352     */
353     char text[] =
354         "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
355         "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
356         "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
357         "\000'\000?\000>\000\n"
358         "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'"
359         "\000>\000s\000o\000m\000e\000 \000t\000e\000x\000t\000<\000/"
360         "\000d\000o\000c\000>";
361     if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
362         xml_failure(parser);
363 }
364 END_TEST
365 
START_TEST(test_utf16_le_epilog_newline)366 START_TEST(test_utf16_le_epilog_newline)
367 {
368     unsigned int first_chunk_bytes = 17;
369     char text[] =
370         "\xFF\xFE"                      /* BOM */
371         "<\000e\000/\000>\000"          /* document element */
372         "\r\000\n\000\r\000\n\000";     /* epilog */
373 
374     if (first_chunk_bytes >= sizeof(text) - 1)
375         fail("bad value of first_chunk_bytes");
376     if (  XML_Parse(parser, text, first_chunk_bytes, XML_FALSE)
377           == XML_STATUS_ERROR)
378         xml_failure(parser);
379     else {
380         enum XML_Status rc;
381         rc = XML_Parse(parser, text + first_chunk_bytes,
382                        sizeof(text) - first_chunk_bytes - 1, XML_TRUE);
383         if (rc == XML_STATUS_ERROR)
384             xml_failure(parser);
385     }
386 }
387 END_TEST
388 
389 /* Regression test for SF bug #481609, #774028. */
START_TEST(test_latin1_umlauts)390 START_TEST(test_latin1_umlauts)
391 {
392     char *text =
393         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
394         "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
395         "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
396     char *utf8 =
397         "\xC3\xA4 \xC3\xB6 \xC3\xBC "
398         "\xC3\xA4 \xC3\xB6 \xC3\xBC "
399         "\xC3\xA4 \xC3\xB6 \xC3\xBC >";
400     run_character_check(text, utf8);
401     XML_ParserReset(parser, NULL);
402     run_attribute_check(text, utf8);
403 }
404 END_TEST
405 
406 /* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)407 START_TEST(test_line_number_after_parse)
408 {
409     char *text =
410         "<tag>\n"
411         "\n"
412         "\n</tag>";
413     XML_Size lineno;
414 
415     if (XML_Parse(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
416         xml_failure(parser);
417     lineno = XML_GetCurrentLineNumber(parser);
418     if (lineno != 4) {
419         char buffer[100];
420         sprintf(buffer,
421             "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
422         fail(buffer);
423     }
424 }
425 END_TEST
426 
427 /* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)428 START_TEST(test_column_number_after_parse)
429 {
430     char *text = "<tag></tag>";
431     XML_Size colno;
432 
433     if (XML_Parse(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
434         xml_failure(parser);
435     colno = XML_GetCurrentColumnNumber(parser);
436     if (colno != 11) {
437         char buffer[100];
438         sprintf(buffer,
439             "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
440         fail(buffer);
441     }
442 }
443 END_TEST
444 
445 static void XMLCALL
start_element_event_handler2(void * userData,const XML_Char * name,const XML_Char ** attr)446 start_element_event_handler2(void *userData, const XML_Char *name,
447 			     const XML_Char **attr)
448 {
449     CharData *storage = (CharData *) userData;
450     char buffer[100];
451 
452     sprintf(buffer,
453         "<%s> at col:%" XML_FMT_INT_MOD "u line:%"\
454             XML_FMT_INT_MOD "u\n", name,
455 	    XML_GetCurrentColumnNumber(parser),
456 	    XML_GetCurrentLineNumber(parser));
457     CharData_AppendString(storage, buffer);
458 }
459 
460 static void XMLCALL
end_element_event_handler2(void * userData,const XML_Char * name)461 end_element_event_handler2(void *userData, const XML_Char *name)
462 {
463     CharData *storage = (CharData *) userData;
464     char buffer[100];
465 
466     sprintf(buffer,
467         "</%s> at col:%" XML_FMT_INT_MOD "u line:%"\
468             XML_FMT_INT_MOD "u\n", name,
469 	    XML_GetCurrentColumnNumber(parser),
470 	    XML_GetCurrentLineNumber(parser));
471     CharData_AppendString(storage, buffer);
472 }
473 
474 /* Regression test #3 for SF bug #653180. */
START_TEST(test_line_and_column_numbers_inside_handlers)475 START_TEST(test_line_and_column_numbers_inside_handlers)
476 {
477     char *text =
478         "<a>\n"        /* Unix end-of-line */
479         "  <b>\r\n"    /* Windows end-of-line */
480         "    <c/>\r"   /* Mac OS end-of-line */
481         "  </b>\n"
482         "  <d>\n"
483         "    <f/>\n"
484         "  </d>\n"
485         "</a>";
486     char *expected =
487         "<a> at col:0 line:1\n"
488         "<b> at col:2 line:2\n"
489         "<c> at col:4 line:3\n"
490         "</c> at col:8 line:3\n"
491         "</b> at col:2 line:4\n"
492         "<d> at col:2 line:5\n"
493         "<f> at col:4 line:6\n"
494         "</f> at col:8 line:6\n"
495         "</d> at col:2 line:7\n"
496         "</a> at col:0 line:8\n";
497     CharData storage;
498 
499     CharData_Init(&storage);
500     XML_SetUserData(parser, &storage);
501     XML_SetStartElementHandler(parser, start_element_event_handler2);
502     XML_SetEndElementHandler(parser, end_element_event_handler2);
503     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
504         xml_failure(parser);
505 
506     CharData_CheckString(&storage, expected);
507 }
508 END_TEST
509 
510 /* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)511 START_TEST(test_line_number_after_error)
512 {
513     char *text =
514         "<a>\n"
515         "  <b>\n"
516         "  </a>";  /* missing </b> */
517     XML_Size lineno;
518     if (XML_Parse(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
519         fail("Expected a parse error");
520 
521     lineno = XML_GetCurrentLineNumber(parser);
522     if (lineno != 3) {
523         char buffer[100];
524         sprintf(buffer, "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
525         fail(buffer);
526     }
527 }
528 END_TEST
529 
530 /* Regression test #5 for SF bug #653180. */
START_TEST(test_column_number_after_error)531 START_TEST(test_column_number_after_error)
532 {
533     char *text =
534         "<a>\n"
535         "  <b>\n"
536         "  </a>";  /* missing </b> */
537     XML_Size colno;
538     if (XML_Parse(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
539         fail("Expected a parse error");
540 
541     colno = XML_GetCurrentColumnNumber(parser);
542     if (colno != 4) {
543         char buffer[100];
544         sprintf(buffer,
545             "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
546         fail(buffer);
547     }
548 }
549 END_TEST
550 
551 /* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)552 START_TEST(test_really_long_lines)
553 {
554     /* This parses an input line longer than INIT_DATA_BUF_SIZE
555        characters long (defined to be 1024 in xmlparse.c).  We take a
556        really cheesy approach to building the input buffer, because
557        this avoids writing bugs in buffer-filling code.
558     */
559     char *text =
560         "<e>"
561         /* 64 chars */
562         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
563         /* until we have at least 1024 characters on the line: */
564         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
565         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
566         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
567         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
568         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
569         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
570         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
571         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
572         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
573         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
574         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
575         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
576         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
577         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
578         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
579         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
580         "</e>";
581     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
582         xml_failure(parser);
583 }
584 END_TEST
585 
586 
587 /*
588  * Element event tests.
589  */
590 
591 static void XMLCALL
end_element_event_handler(void * userData,const XML_Char * name)592 end_element_event_handler(void *userData, const XML_Char *name)
593 {
594     CharData *storage = (CharData *) userData;
595     CharData_AppendString(storage, "/");
596     CharData_AppendXMLChars(storage, name, -1);
597 }
598 
START_TEST(test_end_element_events)599 START_TEST(test_end_element_events)
600 {
601     char *text = "<a><b><c/></b><d><f/></d></a>";
602     char *expected = "/c/b/f/d/a";
603     CharData storage;
604 
605     CharData_Init(&storage);
606     XML_SetUserData(parser, &storage);
607     XML_SetEndElementHandler(parser, end_element_event_handler);
608     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
609         xml_failure(parser);
610     CharData_CheckString(&storage, expected);
611 }
612 END_TEST
613 
614 
615 /*
616  * Attribute tests.
617  */
618 
619 /* Helpers used by the following test; this checks any "attr" and "refs"
620    attributes to make sure whitespace has been normalized.
621 
622    Return true if whitespace has been normalized in a string, using
623    the rules for attribute value normalization.  The 'is_cdata' flag
624    is needed since CDATA attributes don't need to have multiple
625    whitespace characters collapsed to a single space, while other
626    attribute data types do.  (Section 3.3.3 of the recommendation.)
627 */
628 static int
is_whitespace_normalized(const XML_Char * s,int is_cdata)629 is_whitespace_normalized(const XML_Char *s, int is_cdata)
630 {
631     int blanks = 0;
632     int at_start = 1;
633     while (*s) {
634         if (*s == ' ')
635             ++blanks;
636         else if (*s == '\t' || *s == '\n' || *s == '\r')
637             return 0;
638         else {
639             if (at_start) {
640                 at_start = 0;
641                 if (blanks && !is_cdata)
642                     /* illegal leading blanks */
643                     return 0;
644             }
645             else if (blanks > 1 && !is_cdata)
646                 return 0;
647             blanks = 0;
648         }
649         ++s;
650     }
651     if (blanks && !is_cdata)
652         return 0;
653     return 1;
654 }
655 
/* Self-check for is_whitespace_normalized(): each case is asserted for
   the non-CDATA (0) and/or CDATA (1) rules. */
static void
testhelper_is_whitespace_normalized(void)
{
    /* already normalized under either rule */
    assert(is_whitespace_normalized("abc", 0) != 0);
    assert(is_whitespace_normalized("abc", 1) != 0);
    assert(is_whitespace_normalized("abc def ghi", 0) != 0);
    assert(is_whitespace_normalized("abc def ghi", 1) != 0);

    /* leading blank: only acceptable for CDATA */
    assert(is_whitespace_normalized(" abc def ghi", 0) == 0);
    assert(is_whitespace_normalized(" abc def ghi", 1) != 0);

    /* doubled blank: only acceptable for CDATA */
    assert(is_whitespace_normalized("abc  def ghi", 0) == 0);
    assert(is_whitespace_normalized("abc  def ghi", 1) != 0);

    /* trailing blank: only acceptable for CDATA */
    assert(is_whitespace_normalized("abc def ghi ", 0) == 0);
    assert(is_whitespace_normalized("abc def ghi ", 1) != 0);

    /* a lone blank: only acceptable for CDATA */
    assert(is_whitespace_normalized(" ", 0) == 0);
    assert(is_whitespace_normalized(" ", 1) != 0);

    /* tab, newline and carriage return are never acceptable */
    assert(is_whitespace_normalized("\t", 0) == 0);
    assert(is_whitespace_normalized("\t", 1) == 0);
    assert(is_whitespace_normalized("\n", 0) == 0);
    assert(is_whitespace_normalized("\n", 1) == 0);
    assert(is_whitespace_normalized("\r", 0) == 0);
    assert(is_whitespace_normalized("\r", 1) == 0);
    assert(is_whitespace_normalized("abc\t def", 1) == 0);
}
680 
681 static void XMLCALL
check_attr_contains_normalized_whitespace(void * userData,const XML_Char * name,const XML_Char ** atts)682 check_attr_contains_normalized_whitespace(void *userData,
683                                           const XML_Char *name,
684                                           const XML_Char **atts)
685 {
686     int i;
687     for (i = 0; atts[i] != NULL; i += 2) {
688         const XML_Char *attrname = atts[i];
689         const XML_Char *value = atts[i + 1];
690         if (strcmp("attr", attrname) == 0
691             || strcmp("ents", attrname) == 0
692             || strcmp("refs", attrname) == 0) {
693             if (!is_whitespace_normalized(value, 0)) {
694                 char buffer[256];
695                 sprintf(buffer, "attribute value not normalized: %s='%s'",
696                         attrname, value);
697                 fail(buffer);
698             }
699         }
700     }
701 }
702 
START_TEST(test_attr_whitespace_normalization)703 START_TEST(test_attr_whitespace_normalization)
704 {
705     char *text =
706         "<!DOCTYPE doc [\n"
707         "  <!ATTLIST doc\n"
708         "            attr NMTOKENS #REQUIRED\n"
709         "            ents ENTITIES #REQUIRED\n"
710         "            refs IDREFS   #REQUIRED>\n"
711         "]>\n"
712         "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
713         "     ents=' ent-1   \t\r\n"
714         "            ent-2  ' >\n"
715         "  <e id='id-1'/>\n"
716         "  <e id='id-2'/>\n"
717         "</doc>";
718 
719     XML_SetStartElementHandler(parser,
720                                check_attr_contains_normalized_whitespace);
721     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
722         xml_failure(parser);
723 }
724 END_TEST
725 
726 
727 /*
728  * XML declaration tests.
729  */
730 
START_TEST(test_xmldecl_misplaced)731 START_TEST(test_xmldecl_misplaced)
732 {
733     expect_failure("\n"
734                    "<?xml version='1.0'?>\n"
735                    "<a/>",
736                    XML_ERROR_MISPLACED_XML_PI,
737                    "failed to report misplaced XML declaration");
738 }
739 END_TEST
740 
741 /* Regression test for SF bug #584832. */
742 static int XMLCALL
UnknownEncodingHandler(void * data,const XML_Char * encoding,XML_Encoding * info)743 UnknownEncodingHandler(void *data,const XML_Char *encoding,XML_Encoding *info)
744 {
745     if (strcmp(encoding,"unsupported-encoding") == 0) {
746         int i;
747         for (i = 0; i < 256; ++i)
748             info->map[i] = i;
749         info->data = NULL;
750         info->convert = NULL;
751         info->release = NULL;
752         return XML_STATUS_OK;
753     }
754     return XML_STATUS_ERROR;
755 }
756 
START_TEST(test_unknown_encoding_internal_entity)757 START_TEST(test_unknown_encoding_internal_entity)
758 {
759     char *text =
760         "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
761         "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
762         "<test a='&foo;'/>";
763 
764     XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, NULL);
765     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
766         xml_failure(parser);
767 }
768 END_TEST
769 
770 /* Regression test for SF bug #620106. */
771 static int XMLCALL
external_entity_loader_set_encoding(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)772 external_entity_loader_set_encoding(XML_Parser parser,
773                                     const XML_Char *context,
774                                     const XML_Char *base,
775                                     const XML_Char *systemId,
776                                     const XML_Char *publicId)
777 {
778     /* This text says it's an unsupported encoding, but it's really
779        UTF-8, which we tell Expat using XML_SetEncoding().
780     */
781     char *text =
782         "<?xml encoding='iso-8859-3'?>"
783         "\xC3\xA9";
784     XML_Parser extparser;
785 
786     extparser = XML_ExternalEntityParserCreate(parser, context, NULL);
787     if (extparser == NULL)
788         fail("Could not create external entity parser.");
789     if (!XML_SetEncoding(extparser, "utf-8"))
790         fail("XML_SetEncoding() ignored for external entity");
791     if (  XML_Parse(extparser, text, strlen(text), XML_TRUE)
792           == XML_STATUS_ERROR) {
793         xml_failure(parser);
794         return 0;
795     }
796     return 1;
797 }
798 
START_TEST(test_ext_entity_set_encoding)799 START_TEST(test_ext_entity_set_encoding)
800 {
801     char *text =
802         "<!DOCTYPE doc [\n"
803         "  <!ENTITY en SYSTEM 'http://xml.libexpat.org/dummy.ent'>\n"
804         "]>\n"
805         "<doc>&en;</doc>";
806 
807     XML_SetExternalEntityRefHandler(parser,
808                                     external_entity_loader_set_encoding);
809     run_character_check(text, "\xC3\xA9");
810 }
811 END_TEST
812 
813 /* Test that no error is reported for unknown entities if we don't
814    read an external subset.  This was fixed in Expat 1.95.5.
815 */
START_TEST(test_wfc_undeclared_entity_unread_external_subset)816 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
817     char *text =
818         "<!DOCTYPE doc SYSTEM 'foo'>\n"
819         "<doc>&entity;</doc>";
820 
821     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
822         xml_failure(parser);
823 }
824 END_TEST
825 
826 /* Test that an error is reported for unknown entities if we don't
827    have an external subset.
828 */
START_TEST(test_wfc_undeclared_entity_no_external_subset)829 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
830     expect_failure("<doc>&entity;</doc>",
831                    XML_ERROR_UNDEFINED_ENTITY,
832                    "Parser did not report undefined entity w/out a DTD.");
833 }
834 END_TEST
835 
836 /* Test that an error is reported for unknown entities if we don't
837    read an external subset, but have been declared standalone.
838 */
START_TEST(test_wfc_undeclared_entity_standalone)839 START_TEST(test_wfc_undeclared_entity_standalone) {
840     char *text =
841         "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
842         "<!DOCTYPE doc SYSTEM 'foo'>\n"
843         "<doc>&entity;</doc>";
844 
845     expect_failure(text,
846                    XML_ERROR_UNDEFINED_ENTITY,
847                    "Parser did not report undefined entity (standalone).");
848 }
849 END_TEST
850 
851 static int XMLCALL
external_entity_loader(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)852 external_entity_loader(XML_Parser parser,
853                        const XML_Char *context,
854                        const XML_Char *base,
855                        const XML_Char *systemId,
856                        const XML_Char *publicId)
857 {
858     char *text = (char *)XML_GetUserData(parser);
859     XML_Parser extparser;
860 
861     extparser = XML_ExternalEntityParserCreate(parser, context, NULL);
862     if (extparser == NULL)
863         fail("Could not create external entity parser.");
864     if (  XML_Parse(extparser, text, strlen(text), XML_TRUE)
865           == XML_STATUS_ERROR) {
866         xml_failure(parser);
867         return XML_STATUS_ERROR;
868     }
869     return XML_STATUS_OK;
870 }
871 
872 /* Test that an error is reported for unknown entities if we have read
873    an external subset, and standalone is true.
874 */
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone)875 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
876     char *text =
877         "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
878         "<!DOCTYPE doc SYSTEM 'foo'>\n"
879         "<doc>&entity;</doc>";
880     char *foo_text =
881         "<!ELEMENT doc (#PCDATA)*>";
882 
883     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
884     XML_SetUserData(parser, foo_text);
885     XML_SetExternalEntityRefHandler(parser, external_entity_loader);
886     expect_failure(text,
887                    XML_ERROR_UNDEFINED_ENTITY,
888                    "Parser did not report undefined entity (external DTD).");
889 }
890 END_TEST
891 
892 /* Test that no error is reported for unknown entities if we have read
893    an external subset, and standalone is false.
894 */
START_TEST(test_wfc_undeclared_entity_with_external_subset)895 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
896     char *text =
897         "<?xml version='1.0' encoding='us-ascii'?>\n"
898         "<!DOCTYPE doc SYSTEM 'foo'>\n"
899         "<doc>&entity;</doc>";
900     char *foo_text =
901         "<!ELEMENT doc (#PCDATA)*>";
902 
903     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
904     XML_SetUserData(parser, foo_text);
905     XML_SetExternalEntityRefHandler(parser, external_entity_loader);
906     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
907         xml_failure(parser);
908 }
909 END_TEST
910 
START_TEST(test_wfc_no_recursive_entity_refs)911 START_TEST(test_wfc_no_recursive_entity_refs)
912 {
913     char *text =
914         "<!DOCTYPE doc [\n"
915         "  <!ENTITY entity '&#38;entity;'>\n"
916         "]>\n"
917         "<doc>&entity;</doc>";
918 
919     expect_failure(text,
920                    XML_ERROR_RECURSIVE_ENTITY_REF,
921                    "Parser did not report recursive entity reference.");
922 }
923 END_TEST
924 
925 /* Regression test for SF bug #483514. */
START_TEST(test_dtd_default_handling)926 START_TEST(test_dtd_default_handling)
927 {
928     char *text =
929         "<!DOCTYPE doc [\n"
930         "<!ENTITY e SYSTEM 'http://xml.libexpat.org/e'>\n"
931         "<!NOTATION n SYSTEM 'http://xml.libexpat.org/n'>\n"
932         "<!ELEMENT doc EMPTY>\n"
933         "<!ATTLIST doc a CDATA #IMPLIED>\n"
934         "<?pi in dtd?>\n"
935         "<!--comment in dtd-->\n"
936         "]><doc/>";
937 
938     XML_SetDefaultHandler(parser, accumulate_characters);
939     XML_SetDoctypeDeclHandler(parser,
940                               dummy_start_doctype_handler,
941                               dummy_end_doctype_handler);
942     XML_SetEntityDeclHandler(parser, dummy_entity_decl_handler);
943     XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler);
944     XML_SetElementDeclHandler(parser, dummy_element_decl_handler);
945     XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler);
946     XML_SetProcessingInstructionHandler(parser, dummy_pi_handler);
947     XML_SetCommentHandler(parser, dummy_comment_handler);
948     run_character_check(text, "\n\n\n\n\n\n\n<doc/>");
949 }
950 END_TEST
951 
952 /* See related SF bug #673791.
953    When namespace processing is enabled, setting the namespace URI for
954    a prefix is not allowed; this test ensures that it *is* allowed
955    when namespace processing is not enabled.
956    (See Namespaces in XML, section 2.)
957 */
START_TEST(test_empty_ns_without_namespaces)958 START_TEST(test_empty_ns_without_namespaces)
959 {
960     char *text =
961         "<doc xmlns:prefix='http://www.example.com/'>\n"
962         "  <e xmlns:prefix=''/>\n"
963         "</doc>";
964 
965     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
966         xml_failure(parser);
967 }
968 END_TEST
969 
970 /* Regression test for SF bug #824420.
971    Checks that an xmlns:prefix attribute set in an attribute's default
972    value isn't misinterpreted.
973 */
START_TEST(test_ns_in_attribute_default_without_namespaces)974 START_TEST(test_ns_in_attribute_default_without_namespaces)
975 {
976     char *text =
977         "<!DOCTYPE e:element [\n"
978         "  <!ATTLIST e:element\n"
979         "    xmlns:e CDATA 'http://example.com/'>\n"
980         "      ]>\n"
981         "<e:element/>";
982 
983     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
984         xml_failure(parser);
985 }
986 END_TEST
987 
/* Sample document shared by the stop/suspend parser tests below: an
   XML declaration naming iso-8859-1, followed by a single <s> element
   whose character data is twenty 60-character runs of decimal digits
   (1200 characters in total).
*/
static char *long_character_data_text =
    "<?xml version='1.0' encoding='iso-8859-1'?><s>"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "</s>";
1011 
1012 static XML_Bool resumable = XML_FALSE;
1013 
1014 static void
clearing_aborting_character_handler(void * userData,const XML_Char * s,int len)1015 clearing_aborting_character_handler(void *userData,
1016                                     const XML_Char *s, int len)
1017 {
1018     XML_StopParser(parser, resumable);
1019     XML_SetCharacterDataHandler(parser, NULL);
1020 }
1021 
1022 /* Regression test for SF bug #1515266: missing check of stopped
1023    parser in doContext() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)1024 START_TEST(test_stop_parser_between_char_data_calls)
1025 {
1026     /* The sample data must be big enough that there are two calls to
1027        the character data handler from within the inner "for" loop of
1028        the XML_TOK_DATA_CHARS case in doContent(), and the character
1029        handler must stop the parser and clear the character data
1030        handler.
1031     */
1032     char *text = long_character_data_text;
1033 
1034     XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
1035     resumable = XML_FALSE;
1036     if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR)
1037         xml_failure(parser);
1038     if (XML_GetErrorCode(parser) != XML_ERROR_ABORTED)
1039         xml_failure(parser);
1040 }
1041 END_TEST
1042 
1043 /* Regression test for SF bug #1515266: missing check of stopped
1044    parser in doContext() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)1045 START_TEST(test_suspend_parser_between_char_data_calls)
1046 {
1047     /* The sample data must be big enough that there are two calls to
1048        the character data handler from within the inner "for" loop of
1049        the XML_TOK_DATA_CHARS case in doContent(), and the character
1050        handler must stop the parser and clear the character data
1051        handler.
1052     */
1053     char *text = long_character_data_text;
1054 
1055     XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
1056     resumable = XML_TRUE;
1057     if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED)
1058         xml_failure(parser);
1059     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
1060         xml_failure(parser);
1061 }
1062 END_TEST
1063 
1064 
1065 /*
1066  * Namespaces tests.
1067  */
1068 
1069 static void
namespace_setup(void)1070 namespace_setup(void)
1071 {
1072     parser = XML_ParserCreateNS(NULL, ' ');
1073     if (parser == NULL)
1074         fail("Parser not created.");
1075 }
1076 
1077 static void
namespace_teardown(void)1078 namespace_teardown(void)
1079 {
1080     basic_teardown();
1081 }
1082 
1083 /* Check that an element name and attribute name match the expected values.
1084    The expected values are passed as an array reference of string pointers
1085    provided as the userData argument; the first is the expected
1086    element name, and the second is the expected attribute name.
1087 */
1088 static void XMLCALL
triplet_start_checker(void * userData,const XML_Char * name,const XML_Char ** atts)1089 triplet_start_checker(void *userData, const XML_Char *name,
1090                       const XML_Char **atts)
1091 {
1092     char **elemstr = (char **)userData;
1093     char buffer[1024];
1094     if (strcmp(elemstr[0], name) != 0) {
1095         sprintf(buffer, "unexpected start string: '%s'", name);
1096         fail(buffer);
1097     }
1098     if (strcmp(elemstr[1], atts[0]) != 0) {
1099         sprintf(buffer, "unexpected attribute string: '%s'", atts[0]);
1100         fail(buffer);
1101     }
1102 }
1103 
1104 /* Check that the element name passed to the end-element handler matches
1105    the expected value.  The expected value is passed as the first element
1106    in an array of strings passed as the userData argument.
1107 */
1108 static void XMLCALL
triplet_end_checker(void * userData,const XML_Char * name)1109 triplet_end_checker(void *userData, const XML_Char *name)
1110 {
1111     char **elemstr = (char **)userData;
1112     if (strcmp(elemstr[0], name) != 0) {
1113         char buffer[1024];
1114         sprintf(buffer, "unexpected end string: '%s'", name);
1115         fail(buffer);
1116     }
1117 }
1118 
START_TEST(test_return_ns_triplet)1119 START_TEST(test_return_ns_triplet)
1120 {
1121     char *text =
1122         "<foo:e xmlns:foo='http://expat.sf.net/' bar:a='12'\n"
1123         "       xmlns:bar='http://expat.sf.net/'></foo:e>";
1124     char *elemstr[] = {
1125         "http://expat.sf.net/ e foo",
1126         "http://expat.sf.net/ a bar"
1127     };
1128     XML_SetReturnNSTriplet(parser, XML_TRUE);
1129     XML_SetUserData(parser, elemstr);
1130     XML_SetElementHandler(parser, triplet_start_checker, triplet_end_checker);
1131     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1132         xml_failure(parser);
1133 }
1134 END_TEST
1135 
1136 static void XMLCALL
overwrite_start_checker(void * userData,const XML_Char * name,const XML_Char ** atts)1137 overwrite_start_checker(void *userData, const XML_Char *name,
1138                         const XML_Char **atts)
1139 {
1140     CharData *storage = (CharData *) userData;
1141     CharData_AppendString(storage, "start ");
1142     CharData_AppendXMLChars(storage, name, -1);
1143     while (*atts != NULL) {
1144         CharData_AppendString(storage, "\nattribute ");
1145         CharData_AppendXMLChars(storage, *atts, -1);
1146         atts += 2;
1147     }
1148     CharData_AppendString(storage, "\n");
1149 }
1150 
1151 static void XMLCALL
overwrite_end_checker(void * userData,const XML_Char * name)1152 overwrite_end_checker(void *userData, const XML_Char *name)
1153 {
1154     CharData *storage = (CharData *) userData;
1155     CharData_AppendString(storage, "end ");
1156     CharData_AppendXMLChars(storage, name, -1);
1157     CharData_AppendString(storage, "\n");
1158 }
1159 
1160 static void
run_ns_tagname_overwrite_test(char * text,char * result)1161 run_ns_tagname_overwrite_test(char *text, char *result)
1162 {
1163     CharData storage;
1164     CharData_Init(&storage);
1165     XML_SetUserData(parser, &storage);
1166     XML_SetElementHandler(parser,
1167                           overwrite_start_checker, overwrite_end_checker);
1168     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1169         xml_failure(parser);
1170     CharData_CheckString(&storage, result);
1171 }
1172 
1173 /* Regression test for SF bug #566334. */
START_TEST(test_ns_tagname_overwrite)1174 START_TEST(test_ns_tagname_overwrite)
1175 {
1176     char *text =
1177         "<n:e xmlns:n='http://xml.libexpat.org/'>\n"
1178         "  <n:f n:attr='foo'/>\n"
1179         "  <n:g n:attr2='bar'/>\n"
1180         "</n:e>";
1181     char *result =
1182         "start http://xml.libexpat.org/ e\n"
1183         "start http://xml.libexpat.org/ f\n"
1184         "attribute http://xml.libexpat.org/ attr\n"
1185         "end http://xml.libexpat.org/ f\n"
1186         "start http://xml.libexpat.org/ g\n"
1187         "attribute http://xml.libexpat.org/ attr2\n"
1188         "end http://xml.libexpat.org/ g\n"
1189         "end http://xml.libexpat.org/ e\n";
1190     run_ns_tagname_overwrite_test(text, result);
1191 }
1192 END_TEST
1193 
1194 /* Regression test for SF bug #566334. */
START_TEST(test_ns_tagname_overwrite_triplet)1195 START_TEST(test_ns_tagname_overwrite_triplet)
1196 {
1197     char *text =
1198         "<n:e xmlns:n='http://xml.libexpat.org/'>\n"
1199         "  <n:f n:attr='foo'/>\n"
1200         "  <n:g n:attr2='bar'/>\n"
1201         "</n:e>";
1202     char *result =
1203         "start http://xml.libexpat.org/ e n\n"
1204         "start http://xml.libexpat.org/ f n\n"
1205         "attribute http://xml.libexpat.org/ attr n\n"
1206         "end http://xml.libexpat.org/ f n\n"
1207         "start http://xml.libexpat.org/ g n\n"
1208         "attribute http://xml.libexpat.org/ attr2 n\n"
1209         "end http://xml.libexpat.org/ g n\n"
1210         "end http://xml.libexpat.org/ e n\n";
1211     XML_SetReturnNSTriplet(parser, XML_TRUE);
1212     run_ns_tagname_overwrite_test(text, result);
1213 }
1214 END_TEST
1215 
1216 
1217 /* Regression test for SF bug #620343. */
1218 static void XMLCALL
start_element_fail(void * userData,const XML_Char * name,const XML_Char ** atts)1219 start_element_fail(void *userData,
1220                    const XML_Char *name, const XML_Char **atts)
1221 {
1222     /* We should never get here. */
1223     fail("should never reach start_element_fail()");
1224 }
1225 
1226 static void XMLCALL
start_ns_clearing_start_element(void * userData,const XML_Char * prefix,const XML_Char * uri)1227 start_ns_clearing_start_element(void *userData,
1228                                 const XML_Char *prefix,
1229                                 const XML_Char *uri)
1230 {
1231     XML_SetStartElementHandler((XML_Parser) userData, NULL);
1232 }
1233 
START_TEST(test_start_ns_clears_start_element)1234 START_TEST(test_start_ns_clears_start_element)
1235 {
1236     /* This needs to use separate start/end tags; using the empty tag
1237        syntax doesn't cause the problematic path through Expat to be
1238        taken.
1239     */
1240     char *text = "<e xmlns='http://xml.libexpat.org/'></e>";
1241 
1242     XML_SetStartElementHandler(parser, start_element_fail);
1243     XML_SetStartNamespaceDeclHandler(parser, start_ns_clearing_start_element);
1244     XML_UseParserAsHandlerArg(parser);
1245     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1246         xml_failure(parser);
1247 }
1248 END_TEST
1249 
1250 /* Regression test for SF bug #616863. */
1251 static int XMLCALL
external_entity_handler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)1252 external_entity_handler(XML_Parser parser,
1253                         const XML_Char *context,
1254                         const XML_Char *base,
1255                         const XML_Char *systemId,
1256                         const XML_Char *publicId)
1257 {
1258     intptr_t callno = 1 + (intptr_t)XML_GetUserData(parser);
1259     char *text;
1260     XML_Parser p2;
1261 
1262     if (callno == 1)
1263         text = ("<!ELEMENT doc (e+)>\n"
1264                 "<!ATTLIST doc xmlns CDATA #IMPLIED>\n"
1265                 "<!ELEMENT e EMPTY>\n");
1266     else
1267         text = ("<?xml version='1.0' encoding='us-ascii'?>"
1268                 "<e/>");
1269 
1270     XML_SetUserData(parser, (void *) callno);
1271     p2 = XML_ExternalEntityParserCreate(parser, context, NULL);
1272     if (XML_Parse(p2, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) {
1273         xml_failure(p2);
1274         return 0;
1275     }
1276     XML_ParserFree(p2);
1277     return 1;
1278 }
1279 
START_TEST(test_default_ns_from_ext_subset_and_ext_ge)1280 START_TEST(test_default_ns_from_ext_subset_and_ext_ge)
1281 {
1282     char *text =
1283         "<?xml version='1.0'?>\n"
1284         "<!DOCTYPE doc SYSTEM 'http://xml.libexpat.org/doc.dtd' [\n"
1285         "  <!ENTITY en SYSTEM 'http://xml.libexpat.org/entity.ent'>\n"
1286         "]>\n"
1287         "<doc xmlns='http://xml.libexpat.org/ns1'>\n"
1288         "&en;\n"
1289         "</doc>";
1290 
1291     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1292     XML_SetExternalEntityRefHandler(parser, external_entity_handler);
1293     /* We actually need to set this handler to tickle this bug. */
1294     XML_SetStartElementHandler(parser, dummy_start_element);
1295     XML_SetUserData(parser, NULL);
1296     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1297         xml_failure(parser);
1298 }
1299 END_TEST
1300 
1301 /* Regression test #1 for SF bug #673791. */
START_TEST(test_ns_prefix_with_empty_uri_1)1302 START_TEST(test_ns_prefix_with_empty_uri_1)
1303 {
1304     char *text =
1305         "<doc xmlns:prefix='http://xml.libexpat.org/'>\n"
1306         "  <e xmlns:prefix=''/>\n"
1307         "</doc>";
1308 
1309     expect_failure(text,
1310                    XML_ERROR_UNDECLARING_PREFIX,
1311                    "Did not report re-setting namespace"
1312                    " URI with prefix to ''.");
1313 }
1314 END_TEST
1315 
1316 /* Regression test #2 for SF bug #673791. */
START_TEST(test_ns_prefix_with_empty_uri_2)1317 START_TEST(test_ns_prefix_with_empty_uri_2)
1318 {
1319     char *text =
1320         "<?xml version='1.0'?>\n"
1321         "<docelem xmlns:pre=''/>";
1322 
1323     expect_failure(text,
1324                    XML_ERROR_UNDECLARING_PREFIX,
1325                    "Did not report setting namespace URI with prefix to ''.");
1326 }
1327 END_TEST
1328 
1329 /* Regression test #3 for SF bug #673791. */
START_TEST(test_ns_prefix_with_empty_uri_3)1330 START_TEST(test_ns_prefix_with_empty_uri_3)
1331 {
1332     char *text =
1333         "<!DOCTYPE doc [\n"
1334         "  <!ELEMENT doc EMPTY>\n"
1335         "  <!ATTLIST doc\n"
1336         "    xmlns:prefix CDATA ''>\n"
1337         "]>\n"
1338         "<doc/>";
1339 
1340     expect_failure(text,
1341                    XML_ERROR_UNDECLARING_PREFIX,
1342                    "Didn't report attr default setting NS w/ prefix to ''.");
1343 }
1344 END_TEST
1345 
1346 /* Regression test #4 for SF bug #673791. */
START_TEST(test_ns_prefix_with_empty_uri_4)1347 START_TEST(test_ns_prefix_with_empty_uri_4)
1348 {
1349     char *text =
1350         "<!DOCTYPE doc [\n"
1351         "  <!ELEMENT prefix:doc EMPTY>\n"
1352         "  <!ATTLIST prefix:doc\n"
1353         "    xmlns:prefix CDATA 'http://xml.libexpat.org/'>\n"
1354         "]>\n"
1355         "<prefix:doc/>";
1356     /* Packaged info expected by the end element handler;
1357        the weird structuring lets us re-use the triplet_end_checker()
1358        function also used for another test. */
1359     char *elemstr[] = {
1360         "http://xml.libexpat.org/ doc prefix"
1361     };
1362     XML_SetReturnNSTriplet(parser, XML_TRUE);
1363     XML_SetUserData(parser, elemstr);
1364     XML_SetEndElementHandler(parser, triplet_end_checker);
1365     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1366         xml_failure(parser);
1367 }
1368 END_TEST
1369 
START_TEST(test_ns_default_with_empty_uri)1370 START_TEST(test_ns_default_with_empty_uri)
1371 {
1372     char *text =
1373         "<doc xmlns='http://xml.libexpat.org/'>\n"
1374         "  <e xmlns=''/>\n"
1375         "</doc>";
1376     if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1377         xml_failure(parser);
1378 }
1379 END_TEST
1380 
1381 /* Regression test for SF bug #692964: two prefixes for one namespace. */
START_TEST(test_ns_duplicate_attrs_diff_prefixes)1382 START_TEST(test_ns_duplicate_attrs_diff_prefixes)
1383 {
1384     char *text =
1385         "<doc xmlns:a='http://xml.libexpat.org/a'\n"
1386         "     xmlns:b='http://xml.libexpat.org/a'\n"
1387         "     a:a='v' b:a='v' />";
1388     expect_failure(text,
1389                    XML_ERROR_DUPLICATE_ATTRIBUTE,
1390                    "did not report multiple attributes with same URI+name");
1391 }
1392 END_TEST
1393 
1394 /* Regression test for SF bug #695401: unbound prefix. */
START_TEST(test_ns_unbound_prefix_on_attribute)1395 START_TEST(test_ns_unbound_prefix_on_attribute)
1396 {
1397     char *text = "<doc a:attr=''/>";
1398     expect_failure(text,
1399                    XML_ERROR_UNBOUND_PREFIX,
1400                    "did not report unbound prefix on attribute");
1401 }
1402 END_TEST
1403 
1404 /* Regression test for SF bug #695401: unbound prefix. */
START_TEST(test_ns_unbound_prefix_on_element)1405 START_TEST(test_ns_unbound_prefix_on_element)
1406 {
1407     char *text = "<a:doc/>";
1408     expect_failure(text,
1409                    XML_ERROR_UNBOUND_PREFIX,
1410                    "did not report unbound prefix on element");
1411 }
1412 END_TEST
1413 
1414 static Suite *
make_suite(void)1415 make_suite(void)
1416 {
1417     Suite *s = suite_create("basic");
1418     TCase *tc_basic = tcase_create("basic tests");
1419     TCase *tc_namespace = tcase_create("XML namespaces");
1420 
1421     suite_add_tcase(s, tc_basic);
1422     tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
1423     tcase_add_test(tc_basic, test_nul_byte);
1424     tcase_add_test(tc_basic, test_u0000_char);
1425     tcase_add_test(tc_basic, test_bom_utf8);
1426     tcase_add_test(tc_basic, test_bom_utf16_be);
1427     tcase_add_test(tc_basic, test_bom_utf16_le);
1428     tcase_add_test(tc_basic, test_illegal_utf8);
1429     tcase_add_test(tc_basic, test_utf16);
1430     tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
1431     tcase_add_test(tc_basic, test_latin1_umlauts);
1432     /* Regression test for SF bug #491986. */
1433     tcase_add_test(tc_basic, test_danish_latin1);
1434     /* Regression test for SF bug #514281. */
1435     tcase_add_test(tc_basic, test_french_charref_hexidecimal);
1436     tcase_add_test(tc_basic, test_french_charref_decimal);
1437     tcase_add_test(tc_basic, test_french_latin1);
1438     tcase_add_test(tc_basic, test_french_utf8);
1439     tcase_add_test(tc_basic, test_utf8_false_rejection);
1440     tcase_add_test(tc_basic, test_line_number_after_parse);
1441     tcase_add_test(tc_basic, test_column_number_after_parse);
1442     tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
1443     tcase_add_test(tc_basic, test_line_number_after_error);
1444     tcase_add_test(tc_basic, test_column_number_after_error);
1445     tcase_add_test(tc_basic, test_really_long_lines);
1446     tcase_add_test(tc_basic, test_end_element_events);
1447     tcase_add_test(tc_basic, test_attr_whitespace_normalization);
1448     tcase_add_test(tc_basic, test_xmldecl_misplaced);
1449     tcase_add_test(tc_basic, test_unknown_encoding_internal_entity);
1450     tcase_add_test(tc_basic,
1451                    test_wfc_undeclared_entity_unread_external_subset);
1452     tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
1453     tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
1454     tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
1455     tcase_add_test(tc_basic,
1456                    test_wfc_undeclared_entity_with_external_subset_standalone);
1457     tcase_add_test(tc_basic, test_wfc_no_recursive_entity_refs);
1458     tcase_add_test(tc_basic, test_ext_entity_set_encoding);
1459     tcase_add_test(tc_basic, test_dtd_default_handling);
1460     tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
1461     tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
1462     tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
1463     tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
1464 
1465     suite_add_tcase(s, tc_namespace);
1466     tcase_add_checked_fixture(tc_namespace,
1467                               namespace_setup, namespace_teardown);
1468     tcase_add_test(tc_namespace, test_return_ns_triplet);
1469     tcase_add_test(tc_namespace, test_ns_tagname_overwrite);
1470     tcase_add_test(tc_namespace, test_ns_tagname_overwrite_triplet);
1471     tcase_add_test(tc_namespace, test_start_ns_clears_start_element);
1472     tcase_add_test(tc_namespace, test_default_ns_from_ext_subset_and_ext_ge);
1473     tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_1);
1474     tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_2);
1475     tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_3);
1476     tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_4);
1477     tcase_add_test(tc_namespace, test_ns_default_with_empty_uri);
1478     tcase_add_test(tc_namespace, test_ns_duplicate_attrs_diff_prefixes);
1479     tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_attribute);
1480     tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_element);
1481 
1482     return s;
1483 }
1484 
1485 
1486 int
main(int argc,char * argv[])1487 main(int argc, char *argv[])
1488 {
1489     int i, nf;
1490     int verbosity = CK_NORMAL;
1491     Suite *s = make_suite();
1492     SRunner *sr = srunner_create(s);
1493 
1494     /* run the tests for internal helper functions */
1495     testhelper_is_whitespace_normalized();
1496 
1497     for (i = 1; i < argc; ++i) {
1498         char *opt = argv[i];
1499         if (strcmp(opt, "-v") == 0 || strcmp(opt, "--verbose") == 0)
1500             verbosity = CK_VERBOSE;
1501         else if (strcmp(opt, "-q") == 0 || strcmp(opt, "--quiet") == 0)
1502             verbosity = CK_SILENT;
1503         else {
1504             fprintf(stderr, "runtests: unknown option '%s'\n", opt);
1505             return 2;
1506         }
1507     }
1508     if (verbosity != CK_SILENT)
1509         printf("Expat version: %s\n", XML_ExpatVersion());
1510     srunner_run_all(sr, verbosity);
1511     nf = srunner_ntests_failed(sr);
1512     srunner_free(sr);
1513 
1514     return (nf == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
1515 }
1516