1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 #define IN_LIBXML
34 #include "libxml.h"
35
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41
42 #include <stdlib.h>
43 #include <limits.h>
44 #include <string.h>
45 #include <stdarg.h>
46 #include <libxml/xmlmemory.h>
47 #include <libxml/threads.h>
48 #include <libxml/globals.h>
49 #include <libxml/tree.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/valid.h>
53 #include <libxml/entities.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/encoding.h>
56 #include <libxml/xmlIO.h>
57 #include <libxml/uri.h>
58 #ifdef LIBXML_CATALOG_ENABLED
59 #include <libxml/catalog.h>
60 #endif
61 #ifdef LIBXML_SCHEMAS_ENABLED
62 #include <libxml/xmlschemastypes.h>
63 #include <libxml/relaxng.h>
64 #endif
65 #ifdef HAVE_CTYPE_H
66 #include <ctype.h>
67 #endif
68 #ifdef HAVE_STDLIB_H
69 #include <stdlib.h>
70 #endif
71 #ifdef HAVE_SYS_STAT_H
72 #include <sys/stat.h>
73 #endif
74 #ifdef HAVE_FCNTL_H
75 #include <fcntl.h>
76 #endif
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 #ifdef HAVE_ZLIB_H
81 #include <zlib.h>
82 #endif
83 #ifdef HAVE_LZMA_H
84 #include <lzma.h>
85 #endif
86
87 #include "buf.h"
88 #include "enc.h"
89
90 static void
91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
93 static xmlParserCtxtPtr
94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
97 /************************************************************************
98 * *
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
100 * *
101 ************************************************************************/
102
/*
 * Entity size thresholds. xmlParserEntityCheck() does not flag an entity
 * whose size is below XML_PARSER_BIG_ENTITY.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold on the ratio between the amount of
 * parsed entity replacement and the size in bytes of the input: reaching it
 * indicates an exponential behaviour. A value of 10 corresponds to at least
 * 3 entity replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
113
114 /*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)124 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
125 xmlEntityPtr ent, size_t replacement)
126 {
127 size_t consumed = 0;
128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
133
134 /*
135 * This may look absurd but is needed to detect
136 * entities problems
137 */
138 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
139 (ent->content != NULL) && (ent->checked == 0)) {
140 unsigned long oldnbent = ctxt->nbentities;
141 xmlChar *rep;
142
143 ent->checked = 1;
144
145 rep = xmlStringDecodeEntities(ctxt, ent->content,
146 XML_SUBSTITUTE_REF, 0, 0, 0);
147
148 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
149 if (rep != NULL) {
150 if (xmlStrchr(rep, '<'))
151 ent->checked |= 1;
152 xmlFree(rep);
153 rep = NULL;
154 }
155 }
156 if (replacement != 0) {
157 if (replacement < XML_MAX_TEXT_LENGTH)
158 return(0);
159
160 /*
161 * If the volume of entity copy reaches 10 times the
162 * amount of parsed data and over the large text threshold
163 * then that's very likely to be an abuse.
164 */
165 if (ctxt->input != NULL) {
166 consumed = ctxt->input->consumed +
167 (ctxt->input->cur - ctxt->input->base);
168 }
169 consumed += ctxt->sizeentities;
170
171 if (replacement < XML_PARSER_NON_LINEAR * consumed)
172 return(0);
173 } else if (size != 0) {
174 /*
175 * Do the check based on the replacement size of the entity
176 */
177 if (size < XML_PARSER_BIG_ENTITY)
178 return(0);
179
180 /*
181 * A limit on the amount of text data reasonably used
182 */
183 if (ctxt->input != NULL) {
184 consumed = ctxt->input->consumed +
185 (ctxt->input->cur - ctxt->input->base);
186 }
187 consumed += ctxt->sizeentities;
188
189 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
190 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
191 return (0);
192 } else if (ent != NULL) {
193 /*
194 * use the number of parsed entities in the replacement
195 */
196 size = ent->checked / 2;
197
198 /*
199 * The amount of data parsed counting entities size only once
200 */
201 if (ctxt->input != NULL) {
202 consumed = ctxt->input->consumed +
203 (ctxt->input->cur - ctxt->input->base);
204 }
205 consumed += ctxt->sizeentities;
206
207 /*
208 * Check the density of entities for the amount of data
209 * knowing an entity reference will take at least 3 bytes
210 */
211 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
212 return (0);
213 } else {
214 /*
215 * strange we got no data for checking
216 */
217 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
218 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
219 (ctxt->nbentities <= 10000))
220 return (0);
221 }
222 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
223 return (1);
224 }
225
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to process.
 * It is a safety boundary, not a limitation of the parser, and it can be
 * disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
235
236
237
/* Internal parser constants. */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * The minimal amount of data the parser expects to have received when
 * calling GROW. It is not a hard limit but an optimization when reading
 * strings like Names. It is not strictly needed as long as the available
 * input characters are followed by 0, which should be provided by the
 * I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
253
/*
 * NULL-terminated list of the XML prefixed PI allowed by W3C specs
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", "xml-model", NULL };
263
264
265 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
266 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
267 const xmlChar **str);
268
269 static xmlParserErrors
270 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
271 xmlSAXHandlerPtr sax,
272 void *user_data, int depth, const xmlChar *URL,
273 const xmlChar *ID, xmlNodePtr *list);
274
275 static int
276 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
277 const char *encoding);
278 #ifdef LIBXML_LEGACY_ENABLED
279 static void
280 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
281 xmlNodePtr lastNode);
282 #endif /* LIBXML_LEGACY_ENABLED */
283
284 static xmlParserErrors
285 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
286 const xmlChar *string, void *user_data, xmlNodePtr *lst);
287
288 static int
289 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
290
291 /************************************************************************
292 * *
293 * Some factorized error routines *
294 * *
295 ************************************************************************/
296
297 /**
298 * xmlErrAttributeDup:
299 * @ctxt: an XML parser context
300 * @prefix: the attribute prefix
301 * @localname: the attribute localname
302 *
303 * Handle a redefinition of attribute error
304 */
305 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)306 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
307 const xmlChar * localname)
308 {
309 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
310 (ctxt->instate == XML_PARSER_EOF))
311 return;
312 if (ctxt != NULL)
313 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
314
315 if (prefix == NULL)
316 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
317 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
318 (const char *) localname, NULL, NULL, 0, 0,
319 "Attribute %s redefined\n", localname);
320 else
321 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
322 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
323 (const char *) prefix, (const char *) localname,
324 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
325 localname);
326 if (ctxt != NULL) {
327 ctxt->wellFormed = 0;
328 if (ctxt->recovery == 0)
329 ctxt->disableSAX = 1;
330 }
331 }
332
333 /**
334 * xmlFatalErr:
335 * @ctxt: an XML parser context
336 * @error: the error number
337 * @extra: extra information string
338 *
339 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
340 */
341 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)342 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
343 {
344 const char *errmsg;
345 char errstr[129] = "";
346
347 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
348 (ctxt->instate == XML_PARSER_EOF))
349 return;
350 switch (error) {
351 case XML_ERR_INVALID_HEX_CHARREF:
352 errmsg = "CharRef: invalid hexadecimal value";
353 break;
354 case XML_ERR_INVALID_DEC_CHARREF:
355 errmsg = "CharRef: invalid decimal value";
356 break;
357 case XML_ERR_INVALID_CHARREF:
358 errmsg = "CharRef: invalid value";
359 break;
360 case XML_ERR_INTERNAL_ERROR:
361 errmsg = "internal error";
362 break;
363 case XML_ERR_PEREF_AT_EOF:
364 errmsg = "PEReference at end of document";
365 break;
366 case XML_ERR_PEREF_IN_PROLOG:
367 errmsg = "PEReference in prolog";
368 break;
369 case XML_ERR_PEREF_IN_EPILOG:
370 errmsg = "PEReference in epilog";
371 break;
372 case XML_ERR_PEREF_NO_NAME:
373 errmsg = "PEReference: no name";
374 break;
375 case XML_ERR_PEREF_SEMICOL_MISSING:
376 errmsg = "PEReference: expecting ';'";
377 break;
378 case XML_ERR_ENTITY_LOOP:
379 errmsg = "Detected an entity reference loop";
380 break;
381 case XML_ERR_ENTITY_NOT_STARTED:
382 errmsg = "EntityValue: \" or ' expected";
383 break;
384 case XML_ERR_ENTITY_PE_INTERNAL:
385 errmsg = "PEReferences forbidden in internal subset";
386 break;
387 case XML_ERR_ENTITY_NOT_FINISHED:
388 errmsg = "EntityValue: \" or ' expected";
389 break;
390 case XML_ERR_ATTRIBUTE_NOT_STARTED:
391 errmsg = "AttValue: \" or ' expected";
392 break;
393 case XML_ERR_LT_IN_ATTRIBUTE:
394 errmsg = "Unescaped '<' not allowed in attributes values";
395 break;
396 case XML_ERR_LITERAL_NOT_STARTED:
397 errmsg = "SystemLiteral \" or ' expected";
398 break;
399 case XML_ERR_LITERAL_NOT_FINISHED:
400 errmsg = "Unfinished System or Public ID \" or ' expected";
401 break;
402 case XML_ERR_MISPLACED_CDATA_END:
403 errmsg = "Sequence ']]>' not allowed in content";
404 break;
405 case XML_ERR_URI_REQUIRED:
406 errmsg = "SYSTEM or PUBLIC, the URI is missing";
407 break;
408 case XML_ERR_PUBID_REQUIRED:
409 errmsg = "PUBLIC, the Public Identifier is missing";
410 break;
411 case XML_ERR_HYPHEN_IN_COMMENT:
412 errmsg = "Comment must not contain '--' (double-hyphen)";
413 break;
414 case XML_ERR_PI_NOT_STARTED:
415 errmsg = "xmlParsePI : no target name";
416 break;
417 case XML_ERR_RESERVED_XML_NAME:
418 errmsg = "Invalid PI name";
419 break;
420 case XML_ERR_NOTATION_NOT_STARTED:
421 errmsg = "NOTATION: Name expected here";
422 break;
423 case XML_ERR_NOTATION_NOT_FINISHED:
424 errmsg = "'>' required to close NOTATION declaration";
425 break;
426 case XML_ERR_VALUE_REQUIRED:
427 errmsg = "Entity value required";
428 break;
429 case XML_ERR_URI_FRAGMENT:
430 errmsg = "Fragment not allowed";
431 break;
432 case XML_ERR_ATTLIST_NOT_STARTED:
433 errmsg = "'(' required to start ATTLIST enumeration";
434 break;
435 case XML_ERR_NMTOKEN_REQUIRED:
436 errmsg = "NmToken expected in ATTLIST enumeration";
437 break;
438 case XML_ERR_ATTLIST_NOT_FINISHED:
439 errmsg = "')' required to finish ATTLIST enumeration";
440 break;
441 case XML_ERR_MIXED_NOT_STARTED:
442 errmsg = "MixedContentDecl : '|' or ')*' expected";
443 break;
444 case XML_ERR_PCDATA_REQUIRED:
445 errmsg = "MixedContentDecl : '#PCDATA' expected";
446 break;
447 case XML_ERR_ELEMCONTENT_NOT_STARTED:
448 errmsg = "ContentDecl : Name or '(' expected";
449 break;
450 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
451 errmsg = "ContentDecl : ',' '|' or ')' expected";
452 break;
453 case XML_ERR_PEREF_IN_INT_SUBSET:
454 errmsg =
455 "PEReference: forbidden within markup decl in internal subset";
456 break;
457 case XML_ERR_GT_REQUIRED:
458 errmsg = "expected '>'";
459 break;
460 case XML_ERR_CONDSEC_INVALID:
461 errmsg = "XML conditional section '[' expected";
462 break;
463 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
464 errmsg = "Content error in the external subset";
465 break;
466 case XML_ERR_CONDSEC_INVALID_KEYWORD:
467 errmsg =
468 "conditional section INCLUDE or IGNORE keyword expected";
469 break;
470 case XML_ERR_CONDSEC_NOT_FINISHED:
471 errmsg = "XML conditional section not closed";
472 break;
473 case XML_ERR_XMLDECL_NOT_STARTED:
474 errmsg = "Text declaration '<?xml' required";
475 break;
476 case XML_ERR_XMLDECL_NOT_FINISHED:
477 errmsg = "parsing XML declaration: '?>' expected";
478 break;
479 case XML_ERR_EXT_ENTITY_STANDALONE:
480 errmsg = "external parsed entities cannot be standalone";
481 break;
482 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
483 errmsg = "EntityRef: expecting ';'";
484 break;
485 case XML_ERR_DOCTYPE_NOT_FINISHED:
486 errmsg = "DOCTYPE improperly terminated";
487 break;
488 case XML_ERR_LTSLASH_REQUIRED:
489 errmsg = "EndTag: '</' not found";
490 break;
491 case XML_ERR_EQUAL_REQUIRED:
492 errmsg = "expected '='";
493 break;
494 case XML_ERR_STRING_NOT_CLOSED:
495 errmsg = "String not closed expecting \" or '";
496 break;
497 case XML_ERR_STRING_NOT_STARTED:
498 errmsg = "String not started expecting ' or \"";
499 break;
500 case XML_ERR_ENCODING_NAME:
501 errmsg = "Invalid XML encoding name";
502 break;
503 case XML_ERR_STANDALONE_VALUE:
504 errmsg = "standalone accepts only 'yes' or 'no'";
505 break;
506 case XML_ERR_DOCUMENT_EMPTY:
507 errmsg = "Document is empty";
508 break;
509 case XML_ERR_DOCUMENT_END:
510 errmsg = "Extra content at the end of the document";
511 break;
512 case XML_ERR_NOT_WELL_BALANCED:
513 errmsg = "chunk is not well balanced";
514 break;
515 case XML_ERR_EXTRA_CONTENT:
516 errmsg = "extra content at the end of well balanced chunk";
517 break;
518 case XML_ERR_VERSION_MISSING:
519 errmsg = "Malformed declaration expecting version";
520 break;
521 case XML_ERR_NAME_TOO_LONG:
522 errmsg = "Name too long use XML_PARSE_HUGE option";
523 break;
524 #if 0
525 case:
526 errmsg = "";
527 break;
528 #endif
529 default:
530 errmsg = "Unregistered error message";
531 }
532 if (info == NULL)
533 snprintf(errstr, 128, "%s\n", errmsg);
534 else
535 snprintf(errstr, 128, "%s: %%s\n", errmsg);
536 if (ctxt != NULL)
537 ctxt->errNo = error;
538 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
539 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
540 info);
541 if (ctxt != NULL) {
542 ctxt->wellFormed = 0;
543 if (ctxt->recovery == 0)
544 ctxt->disableSAX = 1;
545 }
546 }
547
548 /**
549 * xmlFatalErrMsg:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
553 *
554 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
555 */
556 static void
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)557 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
558 const char *msg)
559 {
560 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
561 (ctxt->instate == XML_PARSER_EOF))
562 return;
563 if (ctxt != NULL)
564 ctxt->errNo = error;
565 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
566 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
567 if (ctxt != NULL) {
568 ctxt->wellFormed = 0;
569 if (ctxt->recovery == 0)
570 ctxt->disableSAX = 1;
571 }
572 }
573
574 /**
575 * xmlWarningMsg:
576 * @ctxt: an XML parser context
577 * @error: the error number
578 * @msg: the error message
579 * @str1: extra data
580 * @str2: extra data
581 *
582 * Handle a warning.
583 */
584 static void
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)585 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
586 const char *msg, const xmlChar *str1, const xmlChar *str2)
587 {
588 xmlStructuredErrorFunc schannel = NULL;
589
590 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
591 (ctxt->instate == XML_PARSER_EOF))
592 return;
593 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
594 (ctxt->sax->initialized == XML_SAX2_MAGIC))
595 schannel = ctxt->sax->serror;
596 if (ctxt != NULL) {
597 __xmlRaiseError(schannel,
598 (ctxt->sax) ? ctxt->sax->warning : NULL,
599 ctxt->userData,
600 ctxt, NULL, XML_FROM_PARSER, error,
601 XML_ERR_WARNING, NULL, 0,
602 (const char *) str1, (const char *) str2, NULL, 0, 0,
603 msg, (const char *) str1, (const char *) str2);
604 } else {
605 __xmlRaiseError(schannel, NULL, NULL,
606 ctxt, NULL, XML_FROM_PARSER, error,
607 XML_ERR_WARNING, NULL, 0,
608 (const char *) str1, (const char *) str2, NULL, 0, 0,
609 msg, (const char *) str1, (const char *) str2);
610 }
611 }
612
613 /**
614 * xmlValidityError:
615 * @ctxt: an XML parser context
616 * @error: the error number
617 * @msg: the error message
618 * @str1: extra data
619 *
620 * Handle a validity error.
621 */
622 static void
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)623 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
624 const char *msg, const xmlChar *str1, const xmlChar *str2)
625 {
626 xmlStructuredErrorFunc schannel = NULL;
627
628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629 (ctxt->instate == XML_PARSER_EOF))
630 return;
631 if (ctxt != NULL) {
632 ctxt->errNo = error;
633 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
634 schannel = ctxt->sax->serror;
635 }
636 if (ctxt != NULL) {
637 __xmlRaiseError(schannel,
638 ctxt->vctxt.error, ctxt->vctxt.userData,
639 ctxt, NULL, XML_FROM_DTD, error,
640 XML_ERR_ERROR, NULL, 0, (const char *) str1,
641 (const char *) str2, NULL, 0, 0,
642 msg, (const char *) str1, (const char *) str2);
643 ctxt->valid = 0;
644 } else {
645 __xmlRaiseError(schannel, NULL, NULL,
646 ctxt, NULL, XML_FROM_DTD, error,
647 XML_ERR_ERROR, NULL, 0, (const char *) str1,
648 (const char *) str2, NULL, 0, 0,
649 msg, (const char *) str1, (const char *) str2);
650 }
651 }
652
653 /**
654 * xmlFatalErrMsgInt:
655 * @ctxt: an XML parser context
656 * @error: the error number
657 * @msg: the error message
658 * @val: an integer value
659 *
660 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
661 */
662 static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)663 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
664 const char *msg, int val)
665 {
666 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
667 (ctxt->instate == XML_PARSER_EOF))
668 return;
669 if (ctxt != NULL)
670 ctxt->errNo = error;
671 __xmlRaiseError(NULL, NULL, NULL,
672 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
673 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
674 if (ctxt != NULL) {
675 ctxt->wellFormed = 0;
676 if (ctxt->recovery == 0)
677 ctxt->disableSAX = 1;
678 }
679 }
680
681 /**
682 * xmlFatalErrMsgStrIntStr:
683 * @ctxt: an XML parser context
684 * @error: the error number
685 * @msg: the error message
686 * @str1: an string info
687 * @val: an integer value
688 * @str2: an string info
689 *
690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
691 */
692 static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)693 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694 const char *msg, const xmlChar *str1, int val,
695 const xmlChar *str2)
696 {
697 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
698 (ctxt->instate == XML_PARSER_EOF))
699 return;
700 if (ctxt != NULL)
701 ctxt->errNo = error;
702 __xmlRaiseError(NULL, NULL, NULL,
703 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
704 NULL, 0, (const char *) str1, (const char *) str2,
705 NULL, val, 0, msg, str1, val, str2);
706 if (ctxt != NULL) {
707 ctxt->wellFormed = 0;
708 if (ctxt->recovery == 0)
709 ctxt->disableSAX = 1;
710 }
711 }
712
713 /**
714 * xmlFatalErrMsgStr:
715 * @ctxt: an XML parser context
716 * @error: the error number
717 * @msg: the error message
718 * @val: a string value
719 *
720 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
721 */
722 static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)723 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
724 const char *msg, const xmlChar * val)
725 {
726 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
727 (ctxt->instate == XML_PARSER_EOF))
728 return;
729 if (ctxt != NULL)
730 ctxt->errNo = error;
731 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
732 XML_FROM_PARSER, error, XML_ERR_FATAL,
733 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
734 val);
735 if (ctxt != NULL) {
736 ctxt->wellFormed = 0;
737 if (ctxt->recovery == 0)
738 ctxt->disableSAX = 1;
739 }
740 }
741
742 /**
743 * xmlErrMsgStr:
744 * @ctxt: an XML parser context
745 * @error: the error number
746 * @msg: the error message
747 * @val: a string value
748 *
749 * Handle a non fatal parser error
750 */
751 static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)752 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
753 const char *msg, const xmlChar * val)
754 {
755 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
756 (ctxt->instate == XML_PARSER_EOF))
757 return;
758 if (ctxt != NULL)
759 ctxt->errNo = error;
760 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
761 XML_FROM_PARSER, error, XML_ERR_ERROR,
762 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
763 val);
764 }
765
766 /**
767 * xmlNsErr:
768 * @ctxt: an XML parser context
769 * @error: the error number
770 * @msg: the message
771 * @info1: extra information string
772 * @info2: extra information string
773 *
774 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
775 */
776 static void
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)777 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
778 const char *msg,
779 const xmlChar * info1, const xmlChar * info2,
780 const xmlChar * info3)
781 {
782 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
783 (ctxt->instate == XML_PARSER_EOF))
784 return;
785 if (ctxt != NULL)
786 ctxt->errNo = error;
787 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
788 XML_ERR_ERROR, NULL, 0, (const char *) info1,
789 (const char *) info2, (const char *) info3, 0, 0, msg,
790 info1, info2, info3);
791 if (ctxt != NULL)
792 ctxt->nsWellFormed = 0;
793 }
794
795 /**
796 * xmlNsWarn
797 * @ctxt: an XML parser context
798 * @error: the error number
799 * @msg: the message
800 * @info1: extra information string
801 * @info2: extra information string
802 *
803 * Handle a namespace warning error
804 */
805 static void
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)806 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
807 const char *msg,
808 const xmlChar * info1, const xmlChar * info2,
809 const xmlChar * info3)
810 {
811 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
812 (ctxt->instate == XML_PARSER_EOF))
813 return;
814 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
815 XML_ERR_WARNING, NULL, 0, (const char *) info1,
816 (const char *) info2, (const char *) info3, 0, 0, msg,
817 info1, info2, info3);
818 }
819
820 /************************************************************************
821 * *
822 * Library wide options *
823 * *
824 ************************************************************************/
825
826 /**
827 * xmlHasFeature:
828 * @feature: the feature to be examined
829 *
830 * Examines if the library has been compiled with a given feature.
831 *
832 * Returns a non-zero value if the feature exist, otherwise zero.
833 * Returns zero (0) if the feature does not exist or an unknown
834 * unknown feature is requested, non-zero otherwise.
835 */
836 int
xmlHasFeature(xmlFeature feature)837 xmlHasFeature(xmlFeature feature)
838 {
839 switch (feature) {
840 case XML_WITH_THREAD:
841 #ifdef LIBXML_THREAD_ENABLED
842 return(1);
843 #else
844 return(0);
845 #endif
846 case XML_WITH_TREE:
847 #ifdef LIBXML_TREE_ENABLED
848 return(1);
849 #else
850 return(0);
851 #endif
852 case XML_WITH_OUTPUT:
853 #ifdef LIBXML_OUTPUT_ENABLED
854 return(1);
855 #else
856 return(0);
857 #endif
858 case XML_WITH_PUSH:
859 #ifdef LIBXML_PUSH_ENABLED
860 return(1);
861 #else
862 return(0);
863 #endif
864 case XML_WITH_READER:
865 #ifdef LIBXML_READER_ENABLED
866 return(1);
867 #else
868 return(0);
869 #endif
870 case XML_WITH_PATTERN:
871 #ifdef LIBXML_PATTERN_ENABLED
872 return(1);
873 #else
874 return(0);
875 #endif
876 case XML_WITH_WRITER:
877 #ifdef LIBXML_WRITER_ENABLED
878 return(1);
879 #else
880 return(0);
881 #endif
882 case XML_WITH_SAX1:
883 #ifdef LIBXML_SAX1_ENABLED
884 return(1);
885 #else
886 return(0);
887 #endif
888 case XML_WITH_FTP:
889 #ifdef LIBXML_FTP_ENABLED
890 return(1);
891 #else
892 return(0);
893 #endif
894 case XML_WITH_HTTP:
895 #ifdef LIBXML_HTTP_ENABLED
896 return(1);
897 #else
898 return(0);
899 #endif
900 case XML_WITH_VALID:
901 #ifdef LIBXML_VALID_ENABLED
902 return(1);
903 #else
904 return(0);
905 #endif
906 case XML_WITH_HTML:
907 #ifdef LIBXML_HTML_ENABLED
908 return(1);
909 #else
910 return(0);
911 #endif
912 case XML_WITH_LEGACY:
913 #ifdef LIBXML_LEGACY_ENABLED
914 return(1);
915 #else
916 return(0);
917 #endif
918 case XML_WITH_C14N:
919 #ifdef LIBXML_C14N_ENABLED
920 return(1);
921 #else
922 return(0);
923 #endif
924 case XML_WITH_CATALOG:
925 #ifdef LIBXML_CATALOG_ENABLED
926 return(1);
927 #else
928 return(0);
929 #endif
930 case XML_WITH_XPATH:
931 #ifdef LIBXML_XPATH_ENABLED
932 return(1);
933 #else
934 return(0);
935 #endif
936 case XML_WITH_XPTR:
937 #ifdef LIBXML_XPTR_ENABLED
938 return(1);
939 #else
940 return(0);
941 #endif
942 case XML_WITH_XINCLUDE:
943 #ifdef LIBXML_XINCLUDE_ENABLED
944 return(1);
945 #else
946 return(0);
947 #endif
948 case XML_WITH_ICONV:
949 #ifdef LIBXML_ICONV_ENABLED
950 return(1);
951 #else
952 return(0);
953 #endif
954 case XML_WITH_ISO8859X:
955 #ifdef LIBXML_ISO8859X_ENABLED
956 return(1);
957 #else
958 return(0);
959 #endif
960 case XML_WITH_UNICODE:
961 #ifdef LIBXML_UNICODE_ENABLED
962 return(1);
963 #else
964 return(0);
965 #endif
966 case XML_WITH_REGEXP:
967 #ifdef LIBXML_REGEXP_ENABLED
968 return(1);
969 #else
970 return(0);
971 #endif
972 case XML_WITH_AUTOMATA:
973 #ifdef LIBXML_AUTOMATA_ENABLED
974 return(1);
975 #else
976 return(0);
977 #endif
978 case XML_WITH_EXPR:
979 #ifdef LIBXML_EXPR_ENABLED
980 return(1);
981 #else
982 return(0);
983 #endif
984 case XML_WITH_SCHEMAS:
985 #ifdef LIBXML_SCHEMAS_ENABLED
986 return(1);
987 #else
988 return(0);
989 #endif
990 case XML_WITH_SCHEMATRON:
991 #ifdef LIBXML_SCHEMATRON_ENABLED
992 return(1);
993 #else
994 return(0);
995 #endif
996 case XML_WITH_MODULES:
997 #ifdef LIBXML_MODULES_ENABLED
998 return(1);
999 #else
1000 return(0);
1001 #endif
1002 case XML_WITH_DEBUG:
1003 #ifdef LIBXML_DEBUG_ENABLED
1004 return(1);
1005 #else
1006 return(0);
1007 #endif
1008 case XML_WITH_DEBUG_MEM:
1009 #ifdef DEBUG_MEMORY_LOCATION
1010 return(1);
1011 #else
1012 return(0);
1013 #endif
1014 case XML_WITH_DEBUG_RUN:
1015 #ifdef LIBXML_DEBUG_RUNTIME
1016 return(1);
1017 #else
1018 return(0);
1019 #endif
1020 case XML_WITH_ZLIB:
1021 #ifdef LIBXML_ZLIB_ENABLED
1022 return(1);
1023 #else
1024 return(0);
1025 #endif
1026 case XML_WITH_LZMA:
1027 #ifdef LIBXML_LZMA_ENABLED
1028 return(1);
1029 #else
1030 return(0);
1031 #endif
1032 case XML_WITH_ICU:
1033 #ifdef LIBXML_ICU_ENABLED
1034 return(1);
1035 #else
1036 return(0);
1037 #endif
1038 default:
1039 break;
1040 }
1041 return(0);
1042 }
1043
1044 /************************************************************************
1045 * *
1046 * SAX2 defaulted attributes handling *
1047 * *
1048 ************************************************************************/
1049
1050 /**
1051 * xmlDetectSAX2:
1052 * @ctxt: an XML parser context
1053 *
1054 * Do the SAX2 detection and specific intialization
1055 */
1056 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1057 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1058 if (ctxt == NULL) return;
1059 #ifdef LIBXML_SAX1_ENABLED
1060 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1061 ((ctxt->sax->startElementNs != NULL) ||
1062 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1063 #else
1064 ctxt->sax2 = 1;
1065 #endif /* LIBXML_SAX1_ENABLED */
1066
1067 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1068 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1069 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1070 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1071 (ctxt->str_xml_ns == NULL)) {
1072 xmlErrMemory(ctxt, NULL);
1073 }
1074 }
1075
1076 typedef struct _xmlDefAttrs xmlDefAttrs;
1077 typedef xmlDefAttrs *xmlDefAttrsPtr;
1078 struct _xmlDefAttrs {
1079 int nbAttrs; /* number of defaulted attributes on that element */
1080 int maxAttrs; /* the size of the array */
1081 const xmlChar *values[5]; /* array of localname/prefix/values/external */
1082 };
1083
/**
 * xmlAttrNormalizeSpace:
 * @src:  the source string
 * @dst:  the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst needs to be at least that of src, and if one
 * doesn't need to preserve src (and it doesn't come from a dictionary or
 * read-only memory) then passing src as dst is just fine.
 *
 * Returns a pointer to the start of the normalized value (dst), or NULL
 *         if an argument is NULL or if the normalization was done in place
 *         and did not remove anything.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    xmlChar *out;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    /* keep dst untouched so the start of the result can be returned */
    out = dst;

    /* drop the leading spaces */
    while (*src == 0x20) src++;
    while (*src != 0) {
        if (*src == 0x20) {
            /* collapse the run; emit one space only if text follows */
            while (*src == 0x20) src++;
            if (*src != 0)
                *out++ = 0x20;
        } else {
            *out++ = *src++;
        }
    }
    *out = 0;

    /*
     * The write cursor can only catch up with the read cursor when
     * working in place and no character was dropped.
     */
    if (out == src)
        return(NULL);
    return(dst);
}
1122
1123 /**
1124 * xmlAttrNormalizeSpace2:
1125 * @src: the source string
1126 *
1127 * Normalize the space in non CDATA attribute values, a slightly more complex
1128 * front end to avoid allocation problems when running on attribute values
1129 * coming from the input.
1130 *
1131 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1132 * is needed.
1133 */
1134 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1135 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1136 {
1137 int i;
1138 int remove_head = 0;
1139 int need_realloc = 0;
1140 const xmlChar *cur;
1141
1142 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1143 return(NULL);
1144 i = *len;
1145 if (i <= 0)
1146 return(NULL);
1147
1148 cur = src;
1149 while (*cur == 0x20) {
1150 cur++;
1151 remove_head++;
1152 }
1153 while (*cur != 0) {
1154 if (*cur == 0x20) {
1155 cur++;
1156 if ((*cur == 0x20) || (*cur == 0)) {
1157 need_realloc = 1;
1158 break;
1159 }
1160 } else
1161 cur++;
1162 }
1163 if (need_realloc) {
1164 xmlChar *ret;
1165
1166 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1167 if (ret == NULL) {
1168 xmlErrMemory(ctxt, NULL);
1169 return(NULL);
1170 }
1171 xmlAttrNormalizeSpace(ret, ret);
1172 *len = (int) strlen((const char *)ret);
1173 return(ret);
1174 } else if (remove_head) {
1175 *len -= remove_head;
1176 memmove(src, src + remove_head, 1 + *len);
1177 return(src);
1178 }
1179 return(NULL);
1180 }
1181
1182 /**
1183 * xmlAddDefAttrs:
1184 * @ctxt: an XML parser context
1185 * @fullname: the element fullname
1186 * @fullattr: the attribute fullname
1187 * @value: the attribute value
1188 *
1189 * Add a defaulted attribute for an element
1190 */
1191 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1192 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1193 const xmlChar *fullname,
1194 const xmlChar *fullattr,
1195 const xmlChar *value) {
1196 xmlDefAttrsPtr defaults;
1197 int len;
1198 const xmlChar *name;
1199 const xmlChar *prefix;
1200
1201 /*
1202 * Allows to detect attribute redefinitions
1203 */
1204 if (ctxt->attsSpecial != NULL) {
1205 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1206 return;
1207 }
1208
1209 if (ctxt->attsDefault == NULL) {
1210 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1211 if (ctxt->attsDefault == NULL)
1212 goto mem_error;
1213 }
1214
1215 /*
1216 * split the element name into prefix:localname , the string found
1217 * are within the DTD and then not associated to namespace names.
1218 */
1219 name = xmlSplitQName3(fullname, &len);
1220 if (name == NULL) {
1221 name = xmlDictLookup(ctxt->dict, fullname, -1);
1222 prefix = NULL;
1223 } else {
1224 name = xmlDictLookup(ctxt->dict, name, -1);
1225 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1226 }
1227
1228 /*
1229 * make sure there is some storage
1230 */
1231 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1232 if (defaults == NULL) {
1233 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1234 (4 * 5) * sizeof(const xmlChar *));
1235 if (defaults == NULL)
1236 goto mem_error;
1237 defaults->nbAttrs = 0;
1238 defaults->maxAttrs = 4;
1239 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1240 defaults, NULL) < 0) {
1241 xmlFree(defaults);
1242 goto mem_error;
1243 }
1244 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1245 xmlDefAttrsPtr temp;
1246
1247 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1248 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1249 if (temp == NULL)
1250 goto mem_error;
1251 defaults = temp;
1252 defaults->maxAttrs *= 2;
1253 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1254 defaults, NULL) < 0) {
1255 xmlFree(defaults);
1256 goto mem_error;
1257 }
1258 }
1259
1260 /*
1261 * Split the element name into prefix:localname , the string found
1262 * are within the DTD and hen not associated to namespace names.
1263 */
1264 name = xmlSplitQName3(fullattr, &len);
1265 if (name == NULL) {
1266 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1267 prefix = NULL;
1268 } else {
1269 name = xmlDictLookup(ctxt->dict, name, -1);
1270 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1271 }
1272
1273 defaults->values[5 * defaults->nbAttrs] = name;
1274 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1275 /* intern the string and precompute the end */
1276 len = xmlStrlen(value);
1277 value = xmlDictLookup(ctxt->dict, value, len);
1278 defaults->values[5 * defaults->nbAttrs + 2] = value;
1279 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1280 if (ctxt->external)
1281 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1282 else
1283 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1284 defaults->nbAttrs++;
1285
1286 return;
1287
1288 mem_error:
1289 xmlErrMemory(ctxt, NULL);
1290 return;
1291 }
1292
1293 /**
1294 * xmlAddSpecialAttr:
1295 * @ctxt: an XML parser context
1296 * @fullname: the element fullname
1297 * @fullattr: the attribute fullname
1298 * @type: the attribute type
1299 *
1300 * Register this attribute type
1301 */
1302 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1303 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1304 const xmlChar *fullname,
1305 const xmlChar *fullattr,
1306 int type)
1307 {
1308 if (ctxt->attsSpecial == NULL) {
1309 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1310 if (ctxt->attsSpecial == NULL)
1311 goto mem_error;
1312 }
1313
1314 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1315 return;
1316
1317 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1318 (void *) (long) type);
1319 return;
1320
1321 mem_error:
1322 xmlErrMemory(ctxt, NULL);
1323 return;
1324 }
1325
1326 /**
1327 * xmlCleanSpecialAttrCallback:
1328 *
1329 * Removes CDATA attributes from the special attribute table
1330 */
1331 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1332 xmlCleanSpecialAttrCallback(void *payload, void *data,
1333 const xmlChar *fullname, const xmlChar *fullattr,
1334 const xmlChar *unused ATTRIBUTE_UNUSED) {
1335 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1336
1337 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1338 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1339 }
1340 }
1341
1342 /**
1343 * xmlCleanSpecialAttr:
1344 * @ctxt: an XML parser context
1345 *
1346 * Trim the list of attributes defined to remove all those of type
1347 * CDATA as they are not special. This call should be done when finishing
1348 * to parse the DTD and before starting to parse the document root.
1349 */
1350 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1351 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1352 {
1353 if (ctxt->attsSpecial == NULL)
1354 return;
1355
1356 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1357
1358 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1359 xmlHashFree(ctxt->attsSpecial, NULL);
1360 ctxt->attsSpecial = NULL;
1361 }
1362 return;
1363 }
1364
/*
 * xmlLangIsAlpha:
 * Returns non-zero if @c is an ASCII letter.
 */
static int
xmlLangIsAlpha(xmlChar c) {
    return (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/*
 * xmlLangIsDigit:
 * Returns non-zero if @c is an ASCII digit.
 */
static int
xmlLangIsDigit(xmlChar c) {
    return ((c >= '0') && (c <= '9'));
}

/*
 * xmlLangSkipAlpha:
 * Returns a pointer to the first character of @p which is not an ASCII
 * letter.
 */
static const xmlChar *
xmlLangSkipAlpha(const xmlChar *p) {
    while (xmlLangIsAlpha(*p))
        p++;
    return (p);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * It sounds right to still allow the irregular i-xxx IANA and user codes
 * too. The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *start = lang;   /* first character of the current subtag */
    const xmlChar *end;            /* first character past the current subtag */

    if (start == NULL)
        return (0);

    /*
     * Still allow IANA code and user code which were coming
     * from the previous version of the XML-1.0 specification,
     * it's deprecated but we should not fail
     */
    if (((start[0] == 'i') || (start[0] == 'I') ||
         (start[0] == 'x') || (start[0] == 'X')) && (start[1] == '-')) {
        end = xmlLangSkipAlpha(start + 2);
        return (end[0] == 0);
    }

    /* primary language subtag */
    end = xmlLangSkipAlpha(start);
    if (end - start >= 4) {
        /* reserved or registered: at most 8 letters and nothing after */
        return ((end - start <= 8) && (end[0] == 0));
    }
    if (end - start < 2)
        return (0);
    /* we got an ISO 639 code */
    if (end[0] == 0)
        return (1);
    if (end[0] != '-')
        return (0);

    /* now we can have extlang or script or region or variant */
    start = ++end;
    if (xmlLangIsDigit(end[0]))
        goto region_m49;
    end = xmlLangSkipAlpha(end);
    switch (end - start) {
        case 4:
            goto script;
        case 2:
            goto region;
        case 5: case 6: case 7: case 8:
            goto variant;
        case 3:
            break;
        default:
            return (0);
    }

    /* we parsed an extlang */
    if (end[0] == 0)
        return (1);
    if (end[0] != '-')
        return (0);

    /* now we can have script or region or variant */
    start = ++end;
    if (xmlLangIsDigit(end[0]))
        goto region_m49;
    end = xmlLangSkipAlpha(end);
    switch (end - start) {
        case 2:
            goto region;
        case 5: case 6: case 7: case 8:
            goto variant;
        case 4:
            break;
        default:
            return (0);
    }

    /* we parsed a script */
script:
    if (end[0] == 0)
        return (1);
    if (end[0] != '-')
        return (0);

    /* now we can have region or variant */
    start = ++end;
    if (xmlLangIsDigit(end[0]))
        goto region_m49;
    end = xmlLangSkipAlpha(end);
    switch (end - start) {
        case 5: case 6: case 7: case 8:
            goto variant;
        case 2:
            break;
        default:
            return (0);
    }

    /* we parsed a region */
region:
    if (end[0] == 0)
        return (1);
    if (end[0] != '-')
        return (0);

    /* now we can just have a variant */
    start = ++end;
    end = xmlLangSkipAlpha(end);
    if ((end - start < 5) || (end - start > 8))
        return (0);

    /* we parsed a variant */
variant:
    if (end[0] == 0)
        return (1);
    if (end[0] != '-')
        return (0);
    /* extensions and private use subtags not checked */
    return (1);

region_m49:
    /* end[0] is already known to be a digit, two more are required */
    if (xmlLangIsDigit(end[1]) && xmlLangIsDigit(end[2])) {
        end += 3;
        goto region;
    }
    return (0);
}
1558
1559 /************************************************************************
1560 * *
1561 * Parser stacks related functions and macros *
1562 * *
1563 ************************************************************************/
1564
1565 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1566 const xmlChar ** str);
1567
1568 #ifdef SAX2
1569 /**
1570 * nsPush:
1571 * @ctxt: an XML parser context
1572 * @prefix: the namespace prefix or NULL
1573 * @URL: the namespace name
1574 *
1575 * Pushes a new parser namespace on top of the ns stack
1576 *
1577 * Returns -1 in case of error, -2 if the namespace should be discarded
1578 * and the index in the stack otherwise.
1579 */
1580 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1581 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1582 {
1583 if (ctxt->options & XML_PARSE_NSCLEAN) {
1584 int i;
1585 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1586 if (ctxt->nsTab[i] == prefix) {
1587 /* in scope */
1588 if (ctxt->nsTab[i + 1] == URL)
1589 return(-2);
1590 /* out of scope keep it */
1591 break;
1592 }
1593 }
1594 }
1595 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1596 ctxt->nsMax = 10;
1597 ctxt->nsNr = 0;
1598 ctxt->nsTab = (const xmlChar **)
1599 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1600 if (ctxt->nsTab == NULL) {
1601 xmlErrMemory(ctxt, NULL);
1602 ctxt->nsMax = 0;
1603 return (-1);
1604 }
1605 } else if (ctxt->nsNr >= ctxt->nsMax) {
1606 const xmlChar ** tmp;
1607 ctxt->nsMax *= 2;
1608 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1609 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1610 if (tmp == NULL) {
1611 xmlErrMemory(ctxt, NULL);
1612 ctxt->nsMax /= 2;
1613 return (-1);
1614 }
1615 ctxt->nsTab = tmp;
1616 }
1617 ctxt->nsTab[ctxt->nsNr++] = prefix;
1618 ctxt->nsTab[ctxt->nsNr++] = URL;
1619 return (ctxt->nsNr);
1620 }
1621 /**
1622 * nsPop:
1623 * @ctxt: an XML parser context
1624 * @nr: the number to pop
1625 *
1626 * Pops the top @nr parser prefix/namespace from the ns stack
1627 *
1628 * Returns the number of namespaces removed
1629 */
1630 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1631 nsPop(xmlParserCtxtPtr ctxt, int nr)
1632 {
1633 int i;
1634
1635 if (ctxt->nsTab == NULL) return(0);
1636 if (ctxt->nsNr < nr) {
1637 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1638 nr = ctxt->nsNr;
1639 }
1640 if (ctxt->nsNr <= 0)
1641 return (0);
1642
1643 for (i = 0;i < nr;i++) {
1644 ctxt->nsNr--;
1645 ctxt->nsTab[ctxt->nsNr] = NULL;
1646 }
1647 return(nr);
1648 }
1649 #endif
1650
1651 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1652 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1653 const xmlChar **atts;
1654 int *attallocs;
1655 int maxatts;
1656
1657 if (ctxt->atts == NULL) {
1658 maxatts = 55; /* allow for 10 attrs by default */
1659 atts = (const xmlChar **)
1660 xmlMalloc(maxatts * sizeof(xmlChar *));
1661 if (atts == NULL) goto mem_error;
1662 ctxt->atts = atts;
1663 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1664 if (attallocs == NULL) goto mem_error;
1665 ctxt->attallocs = attallocs;
1666 ctxt->maxatts = maxatts;
1667 } else if (nr + 5 > ctxt->maxatts) {
1668 maxatts = (nr + 5) * 2;
1669 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1670 maxatts * sizeof(const xmlChar *));
1671 if (atts == NULL) goto mem_error;
1672 ctxt->atts = atts;
1673 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1674 (maxatts / 5) * sizeof(int));
1675 if (attallocs == NULL) goto mem_error;
1676 ctxt->attallocs = attallocs;
1677 ctxt->maxatts = maxatts;
1678 }
1679 return(ctxt->maxatts);
1680 mem_error:
1681 xmlErrMemory(ctxt, NULL);
1682 return(-1);
1683 }
1684
1685 /**
1686 * inputPush:
1687 * @ctxt: an XML parser context
1688 * @value: the parser input
1689 *
1690 * Pushes a new parser input on top of the input stack
1691 *
1692 * Returns -1 in case of error, the index in the stack otherwise
1693 */
1694 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1695 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1696 {
1697 if ((ctxt == NULL) || (value == NULL))
1698 return(-1);
1699 if (ctxt->inputNr >= ctxt->inputMax) {
1700 ctxt->inputMax *= 2;
1701 ctxt->inputTab =
1702 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1703 ctxt->inputMax *
1704 sizeof(ctxt->inputTab[0]));
1705 if (ctxt->inputTab == NULL) {
1706 xmlErrMemory(ctxt, NULL);
1707 xmlFreeInputStream(value);
1708 ctxt->inputMax /= 2;
1709 value = NULL;
1710 return (-1);
1711 }
1712 }
1713 ctxt->inputTab[ctxt->inputNr] = value;
1714 ctxt->input = value;
1715 return (ctxt->inputNr++);
1716 }
1717 /**
1718 * inputPop:
1719 * @ctxt: an XML parser context
1720 *
1721 * Pops the top parser input from the input stack
1722 *
1723 * Returns the input just removed
1724 */
1725 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1726 inputPop(xmlParserCtxtPtr ctxt)
1727 {
1728 xmlParserInputPtr ret;
1729
1730 if (ctxt == NULL)
1731 return(NULL);
1732 if (ctxt->inputNr <= 0)
1733 return (NULL);
1734 ctxt->inputNr--;
1735 if (ctxt->inputNr > 0)
1736 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1737 else
1738 ctxt->input = NULL;
1739 ret = ctxt->inputTab[ctxt->inputNr];
1740 ctxt->inputTab[ctxt->inputNr] = NULL;
1741 return (ret);
1742 }
1743 /**
1744 * nodePush:
1745 * @ctxt: an XML parser context
1746 * @value: the element node
1747 *
1748 * Pushes a new element node on top of the node stack
1749 *
1750 * Returns -1 in case of error, the index in the stack otherwise
1751 */
1752 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1753 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1754 {
1755 if (ctxt == NULL) return(0);
1756 if (ctxt->nodeNr >= ctxt->nodeMax) {
1757 xmlNodePtr *tmp;
1758
1759 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1760 ctxt->nodeMax * 2 *
1761 sizeof(ctxt->nodeTab[0]));
1762 if (tmp == NULL) {
1763 xmlErrMemory(ctxt, NULL);
1764 return (-1);
1765 }
1766 ctxt->nodeTab = tmp;
1767 ctxt->nodeMax *= 2;
1768 }
1769 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1770 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1771 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1772 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1773 xmlParserMaxDepth);
1774 ctxt->instate = XML_PARSER_EOF;
1775 return(-1);
1776 }
1777 ctxt->nodeTab[ctxt->nodeNr] = value;
1778 ctxt->node = value;
1779 return (ctxt->nodeNr++);
1780 }
1781
1782 /**
1783 * nodePop:
1784 * @ctxt: an XML parser context
1785 *
1786 * Pops the top element node from the node stack
1787 *
1788 * Returns the node just removed
1789 */
1790 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1791 nodePop(xmlParserCtxtPtr ctxt)
1792 {
1793 xmlNodePtr ret;
1794
1795 if (ctxt == NULL) return(NULL);
1796 if (ctxt->nodeNr <= 0)
1797 return (NULL);
1798 ctxt->nodeNr--;
1799 if (ctxt->nodeNr > 0)
1800 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1801 else
1802 ctxt->node = NULL;
1803 ret = ctxt->nodeTab[ctxt->nodeNr];
1804 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1805 return (ret);
1806 }
1807
1808 #ifdef LIBXML_PUSH_ENABLED
1809 /**
1810 * nameNsPush:
1811 * @ctxt: an XML parser context
1812 * @value: the element name
1813 * @prefix: the element prefix
1814 * @URI: the element namespace name
1815 *
1816 * Pushes a new element name/prefix/URL on top of the name stack
1817 *
1818 * Returns -1 in case of error, the index in the stack otherwise
1819 */
1820 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1821 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1822 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1823 {
1824 if (ctxt->nameNr >= ctxt->nameMax) {
1825 const xmlChar * *tmp;
1826 void **tmp2;
1827 ctxt->nameMax *= 2;
1828 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1829 ctxt->nameMax *
1830 sizeof(ctxt->nameTab[0]));
1831 if (tmp == NULL) {
1832 ctxt->nameMax /= 2;
1833 goto mem_error;
1834 }
1835 ctxt->nameTab = tmp;
1836 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1837 ctxt->nameMax * 3 *
1838 sizeof(ctxt->pushTab[0]));
1839 if (tmp2 == NULL) {
1840 ctxt->nameMax /= 2;
1841 goto mem_error;
1842 }
1843 ctxt->pushTab = tmp2;
1844 }
1845 ctxt->nameTab[ctxt->nameNr] = value;
1846 ctxt->name = value;
1847 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1848 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1849 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1850 return (ctxt->nameNr++);
1851 mem_error:
1852 xmlErrMemory(ctxt, NULL);
1853 return (-1);
1854 }
1855 /**
1856 * nameNsPop:
1857 * @ctxt: an XML parser context
1858 *
1859 * Pops the top element/prefix/URI name from the name stack
1860 *
1861 * Returns the name just removed
1862 */
1863 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1864 nameNsPop(xmlParserCtxtPtr ctxt)
1865 {
1866 const xmlChar *ret;
1867
1868 if (ctxt->nameNr <= 0)
1869 return (NULL);
1870 ctxt->nameNr--;
1871 if (ctxt->nameNr > 0)
1872 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1873 else
1874 ctxt->name = NULL;
1875 ret = ctxt->nameTab[ctxt->nameNr];
1876 ctxt->nameTab[ctxt->nameNr] = NULL;
1877 return (ret);
1878 }
1879 #endif /* LIBXML_PUSH_ENABLED */
1880
1881 /**
1882 * namePush:
1883 * @ctxt: an XML parser context
1884 * @value: the element name
1885 *
1886 * Pushes a new element name on top of the name stack
1887 *
1888 * Returns -1 in case of error, the index in the stack otherwise
1889 */
1890 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1891 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1892 {
1893 if (ctxt == NULL) return (-1);
1894
1895 if (ctxt->nameNr >= ctxt->nameMax) {
1896 const xmlChar * *tmp;
1897 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1898 ctxt->nameMax * 2 *
1899 sizeof(ctxt->nameTab[0]));
1900 if (tmp == NULL) {
1901 goto mem_error;
1902 }
1903 ctxt->nameTab = tmp;
1904 ctxt->nameMax *= 2;
1905 }
1906 ctxt->nameTab[ctxt->nameNr] = value;
1907 ctxt->name = value;
1908 return (ctxt->nameNr++);
1909 mem_error:
1910 xmlErrMemory(ctxt, NULL);
1911 return (-1);
1912 }
1913 /**
1914 * namePop:
1915 * @ctxt: an XML parser context
1916 *
1917 * Pops the top element name from the name stack
1918 *
1919 * Returns the name just removed
1920 */
1921 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1922 namePop(xmlParserCtxtPtr ctxt)
1923 {
1924 const xmlChar *ret;
1925
1926 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1927 return (NULL);
1928 ctxt->nameNr--;
1929 if (ctxt->nameNr > 0)
1930 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1931 else
1932 ctxt->name = NULL;
1933 ret = ctxt->nameTab[ctxt->nameNr];
1934 ctxt->nameTab[ctxt->nameNr] = NULL;
1935 return (ret);
1936 }
1937
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack and points ctxt->space at the
 * new top entry. The stack is doubled when full.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int *top;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    top = &ctxt->spaceTab[ctxt->spaceNr];
    *top = val;
    ctxt->space = top;
    return(ctxt->spaceNr++);
}
1956
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * entry below, or to the first entry of the table when the stack gets
 * empty, and the released slot is reset to -1.
 *
 * Returns the value just removed, or 0 if the stack was empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1969
1970 /*
1971 * Macros for accessing the content. Those should be used only by the parser,
1972 * and not exported.
1973 *
1974 * Dirty macros, i.e. one often need to make assumption on the context to
1975 * use them
1976 *
1977 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1978 * To be used with extreme caution since operations consuming
1979 * characters may move the input buffer to a different location !
1980 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1981 * This should be used internally by the parser
1982 * only to compare to ASCII values otherwise it would break when
1983 * running with UTF-8 encoding.
1984 * RAW same as CUR but in the input buffer, bypass any token
1985 * extraction that may have been done
1986 * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1987 * to compare against ASCII based substrings.
1988 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1989 * strings without newlines within the parser.
1990 * NEXT1 Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1991 * defined char within the parser.
1992 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1993 *
1994 * NEXT Skip to the next character, this does the proper decoding
1995 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1996 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1997 * CUR_CHAR(l) returns the current unicode character (int), set l
1998 * to the number of xmlChars used for the encoding [0-5].
1999 * CUR_SCHAR same but operate on a string instead of the context
2000 * COPY_BUF copy the current unicode char to the target buffer, increment
2001 * the index
2002 * GROW, SHRINK handling of input buffers
2003 */
2004
/*
 * Direct accessors on the current input of the local variable ctxt.
 * RAW and CUR expand to the same expression: the byte under the cursor.
 * NXT(val) is the byte val positions after the cursor, CUR_PTR the
 * cursor itself.
 */
2005 #define RAW (*ctxt->input->cur)
2006 #define CUR (*ctxt->input->cur)
2007 #define NXT(val) ctxt->input->cur[(val)]
2008 #define CUR_PTR ctxt->input->cur
2009
/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes at s are c1..cn,
 * compared one by one as unsigned char. Each macro is built on the
 * previous one, the comparison stops at the first mismatch and s is
 * expanded once per compared byte.
 */
2010 #define CMP4( s, c1, c2, c3, c4 ) \
2011 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2012 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2013 #define CMP5( s, c1, c2, c3, c4, c5 ) \
2014 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2015 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2016 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2017 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2018 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2019 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2020 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2021 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2022 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2023 ((unsigned char *) s)[ 8 ] == c9 )
2024 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2025 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2026 ((unsigned char *) s)[ 9 ] == c10 )
2027
/*
 * SKIP(val): move the cursor val bytes forward, adding val to both
 * ctxt->nbChars and the column counter; the line counter is not updated.
 * Afterwards a '%' under the cursor is handed to
 * xmlParserHandlePEReference(), and if the cursor sits on a NUL byte and
 * growing the input yields nothing, the input is popped.
 */
2028 #define SKIP(val) do { \
2029 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
2030 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2031 if ((*ctxt->input->cur == 0) && \
2032 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
2033 xmlPopInput(ctxt); \
2034 } while (0)
2035
/*
 * SKIPL(val): same as SKIP but advances one byte at a time so that a
 * '\n' increments the line counter and resets the column to 1.
 * val is used unparenthesized in the loop condition.
 */
2036 #define SKIPL(val) do { \
2037 int skipl; \
2038 for(skipl=0; skipl<val; skipl++) { \
2039 if (*(ctxt->input->cur) == '\n') { \
2040 ctxt->input->line++; ctxt->input->col = 1; \
2041 } else ctxt->input->col++; \
2042 ctxt->nbChars++; \
2043 ctxt->input->cur++; \
2044 } \
2045 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2046 if ((*ctxt->input->cur == 0) && \
2047 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
2048 xmlPopInput(ctxt); \
2049 } while (0)
2050
/*
 * SHRINK: when not in progressive mode, call xmlSHRINK() once more than
 * 2 * INPUT_CHUNK bytes lie between the buffer base and the cursor while
 * fewer than 2 * INPUT_CHUNK bytes remain before the end.
 * The expansion is a bare if statement ending with its own ';' (it is
 * not wrapped in do/while(0)), so it cannot be used as the unbraced body
 * of an if that has an else.
 */
2051 #define SHRINK if ((ctxt->progressive == 0) && \
2052 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2053 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2054 xmlSHRINK (ctxt);
2055
/**
 * xmlSHRINK:
 * @ctxt:  an XML parser context
 *
 * Out of line part of the SHRINK macro: shrink the current input, then
 * if the cursor sits on a NUL byte try to grow the input, and pop it
 * when growing yields nothing.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);

    if (*ctxt->input->cur != 0)
        return;
    if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)
        xmlPopInput(ctxt);
}
2062
/*
 * GROW: when not in progressive mode, call xmlGROW() once fewer than
 * INPUT_CHUNK bytes remain between the cursor and the end of the input.
 * Like SHRINK, the expansion is a bare if statement ending with its own
 * ';', so it cannot be used as the unbraced body of an if that has an
 * else.
 */
2063 #define GROW if ((ctxt->progressive == 0) && \
2064 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2065 xmlGROW (ctxt);
2066
/**
 * xmlGROW:
 * @ctxt:  an XML parser context
 *
 * Out of line part of the GROW macro: read more data into the current
 * input, and pop the input when the cursor sits on a NUL byte and
 * nothing more can be read.
 *
 * Unless XML_PARSE_HUGE is set, an input with a buffer whose read
 * callback is not xmlNop may not have more than XML_MAX_LOOKUP_LIMIT
 * bytes on either side of the cursor. Past that limit a fatal error is
 * raised, the parser state is set to XML_PARSER_EOF and the input is
 * not grown any further.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    /* bytes available after the cursor / bytes kept before it */
    unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
    unsigned long curBase = ctxt->input->cur - ctxt->input->base;

    if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
         (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
         ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        ctxt->instate = XML_PARSER_EOF;
        /* the limit is only effective if the buffer stops growing here */
        return;
    }
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
        xmlPopInput(ctxt);
}
2083
/* SKIP_BLANKS and NEXT are plain calls on the local variable ctxt. */
2084 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2085
2086 #define NEXT xmlNextChar(ctxt)
2087
/*
 * NEXT1: move the cursor one byte forward, incrementing the column
 * counter and ctxt->nbChars, then try to grow the input if the cursor
 * landed on a NUL byte. The line counter is not touched.
 * The expansion is a braced block, not a do/while(0).
 */
2088 #define NEXT1 { \
2089 ctxt->input->col++; \
2090 ctxt->input->cur++; \
2091 ctxt->nbChars++; \
2092 if (*ctxt->input->cur == 0) \
2093 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2094 }
2095
/*
 * NEXTL(l): update the line/column counters from the byte under the
 * cursor ('\n' starts a new line), move the cursor l bytes forward, and
 * hand a '%' found at the new position to xmlParserHandlePEReference().
 * ctxt->nbChars is not updated and l is used unparenthesized.
 */
2096 #define NEXTL(l) do { \
2097 if (*(ctxt->input->cur) == '\n') { \
2098 ctxt->input->line++; ctxt->input->col = 1; \
2099 } else ctxt->input->col++; \
2100 ctxt->input->cur += l; \
2101 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2102 } while (0)
2103
/* Both take the address of l: it must be an lvalue receiving a length. */
2104 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2105 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2106
/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i; a length l of 1 stores a single byte, anything else goes
 * through xmlCopyCharMultiByte(). The expansion is an unbraced if/else
 * without a trailing semicolon.
 */
2107 #define COPY_BUF(l,b,i,v) \
2108 if (l == 1) b[i++] = (xmlChar) v; \
2109 else i += xmlCopyCharMultiByte(&b[i],v)
2110
2111 /**
2112 * xmlSkipBlankChars:
2113 * @ctxt: the XML parser context
2114 *
2115 * skip all blanks character found at that point in the input streams.
2116 * It pops up finished entities in the process if allowable at that point.
2117 *
2118 * Returns the number of space chars skipped
2119 */
2120
2121 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2122 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2123 int res = 0;
2124
2125 /*
2126 * It's Okay to use CUR/NEXT here since all the blanks are on
2127 * the ASCII range.
2128 */
2129 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2130 const xmlChar *cur;
2131 /*
2132 * if we are in the document content, go really fast
2133 */
2134 cur = ctxt->input->cur;
2135 while (IS_BLANK_CH(*cur)) {
2136 if (*cur == '\n') {
2137 ctxt->input->line++; ctxt->input->col = 1;
2138 } else {
2139 ctxt->input->col++;
2140 }
2141 cur++;
2142 res++;
2143 if (*cur == 0) {
2144 ctxt->input->cur = cur;
2145 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2146 cur = ctxt->input->cur;
2147 }
2148 }
2149 ctxt->input->cur = cur;
2150 } else {
2151 int cur;
2152 do {
2153 cur = CUR;
2154 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2155 NEXT;
2156 cur = CUR;
2157 res++;
2158 }
2159 while ((cur == 0) && (ctxt->inputNr > 1) &&
2160 (ctxt->instate != XML_PARSER_COMMENT)) {
2161 xmlPopInput(ctxt);
2162 cur = CUR;
2163 }
2164 /*
2165 * Need to handle support of entities branching here
2166 */
2167 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2168 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2169 }
2170 return(res);
2171 }
2172
2173 /************************************************************************
2174 * *
2175 * Commodity functions to handle entities *
2176 * *
2177 ************************************************************************/
2178
2179 /**
2180 * xmlPopInput:
2181 * @ctxt: an XML parser context
2182 *
2183 * xmlPopInput: the current input pointed by ctxt->input came to an end
2184 * pop it and return the next char.
2185 *
2186 * Returns the current xmlChar in the parser context
2187 */
2188 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2189 xmlPopInput(xmlParserCtxtPtr ctxt) {
2190 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2191 if (xmlParserDebugEntities)
2192 xmlGenericError(xmlGenericErrorContext,
2193 "Popping input %d\n", ctxt->inputNr);
2194 xmlFreeInputStream(inputPop(ctxt));
2195 if ((*ctxt->input->cur == 0) &&
2196 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2197 return(xmlPopInput(ctxt));
2198 return(CUR);
2199 }
2200
2201 /**
2202 * xmlPushInput:
2203 * @ctxt: an XML parser context
2204 * @input: an XML parser input fragment (entity, XML fragment ...).
2205 *
2206 * xmlPushInput: switch to a new input stream which is stacked on top
2207 * of the previous one(s).
2208 * Returns -1 in case of error or the index in the input stack
2209 */
2210 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2211 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2212 int ret;
2213 if (input == NULL) return(-1);
2214
2215 if (xmlParserDebugEntities) {
2216 if ((ctxt->input != NULL) && (ctxt->input->filename))
2217 xmlGenericError(xmlGenericErrorContext,
2218 "%s(%d): ", ctxt->input->filename,
2219 ctxt->input->line);
2220 xmlGenericError(xmlGenericErrorContext,
2221 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2222 }
2223 ret = inputPush(ctxt, input);
2224 if (ctxt->instate == XML_PARSER_EOF)
2225 return(-1);
2226 GROW;
2227 return(ret);
2228 }
2229
2230 /**
2231 * xmlParseCharRef:
2232 * @ctxt: an XML parser context
2233 *
2234 * parse Reference declarations
2235 *
2236 * [66] CharRef ::= '&#' [0-9]+ ';' |
2237 * '&#x' [0-9a-fA-F]+ ';'
2238 *
2239 * [ WFC: Legal Character ]
2240 * Characters referred to using character references must match the
2241 * production for Char.
2242 *
2243 * Returns the value parsed (as an int), 0 in case of error
2244 */
2245 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2246 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2247 unsigned int val = 0;
2248 int count = 0;
2249 unsigned int outofrange = 0;
2250
2251 /*
2252 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2253 */
2254 if ((RAW == '&') && (NXT(1) == '#') &&
2255 (NXT(2) == 'x')) {
2256 SKIP(3);
2257 GROW;
2258 while (RAW != ';') { /* loop blocked by count */
2259 if (count++ > 20) {
2260 count = 0;
2261 GROW;
2262 if (ctxt->instate == XML_PARSER_EOF)
2263 return(0);
2264 }
2265 if ((RAW >= '0') && (RAW <= '9'))
2266 val = val * 16 + (CUR - '0');
2267 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2268 val = val * 16 + (CUR - 'a') + 10;
2269 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2270 val = val * 16 + (CUR - 'A') + 10;
2271 else {
2272 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2273 val = 0;
2274 break;
2275 }
2276 if (val > 0x10FFFF)
2277 outofrange = val;
2278
2279 NEXT;
2280 count++;
2281 }
2282 if (RAW == ';') {
2283 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2284 ctxt->input->col++;
2285 ctxt->nbChars ++;
2286 ctxt->input->cur++;
2287 }
2288 } else if ((RAW == '&') && (NXT(1) == '#')) {
2289 SKIP(2);
2290 GROW;
2291 while (RAW != ';') { /* loop blocked by count */
2292 if (count++ > 20) {
2293 count = 0;
2294 GROW;
2295 if (ctxt->instate == XML_PARSER_EOF)
2296 return(0);
2297 }
2298 if ((RAW >= '0') && (RAW <= '9'))
2299 val = val * 10 + (CUR - '0');
2300 else {
2301 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2302 val = 0;
2303 break;
2304 }
2305 if (val > 0x10FFFF)
2306 outofrange = val;
2307
2308 NEXT;
2309 count++;
2310 }
2311 if (RAW == ';') {
2312 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2313 ctxt->input->col++;
2314 ctxt->nbChars ++;
2315 ctxt->input->cur++;
2316 }
2317 } else {
2318 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2319 }
2320
2321 /*
2322 * [ WFC: Legal Character ]
2323 * Characters referred to using character references must match the
2324 * production for Char.
2325 */
2326 if ((IS_CHAR(val) && (outofrange == 0))) {
2327 return(val);
2328 } else {
2329 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2330 "xmlParseCharRef: invalid xmlChar value %d\n",
2331 val);
2332 }
2333 return(0);
2334 }
2335
2336 /**
2337 * xmlParseStringCharRef:
2338 * @ctxt: an XML parser context
2339 * @str: a pointer to an index in the string
2340 *
2341 * parse Reference declarations, variant parsing from a string rather
2342 * than an an input flow.
2343 *
2344 * [66] CharRef ::= '&#' [0-9]+ ';' |
2345 * '&#x' [0-9a-fA-F]+ ';'
2346 *
2347 * [ WFC: Legal Character ]
2348 * Characters referred to using character references must match the
2349 * production for Char.
2350 *
2351 * Returns the value parsed (as an int), 0 in case of error, str will be
2352 * updated to the current value of the index
2353 */
2354 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2355 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2356 const xmlChar *ptr;
2357 xmlChar cur;
2358 unsigned int val = 0;
2359 unsigned int outofrange = 0;
2360
2361 if ((str == NULL) || (*str == NULL)) return(0);
2362 ptr = *str;
2363 cur = *ptr;
2364 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2365 ptr += 3;
2366 cur = *ptr;
2367 while (cur != ';') { /* Non input consuming loop */
2368 if ((cur >= '0') && (cur <= '9'))
2369 val = val * 16 + (cur - '0');
2370 else if ((cur >= 'a') && (cur <= 'f'))
2371 val = val * 16 + (cur - 'a') + 10;
2372 else if ((cur >= 'A') && (cur <= 'F'))
2373 val = val * 16 + (cur - 'A') + 10;
2374 else {
2375 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2376 val = 0;
2377 break;
2378 }
2379 if (val > 0x10FFFF)
2380 outofrange = val;
2381
2382 ptr++;
2383 cur = *ptr;
2384 }
2385 if (cur == ';')
2386 ptr++;
2387 } else if ((cur == '&') && (ptr[1] == '#')){
2388 ptr += 2;
2389 cur = *ptr;
2390 while (cur != ';') { /* Non input consuming loops */
2391 if ((cur >= '0') && (cur <= '9'))
2392 val = val * 10 + (cur - '0');
2393 else {
2394 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2395 val = 0;
2396 break;
2397 }
2398 if (val > 0x10FFFF)
2399 outofrange = val;
2400
2401 ptr++;
2402 cur = *ptr;
2403 }
2404 if (cur == ';')
2405 ptr++;
2406 } else {
2407 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2408 return(0);
2409 }
2410 *str = ptr;
2411
2412 /*
2413 * [ WFC: Legal Character ]
2414 * Characters referred to using character references must match the
2415 * production for Char.
2416 */
2417 if ((IS_CHAR(val) && (outofrange == 0))) {
2418 return(val);
2419 } else {
2420 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2421 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2422 val);
2423 }
2424 return(0);
2425 }
2426
2427 /**
2428 * xmlNewBlanksWrapperInputStream:
2429 * @ctxt: an XML parser context
2430 * @entity: an Entity pointer
2431 *
2432 * Create a new input stream for wrapping
2433 * blanks around a PEReference
2434 *
2435 * Returns the new input stream or NULL
2436 */
2437
deallocblankswrapper(xmlChar * str)2438 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2439
2440 static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)2441 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2442 xmlParserInputPtr input;
2443 xmlChar *buffer;
2444 size_t length;
2445 if (entity == NULL) {
2446 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2447 "xmlNewBlanksWrapperInputStream entity\n");
2448 return(NULL);
2449 }
2450 if (xmlParserDebugEntities)
2451 xmlGenericError(xmlGenericErrorContext,
2452 "new blanks wrapper for entity: %s\n", entity->name);
2453 input = xmlNewInputStream(ctxt);
2454 if (input == NULL) {
2455 return(NULL);
2456 }
2457 length = xmlStrlen(entity->name) + 5;
2458 buffer = xmlMallocAtomic(length);
2459 if (buffer == NULL) {
2460 xmlErrMemory(ctxt, NULL);
2461 xmlFree(input);
2462 return(NULL);
2463 }
2464 buffer [0] = ' ';
2465 buffer [1] = '%';
2466 buffer [length-3] = ';';
2467 buffer [length-2] = ' ';
2468 buffer [length-1] = 0;
2469 memcpy(buffer + 2, entity->name, length - 5);
2470 input->free = deallocblankswrapper;
2471 input->base = buffer;
2472 input->cur = buffer;
2473 input->length = length;
2474 input->end = &buffer[length];
2475 return(input);
2476 }
2477
2478 /**
2479 * xmlParserHandlePEReference:
2480 * @ctxt: the parser context
2481 *
2482 * [69] PEReference ::= '%' Name ';'
2483 *
2484 * [ WFC: No Recursion ]
2485 * A parsed entity must not contain a recursive
2486 * reference to itself, either directly or indirectly.
2487 *
2488 * [ WFC: Entity Declared ]
2489 * In a document without any DTD, a document with only an internal DTD
2490 * subset which contains no parameter entity references, or a document
2491 * with "standalone='yes'", ... ... The declaration of a parameter
2492 * entity must precede any reference to it...
2493 *
2494 * [ VC: Entity Declared ]
2495 * In a document with an external subset or external parameter entities
2496 * with "standalone='no'", ... ... The declaration of a parameter entity
2497 * must precede any reference to it...
2498 *
2499 * [ WFC: In DTD ]
2500 * Parameter-entity references may only appear in the DTD.
2501 * NOTE: misleading but this is handled.
2502 *
2503 * A PEReference may have been detected in the current input stream
2504 * the handling is done accordingly to
2505 * http://www.w3.org/TR/REC-xml#entproc
2506 * i.e.
2507 * - Included in literal in entity values
2508 * - Included as Parameter Entity reference within DTDs
2509 */
2510 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2511 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2512 const xmlChar *name;
2513 xmlEntityPtr entity = NULL;
2514 xmlParserInputPtr input;
2515
2516 if (RAW != '%') return;
2517 switch(ctxt->instate) {
2518 case XML_PARSER_CDATA_SECTION:
2519 return;
2520 case XML_PARSER_COMMENT:
2521 return;
2522 case XML_PARSER_START_TAG:
2523 return;
2524 case XML_PARSER_END_TAG:
2525 return;
2526 case XML_PARSER_EOF:
2527 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2528 return;
2529 case XML_PARSER_PROLOG:
2530 case XML_PARSER_START:
2531 case XML_PARSER_MISC:
2532 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2533 return;
2534 case XML_PARSER_ENTITY_DECL:
2535 case XML_PARSER_CONTENT:
2536 case XML_PARSER_ATTRIBUTE_VALUE:
2537 case XML_PARSER_PI:
2538 case XML_PARSER_SYSTEM_LITERAL:
2539 case XML_PARSER_PUBLIC_LITERAL:
2540 /* we just ignore it there */
2541 return;
2542 case XML_PARSER_EPILOG:
2543 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2544 return;
2545 case XML_PARSER_ENTITY_VALUE:
2546 /*
2547 * NOTE: in the case of entity values, we don't do the
2548 * substitution here since we need the literal
2549 * entity value to be able to save the internal
2550 * subset of the document.
2551 * This will be handled by xmlStringDecodeEntities
2552 */
2553 return;
2554 case XML_PARSER_DTD:
2555 /*
2556 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2557 * In the internal DTD subset, parameter-entity references
2558 * can occur only where markup declarations can occur, not
2559 * within markup declarations.
2560 * In that case this is handled in xmlParseMarkupDecl
2561 */
2562 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2563 return;
2564 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2565 return;
2566 break;
2567 case XML_PARSER_IGNORE:
2568 return;
2569 }
2570
2571 NEXT;
2572 name = xmlParseName(ctxt);
2573 if (xmlParserDebugEntities)
2574 xmlGenericError(xmlGenericErrorContext,
2575 "PEReference: %s\n", name);
2576 if (name == NULL) {
2577 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2578 } else {
2579 if (RAW == ';') {
2580 NEXT;
2581 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2582 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2583 if (ctxt->instate == XML_PARSER_EOF)
2584 return;
2585 if (entity == NULL) {
2586
2587 /*
2588 * [ WFC: Entity Declared ]
2589 * In a document without any DTD, a document with only an
2590 * internal DTD subset which contains no parameter entity
2591 * references, or a document with "standalone='yes'", ...
2592 * ... The declaration of a parameter entity must precede
2593 * any reference to it...
2594 */
2595 if ((ctxt->standalone == 1) ||
2596 ((ctxt->hasExternalSubset == 0) &&
2597 (ctxt->hasPErefs == 0))) {
2598 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2599 "PEReference: %%%s; not found\n", name);
2600 } else {
2601 /*
2602 * [ VC: Entity Declared ]
2603 * In a document with an external subset or external
2604 * parameter entities with "standalone='no'", ...
2605 * ... The declaration of a parameter entity must precede
2606 * any reference to it...
2607 */
2608 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2609 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2610 "PEReference: %%%s; not found\n",
2611 name, NULL);
2612 } else
2613 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2614 "PEReference: %%%s; not found\n",
2615 name, NULL);
2616 ctxt->valid = 0;
2617 }
2618 xmlParserEntityCheck(ctxt, 0, NULL, 0);
2619 } else if (ctxt->input->free != deallocblankswrapper) {
2620 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2621 if (xmlPushInput(ctxt, input) < 0)
2622 return;
2623 } else {
2624 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2625 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2626 xmlChar start[4];
2627 xmlCharEncoding enc;
2628
2629 /*
2630 * Note: external parameter entities will not be loaded, it
2631 * is not required for a non-validating parser, unless the
2632 * option of validating, or substituting entities were
2633 * given. Doing so is far more secure as the parser will
2634 * only process data coming from the document entity by
2635 * default.
2636 */
2637 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2638 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2639 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2640 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2641 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2642 (ctxt->replaceEntities == 0) &&
2643 (ctxt->validate == 0))
2644 return;
2645
2646 /*
2647 * handle the extra spaces added before and after
2648 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2649 * this is done independently.
2650 */
2651 input = xmlNewEntityInputStream(ctxt, entity);
2652 if (xmlPushInput(ctxt, input) < 0)
2653 return;
2654
2655 /*
2656 * Get the 4 first bytes and decode the charset
2657 * if enc != XML_CHAR_ENCODING_NONE
2658 * plug some encoding conversion routines.
2659 * Note that, since we may have some non-UTF8
2660 * encoding (like UTF16, bug 135229), the 'length'
2661 * is not known, but we can calculate based upon
2662 * the amount of data in the buffer.
2663 */
2664 GROW
2665 if (ctxt->instate == XML_PARSER_EOF)
2666 return;
2667 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2668 start[0] = RAW;
2669 start[1] = NXT(1);
2670 start[2] = NXT(2);
2671 start[3] = NXT(3);
2672 enc = xmlDetectCharEncoding(start, 4);
2673 if (enc != XML_CHAR_ENCODING_NONE) {
2674 xmlSwitchEncoding(ctxt, enc);
2675 }
2676 }
2677
2678 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2679 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2680 (IS_BLANK_CH(NXT(5)))) {
2681 xmlParseTextDecl(ctxt);
2682 }
2683 } else {
2684 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2685 "PEReference: %s is not a parameter entity\n",
2686 name);
2687 }
2688 }
2689 } else {
2690 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2691 }
2692 }
2693 }
2694
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus n bytes.
 * buffer##_size is expected to be a size_t variable visible in the
 * expanding scope (e.g. buffer_size for a buffer named buffer).
 * mem_error: is a label expected to exist in the expanding function and
 * to handle memory allocation failures; it is jumped to when the size
 * computation wraps around below the current size or when xmlRealloc()
 * fails. The reallocation goes through a temporary so that the original
 * buffer pointer is still valid (and can be freed) on failure.
 */
#define growBuffer(buffer, n) { \
    xmlChar *tmp; \
    size_t new_size = buffer##_size * 2 + n; \
    if (new_size < buffer##_size) goto mem_error; \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
    if (tmp == NULL) goto mem_error; \
    buffer = tmp; \
    buffer##_size = new_size; \
}
2709
/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes an entity string content and processes it to do the adequate
 * substitutions: character references are always replaced, general
 * entity references only when @what contains XML_SUBSTITUTE_REF and
 * parameter entity references only when it contains
 * XML_SUBSTITUTE_PEREF. Entity replacement text is itself decoded
 * recursively, with ctxt->depth tracking the nesting.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned on invalid arguments
 *      (NULL @ctxt or @str, negative @len), when the nesting depth limit
 *      is exceeded, on memory allocation failure, or when an entity
 *      error aborts the decoding.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
        return(NULL);
    last = str + len;

    /*
     * Refuse to recurse too deeply: more than 40 levels unless the
     * XML_PARSE_HUGE option is set, and never more than 1024.
     */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 1024)) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3)) {

        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /* character reference: str is advanced by the helper */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val != 0) {
                /* l is passed as 0 so the multi-byte copy path is taken */
                COPY_BUF(0,buffer,nbchars,val);
            }
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            /* a loop or internal error recorded in the context aborts the decoding */
            if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
                (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
                goto int_error;
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* predefined entity: only the first byte of its content is copied */
                if (ent->content != NULL) {
                    COPY_BUF(0,buffer,nbchars,ent->content[0]);
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /* decode the replacement text recursively, one level deeper */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;

                if (rep != NULL) {
                    current = rep;
                    while (*current != 0) { /* non input consuming loop */
                        buffer[nbchars++] = *current++;
                        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                            /* check the entity before letting the buffer grow */
                            if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                                goto int_error;
                            growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                        }
                    }
                    xmlFree(rep);
                    rep = NULL;
                }
            } else if (ent != NULL) {
                /* entity without content: copy the reference back as "&name;" */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
                goto int_error;
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if (ent != NULL) {
                /* content not available yet: try to load it first */
                if (ent->content == NULL) {
                    xmlLoadEntityContent(ctxt, ent);
                }
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep != NULL) {
                    current = rep;
                    while (*current != 0) { /* non input consuming loop */
                        buffer[nbchars++] = *current++;
                        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                            if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                                goto int_error;
                            growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                        }
                    }
                    xmlFree(rep);
                    rep = NULL;
                }
            }
        } else {
            /* ordinary character: copy it and step over its l bytes */
            COPY_BUF(l,buffer,nbchars,c);
            str += l;
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        /* fetch the next character, or 0 once the len bytes are consumed */
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

    /*
     * Error exits: mem_error reports the allocation failure and then
     * falls through to int_error, which releases any pending replacement
     * text and the output buffer.
     */
mem_error:
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
2890
2891 /**
2892 * xmlStringDecodeEntities:
2893 * @ctxt: the parser context
2894 * @str: the input string
2895 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2896 * @end: an end marker xmlChar, 0 if none
2897 * @end2: an end marker xmlChar, 0 if none
2898 * @end3: an end marker xmlChar, 0 if none
2899 *
2900 * Takes a entity string content and process to do the adequate substitutions.
2901 *
2902 * [67] Reference ::= EntityRef | CharRef
2903 *
2904 * [69] PEReference ::= '%' Name ';'
2905 *
2906 * Returns A newly allocated string with the substitution done. The caller
2907 * must deallocate it !
2908 */
2909 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2910 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2911 xmlChar end, xmlChar end2, xmlChar end3) {
2912 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2913 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2914 end, end2, end3));
2915 }
2916
2917 /************************************************************************
2918 * *
2919 * Commodity functions, cleanup needed ? *
2920 * *
2921 ************************************************************************/
2922
2923 /**
2924 * areBlanks:
2925 * @ctxt: an XML parser context
2926 * @str: a xmlChar *
2927 * @len: the size of @str
2928 * @blank_chars: we know the chars are blanks
2929 *
2930 * Is this a sequence of blank chars that one can ignore ?
2931 *
2932 * Returns 1 if ignorable 0 otherwise.
2933 */
2934
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2935 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2936 int blank_chars) {
2937 int i, ret;
2938 xmlNodePtr lastChild;
2939
2940 /*
2941 * Don't spend time trying to differentiate them, the same callback is
2942 * used !
2943 */
2944 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2945 return(0);
2946
2947 /*
2948 * Check for xml:space value.
2949 */
2950 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2951 (*(ctxt->space) == -2))
2952 return(0);
2953
2954 /*
2955 * Check that the string is made of blanks
2956 */
2957 if (blank_chars == 0) {
2958 for (i = 0;i < len;i++)
2959 if (!(IS_BLANK_CH(str[i]))) return(0);
2960 }
2961
2962 /*
2963 * Look if the element is mixed content in the DTD if available
2964 */
2965 if (ctxt->node == NULL) return(0);
2966 if (ctxt->myDoc != NULL) {
2967 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2968 if (ret == 0) return(1);
2969 if (ret == 1) return(0);
2970 }
2971
2972 /*
2973 * Otherwise, heuristic :-\
2974 */
2975 if ((RAW != '<') && (RAW != 0xD)) return(0);
2976 if ((ctxt->node->children == NULL) &&
2977 (RAW == '<') && (NXT(1) == '/')) return(0);
2978
2979 lastChild = xmlGetLastChild(ctxt->node);
2980 if (lastChild == NULL) {
2981 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2982 (ctxt->node->content != NULL)) return(0);
2983 } else if (xmlNodeIsText(lastChild))
2984 return(0);
2985 else if ((ctxt->node->children != NULL) &&
2986 (xmlNodeIsText(ctxt->node->children)))
2987 return(0);
2988 return(1);
2989 }
2990
2991 /************************************************************************
2992 * *
2993 * Extra stuff for namespace support *
2994 * Relates to http://www.w3.org/TR/WD-xml-names *
2995 * *
2996 ************************************************************************/
2997
/**
 * xmlSplitQName:
 * @ctxt:  an XML parser context
 * @name:  the qualified name to split
 * @prefix:  a xmlChar **
 *
 * parse an UTF8 encoded XML qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * Names starting with ':' or ending right after their first ':' are
 * returned whole, with no prefix. The same holds for names starting
 * with "xml:" unless XML_XML_NAMESPACE is defined.
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any. Both strings are newly allocated.
 *   NULL is returned if @prefix or @name is NULL or on allocation
 *   failure.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    xmlChar buf[XML_MAX_NAMELEN + 5];
    xmlChar *buffer = NULL;
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
        return(xmlStrdup(name));
#endif

    /* nasty but well=formed */
    if (cur[0] == ':')
        return(xmlStrdup(name));

    /*
     * First part: collect bytes up to the first ':' or the end of the
     * name, starting in the on-stack buffer.
     */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
        buf[len++] = c;
        c = *cur++;
    }
    if (len >= max) {
        /*
         * Okay someone managed to make a huge name, so he's ready to pay
         * for the processing speed.
         */
        max = len * 2;

        /* switch to a heap buffer seeded with what was collected so far */
        buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
        if (buffer == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        memcpy(buffer, buf, len);
        while ((c != 0) && (c != ':')) { /* tested bigname.xml */
            if (len + 10 > max) {
                xmlChar *tmp;

                max *= 2;
                tmp = (xmlChar *) xmlRealloc(buffer,
                                                max * sizeof(xmlChar));
                if (tmp == NULL) {
                    xmlFree(buffer);
                    xmlErrMemory(ctxt, NULL);
                    return(NULL);
                }
                buffer = tmp;
            }
            buffer[len++] = c;
            c = *cur++;
        }
        buffer[len] = 0;
    }

    /* a ':' as very last character: not a prefix, return the whole name */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
            xmlFree(buffer);
        *prefix = NULL;
        return(xmlStrdup(name));
    }

    if (buffer == NULL)
        ret = xmlStrndup(buf, len);
    else {
        /* hand over the heap buffer and reset the state for the second part */
        ret = buffer;
        buffer = NULL;
        max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
        /* what was collected so far is the prefix; now gather the local part */
        c = *cur;
        *prefix = ret;
        /*
         * NOTE(review): this looks unreachable, the "':' followed by the
         * end of the name" case has already returned above.
         */
        if (c == 0) {
            return(xmlStrndup(BAD_CAST "", 0));
        }
        len = 0;

        /*
         * Check that the first character is proper to start
         * a new name
         */
        if (!(((c >= 0x61) && (c <= 0x7A)) ||
              ((c >= 0x41) && (c <= 0x5A)) ||
              (c == '_') || (c == ':'))) {
            int l;
            int first = CUR_SCHAR(cur, l);

            /* the error is reported but the split still proceeds */
            if (!IS_LETTER(first) && (first != '_')) {
                xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
                            "Name %s is not XML Namespace compliant\n",
                                  name);
            }
        }
        cur++;

        while ((c != 0) && (len < max)) { /* tested bigname2.xml */
            buf[len++] = c;
            c = *cur++;
        }
        if (len >= max) {
            /*
             * Okay someone managed to make a huge name, so he's ready to pay
             * for the processing speed.
             */
            max = len * 2;

            buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
            if (buffer == NULL) {
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            while (c != 0) { /* tested bigname2.xml */
                if (len + 10 > max) {
                    xmlChar *tmp;

                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer,
                                                    max * sizeof(xmlChar));
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                buffer[len++] = c;
                c = *cur++;
            }
            buffer[len] = 0;
        }

        if (buffer == NULL)
            ret = xmlStrndup(buf, len);
        else {
            ret = buffer;
        }
    }

    return(ret);
}
3168
3169 /************************************************************************
3170 * *
3171 * The parser itself *
3172 * Relates to http://www.w3.org/TR/REC-xml *
3173 * *
3174 ************************************************************************/
3175
3176 /************************************************************************
3177 * *
3178 * Routines to parse Name, NCName and NmToken *
3179 * *
3180 ************************************************************************/
/*
 * Statistics counters, only compiled in when DEBUG is defined.
 * NOTE(review): judging by their names each one counts the calls to a
 * name-parsing routine; the code updating them is outside this section.
 */
#ifdef DEBUG
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3189
3190 /*
3191 * The two following functions are related to the change of accepted
3192 * characters for Name and NmToken in the Revision 5 of XML-1.0
3193 * They correspond to the modified production [4] and the new production [4a]
3194 * changes in that revision. Also note that the macros used for the
3195 * productions Letter, Digit, CombiningChar and Extender are not needed
3196 * anymore.
3197 * We still keep compatibility to pre-revision5 parsing semantic if the
3198 * new XML_PARSE_OLD10 option is given to the parser.
3199 */
3200 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3201 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3202 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3203 /*
3204 * Use the new checks of production [4] [4a] amd [5] of the
3205 * Update 5 of XML-1.0
3206 */
3207 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3208 (((c >= 'a') && (c <= 'z')) ||
3209 ((c >= 'A') && (c <= 'Z')) ||
3210 (c == '_') || (c == ':') ||
3211 ((c >= 0xC0) && (c <= 0xD6)) ||
3212 ((c >= 0xD8) && (c <= 0xF6)) ||
3213 ((c >= 0xF8) && (c <= 0x2FF)) ||
3214 ((c >= 0x370) && (c <= 0x37D)) ||
3215 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3216 ((c >= 0x200C) && (c <= 0x200D)) ||
3217 ((c >= 0x2070) && (c <= 0x218F)) ||
3218 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3219 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3220 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3221 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3222 ((c >= 0x10000) && (c <= 0xEFFFF))))
3223 return(1);
3224 } else {
3225 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3226 return(1);
3227 }
3228 return(0);
3229 }
3230
3231 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3232 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3233 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3234 /*
3235 * Use the new checks of production [4] [4a] amd [5] of the
3236 * Update 5 of XML-1.0
3237 */
3238 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3239 (((c >= 'a') && (c <= 'z')) ||
3240 ((c >= 'A') && (c <= 'Z')) ||
3241 ((c >= '0') && (c <= '9')) || /* !start */
3242 (c == '_') || (c == ':') ||
3243 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3244 ((c >= 0xC0) && (c <= 0xD6)) ||
3245 ((c >= 0xD8) && (c <= 0xF6)) ||
3246 ((c >= 0xF8) && (c <= 0x2FF)) ||
3247 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3248 ((c >= 0x370) && (c <= 0x37D)) ||
3249 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3250 ((c >= 0x200C) && (c <= 0x200D)) ||
3251 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3252 ((c >= 0x2070) && (c <= 0x218F)) ||
3253 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3254 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3255 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3256 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3257 ((c >= 0x10000) && (c <= 0xEFFFF))))
3258 return(1);
3259 } else {
3260 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3261 (c == '.') || (c == '-') ||
3262 (c == '_') || (c == ':') ||
3263 (IS_COMBINING(c)) ||
3264 (IS_EXTENDER(c)))
3265 return(1);
3266 }
3267 return(0);
3268 }
3269
/*
 * Forward declaration: xmlParseAttValue() below calls this routine, whose
 * definition is not part of this section of the file.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
3272
3273 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3274 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3275 int len = 0, l;
3276 int c;
3277 int count = 0;
3278
3279 #ifdef DEBUG
3280 nbParseNameComplex++;
3281 #endif
3282
3283 /*
3284 * Handler for more complex cases
3285 */
3286 GROW;
3287 if (ctxt->instate == XML_PARSER_EOF)
3288 return(NULL);
3289 c = CUR_CHAR(l);
3290 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3291 /*
3292 * Use the new checks of production [4] [4a] amd [5] of the
3293 * Update 5 of XML-1.0
3294 */
3295 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296 (!(((c >= 'a') && (c <= 'z')) ||
3297 ((c >= 'A') && (c <= 'Z')) ||
3298 (c == '_') || (c == ':') ||
3299 ((c >= 0xC0) && (c <= 0xD6)) ||
3300 ((c >= 0xD8) && (c <= 0xF6)) ||
3301 ((c >= 0xF8) && (c <= 0x2FF)) ||
3302 ((c >= 0x370) && (c <= 0x37D)) ||
3303 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3304 ((c >= 0x200C) && (c <= 0x200D)) ||
3305 ((c >= 0x2070) && (c <= 0x218F)) ||
3306 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3307 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3308 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3309 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3310 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3311 return(NULL);
3312 }
3313 len += l;
3314 NEXTL(l);
3315 c = CUR_CHAR(l);
3316 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3317 (((c >= 'a') && (c <= 'z')) ||
3318 ((c >= 'A') && (c <= 'Z')) ||
3319 ((c >= '0') && (c <= '9')) || /* !start */
3320 (c == '_') || (c == ':') ||
3321 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3322 ((c >= 0xC0) && (c <= 0xD6)) ||
3323 ((c >= 0xD8) && (c <= 0xF6)) ||
3324 ((c >= 0xF8) && (c <= 0x2FF)) ||
3325 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3326 ((c >= 0x370) && (c <= 0x37D)) ||
3327 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3328 ((c >= 0x200C) && (c <= 0x200D)) ||
3329 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3330 ((c >= 0x2070) && (c <= 0x218F)) ||
3331 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3332 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3333 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3334 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3335 ((c >= 0x10000) && (c <= 0xEFFFF))
3336 )) {
3337 if (count++ > XML_PARSER_CHUNK_SIZE) {
3338 count = 0;
3339 GROW;
3340 if (ctxt->instate == XML_PARSER_EOF)
3341 return(NULL);
3342 }
3343 len += l;
3344 NEXTL(l);
3345 c = CUR_CHAR(l);
3346 }
3347 } else {
3348 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3349 (!IS_LETTER(c) && (c != '_') &&
3350 (c != ':'))) {
3351 return(NULL);
3352 }
3353 len += l;
3354 NEXTL(l);
3355 c = CUR_CHAR(l);
3356
3357 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3358 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3359 (c == '.') || (c == '-') ||
3360 (c == '_') || (c == ':') ||
3361 (IS_COMBINING(c)) ||
3362 (IS_EXTENDER(c)))) {
3363 if (count++ > XML_PARSER_CHUNK_SIZE) {
3364 count = 0;
3365 GROW;
3366 if (ctxt->instate == XML_PARSER_EOF)
3367 return(NULL);
3368 }
3369 len += l;
3370 NEXTL(l);
3371 c = CUR_CHAR(l);
3372 if (c == 0) {
3373 count = 0;
3374 GROW;
3375 if (ctxt->instate == XML_PARSER_EOF)
3376 return(NULL);
3377 c = CUR_CHAR(l);
3378 }
3379 }
3380 }
3381 if ((len > XML_MAX_NAME_LENGTH) &&
3382 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3383 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3384 return(NULL);
3385 }
3386 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3387 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3388 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3389 }
3390
3391 /**
3392 * xmlParseName:
3393 * @ctxt: an XML parser context
3394 *
3395 * parse an XML name.
3396 *
3397 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3398 * CombiningChar | Extender
3399 *
3400 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3401 *
3402 * [6] Names ::= Name (#x20 Name)*
3403 *
3404 * Returns the Name parsed or NULL
3405 */
3406
3407 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3408 xmlParseName(xmlParserCtxtPtr ctxt) {
3409 const xmlChar *in;
3410 const xmlChar *ret;
3411 int count = 0;
3412
3413 GROW;
3414
3415 #ifdef DEBUG
3416 nbParseName++;
3417 #endif
3418
3419 /*
3420 * Accelerator for simple ASCII names
3421 */
3422 in = ctxt->input->cur;
3423 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3424 ((*in >= 0x41) && (*in <= 0x5A)) ||
3425 (*in == '_') || (*in == ':')) {
3426 in++;
3427 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3428 ((*in >= 0x41) && (*in <= 0x5A)) ||
3429 ((*in >= 0x30) && (*in <= 0x39)) ||
3430 (*in == '_') || (*in == '-') ||
3431 (*in == ':') || (*in == '.'))
3432 in++;
3433 if ((*in > 0) && (*in < 0x80)) {
3434 count = in - ctxt->input->cur;
3435 if ((count > XML_MAX_NAME_LENGTH) &&
3436 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3437 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3438 return(NULL);
3439 }
3440 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3441 ctxt->input->cur = in;
3442 ctxt->nbChars += count;
3443 ctxt->input->col += count;
3444 if (ret == NULL)
3445 xmlErrMemory(ctxt, NULL);
3446 return(ret);
3447 }
3448 }
3449 /* accelerator for special cases */
3450 return(xmlParseNameComplex(ctxt));
3451 }
3452
3453 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3454 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3455 int len = 0, l;
3456 int c;
3457 int count = 0;
3458 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
3459
3460 #ifdef DEBUG
3461 nbParseNCNameComplex++;
3462 #endif
3463
3464 /*
3465 * Handler for more complex cases
3466 */
3467 GROW;
3468 end = ctxt->input->cur;
3469 c = CUR_CHAR(l);
3470 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3471 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3472 return(NULL);
3473 }
3474
3475 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3476 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3477 if (count++ > XML_PARSER_CHUNK_SIZE) {
3478 if ((len > XML_MAX_NAME_LENGTH) &&
3479 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3480 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3481 return(NULL);
3482 }
3483 count = 0;
3484 GROW;
3485 if (ctxt->instate == XML_PARSER_EOF)
3486 return(NULL);
3487 }
3488 len += l;
3489 NEXTL(l);
3490 end = ctxt->input->cur;
3491 c = CUR_CHAR(l);
3492 if (c == 0) {
3493 count = 0;
3494 GROW;
3495 if (ctxt->instate == XML_PARSER_EOF)
3496 return(NULL);
3497 end = ctxt->input->cur;
3498 c = CUR_CHAR(l);
3499 }
3500 }
3501 if ((len > XML_MAX_NAME_LENGTH) &&
3502 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3503 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3504 return(NULL);
3505 }
3506 return(xmlDictLookup(ctxt->dict, end - len, len));
3507 }
3508
3509 /**
3510 * xmlParseNCName:
3511 * @ctxt: an XML parser context
3512 * @len: length of the string parsed
3513 *
3514 * parse an XML name.
3515 *
3516 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3517 * CombiningChar | Extender
3518 *
3519 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3520 *
3521 * Returns the Name parsed or NULL
3522 */
3523
3524 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3525 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3526 const xmlChar *in;
3527 const xmlChar *ret;
3528 int count = 0;
3529
3530 #ifdef DEBUG
3531 nbParseNCName++;
3532 #endif
3533
3534 /*
3535 * Accelerator for simple ASCII names
3536 */
3537 in = ctxt->input->cur;
3538 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3539 ((*in >= 0x41) && (*in <= 0x5A)) ||
3540 (*in == '_')) {
3541 in++;
3542 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3543 ((*in >= 0x41) && (*in <= 0x5A)) ||
3544 ((*in >= 0x30) && (*in <= 0x39)) ||
3545 (*in == '_') || (*in == '-') ||
3546 (*in == '.'))
3547 in++;
3548 if ((*in > 0) && (*in < 0x80)) {
3549 count = in - ctxt->input->cur;
3550 if ((count > XML_MAX_NAME_LENGTH) &&
3551 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553 return(NULL);
3554 }
3555 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3556 ctxt->input->cur = in;
3557 ctxt->nbChars += count;
3558 ctxt->input->col += count;
3559 if (ret == NULL) {
3560 xmlErrMemory(ctxt, NULL);
3561 }
3562 return(ret);
3563 }
3564 }
3565 return(xmlParseNCNameComplex(ctxt));
3566 }
3567
3568 /**
3569 * xmlParseNameAndCompare:
3570 * @ctxt: an XML parser context
3571 *
3572 * parse an XML name and compares for match
3573 * (specialized for endtag parsing)
3574 *
3575 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3576 * and the name for mismatch
3577 */
3578
3579 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3580 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3581 register const xmlChar *cmp = other;
3582 register const xmlChar *in;
3583 const xmlChar *ret;
3584
3585 GROW;
3586 if (ctxt->instate == XML_PARSER_EOF)
3587 return(NULL);
3588
3589 in = ctxt->input->cur;
3590 while (*in != 0 && *in == *cmp) {
3591 ++in;
3592 ++cmp;
3593 ctxt->input->col++;
3594 }
3595 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3596 /* success */
3597 ctxt->input->cur = in;
3598 return (const xmlChar*) 1;
3599 }
3600 /* failure (or end of input buffer), check with full function */
3601 ret = xmlParseName (ctxt);
3602 /* strings coming from the dictionnary direct compare possible */
3603 if (ret == other) {
3604 return (const xmlChar*) 1;
3605 }
3606 return ret;
3607 }
3608
3609 /**
3610 * xmlParseStringName:
3611 * @ctxt: an XML parser context
3612 * @str: a pointer to the string pointer (IN/OUT)
3613 *
3614 * parse an XML name.
3615 *
3616 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3617 * CombiningChar | Extender
3618 *
3619 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3620 *
3621 * [6] Names ::= Name (#x20 Name)*
3622 *
3623 * Returns the Name parsed or NULL. The @str pointer
3624 * is updated to the current location in the string.
3625 */
3626
3627 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3628 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3629 xmlChar buf[XML_MAX_NAMELEN + 5];
3630 const xmlChar *cur = *str;
3631 int len = 0, l;
3632 int c;
3633
3634 #ifdef DEBUG
3635 nbParseStringName++;
3636 #endif
3637
3638 c = CUR_SCHAR(cur, l);
3639 if (!xmlIsNameStartChar(ctxt, c)) {
3640 return(NULL);
3641 }
3642
3643 COPY_BUF(l,buf,len,c);
3644 cur += l;
3645 c = CUR_SCHAR(cur, l);
3646 while (xmlIsNameChar(ctxt, c)) {
3647 COPY_BUF(l,buf,len,c);
3648 cur += l;
3649 c = CUR_SCHAR(cur, l);
3650 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3651 /*
3652 * Okay someone managed to make a huge name, so he's ready to pay
3653 * for the processing speed.
3654 */
3655 xmlChar *buffer;
3656 int max = len * 2;
3657
3658 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3659 if (buffer == NULL) {
3660 xmlErrMemory(ctxt, NULL);
3661 return(NULL);
3662 }
3663 memcpy(buffer, buf, len);
3664 while (xmlIsNameChar(ctxt, c)) {
3665 if (len + 10 > max) {
3666 xmlChar *tmp;
3667
3668 if ((len > XML_MAX_NAME_LENGTH) &&
3669 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3670 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3671 xmlFree(buffer);
3672 return(NULL);
3673 }
3674 max *= 2;
3675 tmp = (xmlChar *) xmlRealloc(buffer,
3676 max * sizeof(xmlChar));
3677 if (tmp == NULL) {
3678 xmlErrMemory(ctxt, NULL);
3679 xmlFree(buffer);
3680 return(NULL);
3681 }
3682 buffer = tmp;
3683 }
3684 COPY_BUF(l,buffer,len,c);
3685 cur += l;
3686 c = CUR_SCHAR(cur, l);
3687 }
3688 buffer[len] = 0;
3689 *str = cur;
3690 return(buffer);
3691 }
3692 }
3693 if ((len > XML_MAX_NAME_LENGTH) &&
3694 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3695 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3696 return(NULL);
3697 }
3698 *str = cur;
3699 return(xmlStrndup(buf, len));
3700 }
3701
3702 /**
3703 * xmlParseNmtoken:
3704 * @ctxt: an XML parser context
3705 *
3706 * parse an XML Nmtoken.
3707 *
3708 * [7] Nmtoken ::= (NameChar)+
3709 *
3710 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3711 *
3712 * Returns the Nmtoken parsed or NULL
3713 */
3714
3715 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3716 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3717 xmlChar buf[XML_MAX_NAMELEN + 5];
3718 int len = 0, l;
3719 int c;
3720 int count = 0;
3721
3722 #ifdef DEBUG
3723 nbParseNmToken++;
3724 #endif
3725
3726 GROW;
3727 if (ctxt->instate == XML_PARSER_EOF)
3728 return(NULL);
3729 c = CUR_CHAR(l);
3730
3731 while (xmlIsNameChar(ctxt, c)) {
3732 if (count++ > XML_PARSER_CHUNK_SIZE) {
3733 count = 0;
3734 GROW;
3735 }
3736 COPY_BUF(l,buf,len,c);
3737 NEXTL(l);
3738 c = CUR_CHAR(l);
3739 if (c == 0) {
3740 count = 0;
3741 GROW;
3742 if (ctxt->instate == XML_PARSER_EOF)
3743 return(NULL);
3744 c = CUR_CHAR(l);
3745 }
3746 if (len >= XML_MAX_NAMELEN) {
3747 /*
3748 * Okay someone managed to make a huge token, so he's ready to pay
3749 * for the processing speed.
3750 */
3751 xmlChar *buffer;
3752 int max = len * 2;
3753
3754 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3755 if (buffer == NULL) {
3756 xmlErrMemory(ctxt, NULL);
3757 return(NULL);
3758 }
3759 memcpy(buffer, buf, len);
3760 while (xmlIsNameChar(ctxt, c)) {
3761 if (count++ > XML_PARSER_CHUNK_SIZE) {
3762 count = 0;
3763 GROW;
3764 if (ctxt->instate == XML_PARSER_EOF) {
3765 xmlFree(buffer);
3766 return(NULL);
3767 }
3768 }
3769 if (len + 10 > max) {
3770 xmlChar *tmp;
3771
3772 if ((max > XML_MAX_NAME_LENGTH) &&
3773 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3774 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3775 xmlFree(buffer);
3776 return(NULL);
3777 }
3778 max *= 2;
3779 tmp = (xmlChar *) xmlRealloc(buffer,
3780 max * sizeof(xmlChar));
3781 if (tmp == NULL) {
3782 xmlErrMemory(ctxt, NULL);
3783 xmlFree(buffer);
3784 return(NULL);
3785 }
3786 buffer = tmp;
3787 }
3788 COPY_BUF(l,buffer,len,c);
3789 NEXTL(l);
3790 c = CUR_CHAR(l);
3791 }
3792 buffer[len] = 0;
3793 return(buffer);
3794 }
3795 }
3796 if (len == 0)
3797 return(NULL);
3798 if ((len > XML_MAX_NAME_LENGTH) &&
3799 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3800 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3801 return(NULL);
3802 }
3803 return(xmlStrndup(buf, len));
3804 }
3805
/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL store a copy of the original entity value
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *	               "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The work is done in three steps: the raw literal is copied into a
 * buffer, the buffer is scanned to report '%' and '&' which are not part
 * of a reference, and finally the buffer is passed to
 * xmlStringDecodeEntities() with XML_SUBSTITUTE_PEREF.
 *
 * When the closing quote is found and @orig is non-NULL the raw buffer
 * is stored in *@orig instead of being freed here. On every other exit
 * *@orig is left untouched.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    xmlChar stop;
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;

    /* the opening quote selects the closing one */
    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    /* remember which input holds the opening quote, see the NOTE below */
    input = ctxt->input;
    GROW;
    if (ctxt->instate == XML_PARSER_EOF) {
        xmlFree(buf);
        return(NULL);
    }
    /* skip the opening quote */
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
            (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
        /* keep room for one more character plus the final 0 */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
            xmlPopInput(ctxt);

        GROW;
        c = CUR_CHAR(l);
        if (c == 0) {
            /* nothing decoded: ask for more input and retry once */
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;
    if (ctxt->instate == XML_PARSER_EOF) {
        xmlFree(buf);
        return(NULL);
    }

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;

            cur++;
            /* on success cur is moved past the name by the callee */
            name = xmlParseStringName(ctxt, &cur);
            if ((name == NULL) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
            }
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
            }
            /* the name was only needed for the check */
            if (name != NULL)
                xmlFree(name);
            /* do not step over the terminating 0 below */
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted.
     */
    if (c != stop) {
        /* the copy loop stopped on something else than the closing quote */
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        xmlFree(buf);
    } else {
        /* skip the closing quote */
        NEXT;
        /*
         * NOTE: 4.4.7 Bypassed
         * When a general entity reference appears in the EntityValue in
         * an entity declaration, it is bypassed and left as is.
         * so XML_SUBSTITUTE_REF is not set here.
         */
        ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                      0, 0, 0);
        if (orig != NULL)
            *orig = buf;
        else
            xmlFree(buf);
    }

    return(ret);
}
3953
/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @attlen:  if non-NULL, receives the length of the returned value
 * @normalize:  whether to apply the inner normalization
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * Literal whitespace characters (#x20, #xD, #xA, #x9) are always stored
 * as #x20. When @normalize is non-zero, leading whitespace is dropped,
 * a run of literal whitespace is stored as a single #x20 and trailing
 * #x20 are removed if the value ended with literal whitespace.
 *
 * growBuffer() is a macro defined outside this section; it is invoked
 * with the local names buf / buf_size and presumably jumps to the
 * mem_error label on failure - confirm against its definition before
 * renaming any of those.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */
static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
    xmlChar limit = 0;
    xmlChar *buf = NULL;
    xmlChar *rep = NULL;
    size_t len = 0;
    size_t buf_size = 0;
    int c, l, in_space = 0;
    xmlChar *current = NULL;
    xmlEntityPtr ent;

    /* the opening quote selects the closing one */
    if (NXT(0) == '"') {
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        limit = '"';
        NEXT;
    } else if (NXT(0) == '\'') {
        limit = '\'';
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buf_size = XML_PARSER_BUFFER_SIZE;
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
    if (buf == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     */
    c = CUR_CHAR(l);
    while (((NXT(0) != limit) && /* checked */
            (IS_CHAR(c)) && (c != '<')) &&
            (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
         * special option is given
         */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue length too long\n");
            /*
             * NOTE(review): this exit goes through mem_error, so
             * xmlErrMemory() is called in addition to the message above.
             */
            goto mem_error;
        }
        if (c == 0) break;
        if (c == '&') {
            in_space = 0;
            if (NXT(1) == '#') {
                /* character reference */
                int val = xmlParseCharRef(ctxt);

                if (val == '&') {
                    if (ctxt->replaceEntities) {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                    } else {
                        /*
                         * The reparsing will be done in xmlStringGetNodeList()
                         * called by the attribute() function in SAX.c
                         */
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    }
                } else if (val != 0) {
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    len += xmlCopyChar(0, &buf[len], val);
                }
            } else {
                /* general entity reference */
                ent = xmlParseEntityRef(ctxt);
                ctxt->nbentities++;
                if (ent != NULL)
                    ctxt->nbentities += ent->owner;
                if ((ent != NULL) &&
                    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                    /*
                     * predefined entity: store its single character, or
                     * "&#38;" for '&' when entities are not replaced
                     */
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    if ((ctxt->replaceEntities == 0) &&
                        (ent->content[0] == '&')) {
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    } else {
                        buf[len++] = ent->content[0];
                    }
                } else if ((ent != NULL) &&
                           (ctxt->replaceEntities != 0)) {
                    /*
                     * entity replacement requested: copy the decoded
                     * content, turning #xD, #xA and #x9 into #x20.
                     * Predefined entities were handled by the previous
                     * branch, so the else arm below looks unreachable.
                     */
                    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                      XML_SUBSTITUTE_REF,
                                                      0, 0, 0);
                        if (rep != NULL) {
                            current = rep;
                            while (*current != 0) { /* non input consuming */
                                if ((*current == 0xD) || (*current == 0xA) ||
                                    (*current == 0x9)) {
                                    buf[len++] = 0x20;
                                    current++;
                                } else
                                    buf[len++] = *current++;
                                /* the byte is stored before the room check */
                                if (len + 10 > buf_size) {
                                    growBuffer(buf, 10);
                                }
                            }
                            xmlFree(rep);
                            rep = NULL;
                        }
                    } else {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        if (ent->content != NULL)
                            buf[len++] = ent->content[0];
                    }
                } else if (ent != NULL) {
                    int i = xmlStrlen(ent->name);
                    const xmlChar *cur = ent->name;

                    /*
                     * This may look absurd but is needed to detect
                     * entities problems
                     */
                    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
                        (ent->content != NULL) && (ent->checked == 0)) {
                        unsigned long oldnbent = ctxt->nbentities;

                        /* decoded only for the check, result is discarded */
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                  XML_SUBSTITUTE_REF, 0, 0, 0);

                        /* low bit of checked records a '<' in the content */
                        ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
                        if (rep != NULL) {
                            if (xmlStrchr(rep, '<'))
                                ent->checked |= 1;
                            xmlFree(rep);
                            rep = NULL;
                        }
                    }

                    /*
                     * Just output the reference
                     */
                    buf[len++] = '&';
                    while (len + i + 10 > buf_size) {
                        growBuffer(buf, i + 10);
                    }
                    for (;i > 0;i--)
                        buf[len++] = *cur++;
                    buf[len++] = ';';
                }
            }
        } else {
            if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
                /* literal whitespace, see the header for @normalize */
                if ((len != 0) || (!normalize)) {
                    if ((!normalize) || (!in_space)) {
                        COPY_BUF(l,buf,len,0x20);
                        while (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                    }
                    in_space = 1;
                }
            } else {
                in_space = 0;
                COPY_BUF(l,buf,len,c);
                if (len + 10 > buf_size) {
                    growBuffer(buf, 10);
                }
            }
            NEXTL(l);
        }
        GROW;
        c = CUR_CHAR(l);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;

    /* drop the trailing spaces when the value ended with whitespace */
    if ((in_space) && (normalize)) {
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
    }
    buf[len] = 0;
    /*
     * Report why the loop stopped; in the error cases the value gathered
     * so far is still returned below.
     */
    if (RAW == '<') {
        xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
    } else if (RAW != limit) {
        if ((c != 0) && (!IS_CHAR(c))) {
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
                           "invalid character in attribute value\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue: ' expected\n");
        }
    } else
        NEXT;

    /*
     * There we potentially risk an overflow, don't allow attribute value of
     * length more than INT_MAX it is a very reasonable assumption !
     */
    if (len >= INT_MAX) {
        xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                       "AttValue length too long\n");
        goto mem_error;
    }

    if (attlen != NULL) *attlen = (int) len;
    return(buf);

mem_error:
    xmlErrMemory(ctxt, NULL);
error:
    /* common cleanup: release whatever is still owned by this function */
    if (buf != NULL)
        xmlFree(buf);
    if (rep != NULL)
        xmlFree(rep);
    return(NULL);
}
4196
4197 /**
4198 * xmlParseAttValue:
4199 * @ctxt: an XML parser context
4200 *
4201 * parse a value for an attribute
4202 * Note: the parser won't do substitution of entities here, this
4203 * will be handled later in xmlStringGetNodeList
4204 *
4205 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4206 * "'" ([^<&'] | Reference)* "'"
4207 *
4208 * 3.3.3 Attribute-Value Normalization:
4209 * Before the value of an attribute is passed to the application or
4210 * checked for validity, the XML processor must normalize it as follows:
4211 * - a character reference is processed by appending the referenced
4212 * character to the attribute value
4213 * - an entity reference is processed by recursively processing the
4214 * replacement text of the entity
4215 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4216 * appending #x20 to the normalized value, except that only a single
4217 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4218 * parsed entity or the literal entity value of an internal parsed entity
4219 * - other characters are processed by appending them to the normalized value
4220 * If the declared value is not CDATA, then the XML processor must further
4221 * process the normalized attribute value by discarding any leading and
4222 * trailing space (#x20) characters, and by replacing sequences of space
4223 * (#x20) characters by a single space (#x20) character.
4224 * All attributes for which no declaration has been read should be treated
4225 * by a non-validating parser as if declared CDATA.
4226 *
4227 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4228 */
4229
4230
4231 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4232 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4233 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4234 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4235 }
4236
4237 /**
4238 * xmlParseSystemLiteral:
4239 * @ctxt: an XML parser context
4240 *
4241 * parse an XML Literal
4242 *
4243 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4244 *
4245 * Returns the SystemLiteral parsed or NULL
4246 */
4247
4248 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4249 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4250 xmlChar *buf = NULL;
4251 int len = 0;
4252 int size = XML_PARSER_BUFFER_SIZE;
4253 int cur, l;
4254 xmlChar stop;
4255 int state = ctxt->instate;
4256 int count = 0;
4257
4258 SHRINK;
4259 if (RAW == '"') {
4260 NEXT;
4261 stop = '"';
4262 } else if (RAW == '\'') {
4263 NEXT;
4264 stop = '\'';
4265 } else {
4266 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4267 return(NULL);
4268 }
4269
4270 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4271 if (buf == NULL) {
4272 xmlErrMemory(ctxt, NULL);
4273 return(NULL);
4274 }
4275 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4276 cur = CUR_CHAR(l);
4277 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4278 if (len + 5 >= size) {
4279 xmlChar *tmp;
4280
4281 if ((size > XML_MAX_NAME_LENGTH) &&
4282 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4283 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4284 xmlFree(buf);
4285 ctxt->instate = (xmlParserInputState) state;
4286 return(NULL);
4287 }
4288 size *= 2;
4289 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4290 if (tmp == NULL) {
4291 xmlFree(buf);
4292 xmlErrMemory(ctxt, NULL);
4293 ctxt->instate = (xmlParserInputState) state;
4294 return(NULL);
4295 }
4296 buf = tmp;
4297 }
4298 count++;
4299 if (count > 50) {
4300 GROW;
4301 count = 0;
4302 if (ctxt->instate == XML_PARSER_EOF) {
4303 xmlFree(buf);
4304 return(NULL);
4305 }
4306 }
4307 COPY_BUF(l,buf,len,cur);
4308 NEXTL(l);
4309 cur = CUR_CHAR(l);
4310 if (cur == 0) {
4311 GROW;
4312 SHRINK;
4313 cur = CUR_CHAR(l);
4314 }
4315 }
4316 buf[len] = 0;
4317 ctxt->instate = (xmlParserInputState) state;
4318 if (!IS_CHAR(cur)) {
4319 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4320 } else {
4321 NEXT;
4322 }
4323 return(buf);
4324 }
4325
4326 /**
4327 * xmlParsePubidLiteral:
4328 * @ctxt: an XML parser context
4329 *
4330 * parse an XML public literal
4331 *
4332 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4333 *
4334 * Returns the PubidLiteral parsed or NULL.
4335 */
4336
4337 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4338 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4339 xmlChar *buf = NULL;
4340 int len = 0;
4341 int size = XML_PARSER_BUFFER_SIZE;
4342 xmlChar cur;
4343 xmlChar stop;
4344 int count = 0;
4345 xmlParserInputState oldstate = ctxt->instate;
4346
4347 SHRINK;
4348 if (RAW == '"') {
4349 NEXT;
4350 stop = '"';
4351 } else if (RAW == '\'') {
4352 NEXT;
4353 stop = '\'';
4354 } else {
4355 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4356 return(NULL);
4357 }
4358 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4359 if (buf == NULL) {
4360 xmlErrMemory(ctxt, NULL);
4361 return(NULL);
4362 }
4363 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4364 cur = CUR;
4365 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4366 if (len + 1 >= size) {
4367 xmlChar *tmp;
4368
4369 if ((size > XML_MAX_NAME_LENGTH) &&
4370 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4371 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4372 xmlFree(buf);
4373 return(NULL);
4374 }
4375 size *= 2;
4376 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4377 if (tmp == NULL) {
4378 xmlErrMemory(ctxt, NULL);
4379 xmlFree(buf);
4380 return(NULL);
4381 }
4382 buf = tmp;
4383 }
4384 buf[len++] = cur;
4385 count++;
4386 if (count > 50) {
4387 GROW;
4388 count = 0;
4389 if (ctxt->instate == XML_PARSER_EOF) {
4390 xmlFree(buf);
4391 return(NULL);
4392 }
4393 }
4394 NEXT;
4395 cur = CUR;
4396 if (cur == 0) {
4397 GROW;
4398 SHRINK;
4399 cur = CUR;
4400 }
4401 }
4402 buf[len] = 0;
4403 if (cur != stop) {
4404 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4405 } else {
4406 NEXT;
4407 }
4408 ctxt->instate = oldstate;
4409 return(buf);
4410 }
4411
/* Forward declaration: defined after xmlParseCharData(), which calls it. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * Lookup table indexed by byte value, used for the test in the inner loop
 * of the char data scanning in xmlParseCharData().
 *
 * A non-zero entry (the byte's own value) lets the scan step over the byte;
 * a zero entry stops it. The zero entries are: every byte below 0x20
 * except 0x09 (so 0xA and 0xD stop the scan), '&' (0x26), '<' (0x3C),
 * ']' (0x5D), and every byte from 0x80 upwards (non-ASCII).
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4451
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero, a fast path first scans the input buffer in place
 * and hands runs of plain ASCII text to the SAX characters or
 * ignorableWhitespace callbacks straight from the buffer, without copying.
 * The fast path ends at '<' or '&', at a misplaced "]]>", when the parser
 * state changes, or when it meets a byte it does not handle; in the last
 * case, and whenever @cdata is non-zero, the work is passed on to
 * xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position as of the last point where ctxt->input->cur was in sync;
     * written back before falling through to the complex parser, which
     * restarts from ctxt->input->cur.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* Step over spaces and line feeds, tracking line and column. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            /*
             * Only spaces and line feeds were seen before a '<': deliver
             * them (if any) and stop, the caller handles the markup.
             */
            if (*in == '<') {
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    /*
                     * When the two callbacks differ, areBlanks() decides
                     * which one gets the run; otherwise everything goes
                     * to characters().
                     */
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                               tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* Skip every byte the table marks as plain character data. */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside a CDATA section. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                /* A lone ']' is ordinary text. */
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver the run collected since ctxt->input->cur. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /*
                     * The run starts with a blank, so it may be ignorable
                     * whitespace: let areBlanks() decide.
                     */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF pair: drop the CR by restarting the next run
                     * at the LF, and count the new line.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* Lone CR: step back and let the checks below see it. */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            /* Refill the buffer and resume from the updated position. */
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* Fall back to the generic parser from the last synchronized position. */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4609
4610 /**
4611 * xmlParseCharDataComplex:
4612 * @ctxt: an XML parser context
4613 * @cdata: int indicating whether we are within a CDATA section
4614 *
4615 * parse a CharData section.this is the fallback function
4616 * of xmlParseCharData() when the parsing requires handling
4617 * of non-ASCII characters.
4618 */
4619 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4620 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4621 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4622 int nbchar = 0;
4623 int cur, l;
4624 int count = 0;
4625
4626 SHRINK;
4627 GROW;
4628 cur = CUR_CHAR(l);
4629 while ((cur != '<') && /* checked */
4630 (cur != '&') &&
4631 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4632 if ((cur == ']') && (NXT(1) == ']') &&
4633 (NXT(2) == '>')) {
4634 if (cdata) break;
4635 else {
4636 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4637 }
4638 }
4639 COPY_BUF(l,buf,nbchar,cur);
4640 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4641 buf[nbchar] = 0;
4642
4643 /*
4644 * OK the segment is to be consumed as chars.
4645 */
4646 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4647 if (areBlanks(ctxt, buf, nbchar, 0)) {
4648 if (ctxt->sax->ignorableWhitespace != NULL)
4649 ctxt->sax->ignorableWhitespace(ctxt->userData,
4650 buf, nbchar);
4651 } else {
4652 if (ctxt->sax->characters != NULL)
4653 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4654 if ((ctxt->sax->characters !=
4655 ctxt->sax->ignorableWhitespace) &&
4656 (*ctxt->space == -1))
4657 *ctxt->space = -2;
4658 }
4659 }
4660 nbchar = 0;
4661 /* something really bad happened in the SAX callback */
4662 if (ctxt->instate != XML_PARSER_CONTENT)
4663 return;
4664 }
4665 count++;
4666 if (count > 50) {
4667 GROW;
4668 count = 0;
4669 if (ctxt->instate == XML_PARSER_EOF)
4670 return;
4671 }
4672 NEXTL(l);
4673 cur = CUR_CHAR(l);
4674 }
4675 if (nbchar != 0) {
4676 buf[nbchar] = 0;
4677 /*
4678 * OK the segment is to be consumed as chars.
4679 */
4680 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4681 if (areBlanks(ctxt, buf, nbchar, 0)) {
4682 if (ctxt->sax->ignorableWhitespace != NULL)
4683 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4684 } else {
4685 if (ctxt->sax->characters != NULL)
4686 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4687 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4688 (*ctxt->space == -1))
4689 *ctxt->space = -2;
4690 }
4691 }
4692 }
4693 if ((cur != 0) && (!IS_CHAR(cur))) {
4694 /* Generate the error and skip the offending character */
4695 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4696 "PCDATA invalid Char value %d\n",
4697 cur);
4698 NEXTL(l);
4699 }
4700 }
4701
4702 /**
4703 * xmlParseExternalID:
4704 * @ctxt: an XML parser context
4705 * @publicID: a xmlChar** receiving PubidLiteral
4706 * @strict: indicate whether we should restrict parsing to only
4707 * production [75], see NOTE below
4708 *
4709 * Parse an External ID or a Public ID
4710 *
4711 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4712 * 'PUBLIC' S PubidLiteral S SystemLiteral
4713 *
4714 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4715 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4716 *
4717 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4718 *
4719 * Returns the function returns SystemLiteral and in the second
4720 * case publicID receives PubidLiteral, is strict is off
4721 * it is possible to return NULL and have publicID set.
4722 */
4723
4724 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4725 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4726 xmlChar *URI = NULL;
4727
4728 SHRINK;
4729
4730 *publicID = NULL;
4731 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4732 SKIP(6);
4733 if (!IS_BLANK_CH(CUR)) {
4734 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4735 "Space required after 'SYSTEM'\n");
4736 }
4737 SKIP_BLANKS;
4738 URI = xmlParseSystemLiteral(ctxt);
4739 if (URI == NULL) {
4740 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4741 }
4742 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4743 SKIP(6);
4744 if (!IS_BLANK_CH(CUR)) {
4745 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4746 "Space required after 'PUBLIC'\n");
4747 }
4748 SKIP_BLANKS;
4749 *publicID = xmlParsePubidLiteral(ctxt);
4750 if (*publicID == NULL) {
4751 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4752 }
4753 if (strict) {
4754 /*
4755 * We don't handle [83] so "S SystemLiteral" is required.
4756 */
4757 if (!IS_BLANK_CH(CUR)) {
4758 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4759 "Space required after the Public Identifier\n");
4760 }
4761 } else {
4762 /*
4763 * We handle [83] so we return immediately, if
4764 * "S SystemLiteral" is not detected. From a purely parsing
4765 * point of view that's a nice mess.
4766 */
4767 const xmlChar *ptr;
4768 GROW;
4769
4770 ptr = CUR_PTR;
4771 if (!IS_BLANK_CH(*ptr)) return(NULL);
4772
4773 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4774 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4775 }
4776 SKIP_BLANKS;
4777 URI = xmlParseSystemLiteral(ctxt);
4778 if (URI == NULL) {
4779 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780 }
4781 }
4782 return(URI);
4783 }
4784
4785 /**
4786 * xmlParseCommentComplex:
4787 * @ctxt: an XML parser context
4788 * @buf: the already parsed part of the buffer
4789 * @len: number of bytes filles in the buffer
4790 * @size: allocated size of the buffer
4791 *
4792 * Skip an XML (SGML) comment <!-- .... -->
4793 * The spec says that "For compatibility, the string "--" (double-hyphen)
4794 * must not occur within comments. "
4795 * This is the slow routine in case the accelerator for ascii didn't work
4796 *
4797 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798 */
4799 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4800 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801 size_t len, size_t size) {
4802 int q, ql;
4803 int r, rl;
4804 int cur, l;
4805 size_t count = 0;
4806 int inputid;
4807
4808 inputid = ctxt->input->id;
4809
4810 if (buf == NULL) {
4811 len = 0;
4812 size = XML_PARSER_BUFFER_SIZE;
4813 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4814 if (buf == NULL) {
4815 xmlErrMemory(ctxt, NULL);
4816 return;
4817 }
4818 }
4819 GROW; /* Assure there's enough input data */
4820 q = CUR_CHAR(ql);
4821 if (q == 0)
4822 goto not_terminated;
4823 if (!IS_CHAR(q)) {
4824 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4825 "xmlParseComment: invalid xmlChar value %d\n",
4826 q);
4827 xmlFree (buf);
4828 return;
4829 }
4830 NEXTL(ql);
4831 r = CUR_CHAR(rl);
4832 if (r == 0)
4833 goto not_terminated;
4834 if (!IS_CHAR(r)) {
4835 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4836 "xmlParseComment: invalid xmlChar value %d\n",
4837 q);
4838 xmlFree (buf);
4839 return;
4840 }
4841 NEXTL(rl);
4842 cur = CUR_CHAR(l);
4843 if (cur == 0)
4844 goto not_terminated;
4845 while (IS_CHAR(cur) && /* checked */
4846 ((cur != '>') ||
4847 (r != '-') || (q != '-'))) {
4848 if ((r == '-') && (q == '-')) {
4849 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4850 }
4851 if ((len > XML_MAX_TEXT_LENGTH) &&
4852 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4853 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4854 "Comment too big found", NULL);
4855 xmlFree (buf);
4856 return;
4857 }
4858 if (len + 5 >= size) {
4859 xmlChar *new_buf;
4860 size_t new_size;
4861
4862 new_size = size * 2;
4863 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4864 if (new_buf == NULL) {
4865 xmlFree (buf);
4866 xmlErrMemory(ctxt, NULL);
4867 return;
4868 }
4869 buf = new_buf;
4870 size = new_size;
4871 }
4872 COPY_BUF(ql,buf,len,q);
4873 q = r;
4874 ql = rl;
4875 r = cur;
4876 rl = l;
4877
4878 count++;
4879 if (count > 50) {
4880 GROW;
4881 count = 0;
4882 if (ctxt->instate == XML_PARSER_EOF) {
4883 xmlFree(buf);
4884 return;
4885 }
4886 }
4887 NEXTL(l);
4888 cur = CUR_CHAR(l);
4889 if (cur == 0) {
4890 SHRINK;
4891 GROW;
4892 cur = CUR_CHAR(l);
4893 }
4894 }
4895 buf[len] = 0;
4896 if (cur == 0) {
4897 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4898 "Comment not terminated \n<!--%.50s\n", buf);
4899 } else if (!IS_CHAR(cur)) {
4900 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4901 "xmlParseComment: invalid xmlChar value %d\n",
4902 cur);
4903 } else {
4904 if (inputid != ctxt->input->id) {
4905 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4906 "Comment doesn't start and stop in the same entity\n");
4907 }
4908 NEXT;
4909 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4910 (!ctxt->disableSAX))
4911 ctxt->sax->comment(ctxt->userData, buf);
4912 }
4913 xmlFree(buf);
4914 return;
4915 not_terminated:
4916 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4917 "Comment not terminated\n", NULL);
4918 xmlFree(buf);
4919 return;
4920 }
4921
4922 /**
4923 * xmlParseComment:
4924 * @ctxt: an XML parser context
4925 *
4926 * Skip an XML (SGML) comment <!-- .... -->
4927 * The spec says that "For compatibility, the string "--" (double-hyphen)
4928 * must not occur within comments. "
4929 *
4930 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4931 */
4932 void
xmlParseComment(xmlParserCtxtPtr ctxt)4933 xmlParseComment(xmlParserCtxtPtr ctxt) {
4934 xmlChar *buf = NULL;
4935 size_t size = XML_PARSER_BUFFER_SIZE;
4936 size_t len = 0;
4937 xmlParserInputState state;
4938 const xmlChar *in;
4939 size_t nbchar = 0;
4940 int ccol;
4941 int inputid;
4942
4943 /*
4944 * Check that there is a comment right here.
4945 */
4946 if ((RAW != '<') || (NXT(1) != '!') ||
4947 (NXT(2) != '-') || (NXT(3) != '-')) return;
4948 state = ctxt->instate;
4949 ctxt->instate = XML_PARSER_COMMENT;
4950 inputid = ctxt->input->id;
4951 SKIP(4);
4952 SHRINK;
4953 GROW;
4954
4955 /*
4956 * Accelerated common case where input don't need to be
4957 * modified before passing it to the handler.
4958 */
4959 in = ctxt->input->cur;
4960 do {
4961 if (*in == 0xA) {
4962 do {
4963 ctxt->input->line++; ctxt->input->col = 1;
4964 in++;
4965 } while (*in == 0xA);
4966 }
4967 get_more:
4968 ccol = ctxt->input->col;
4969 while (((*in > '-') && (*in <= 0x7F)) ||
4970 ((*in >= 0x20) && (*in < '-')) ||
4971 (*in == 0x09)) {
4972 in++;
4973 ccol++;
4974 }
4975 ctxt->input->col = ccol;
4976 if (*in == 0xA) {
4977 do {
4978 ctxt->input->line++; ctxt->input->col = 1;
4979 in++;
4980 } while (*in == 0xA);
4981 goto get_more;
4982 }
4983 nbchar = in - ctxt->input->cur;
4984 /*
4985 * save current set of data
4986 */
4987 if (nbchar > 0) {
4988 if ((ctxt->sax != NULL) &&
4989 (ctxt->sax->comment != NULL)) {
4990 if (buf == NULL) {
4991 if ((*in == '-') && (in[1] == '-'))
4992 size = nbchar + 1;
4993 else
4994 size = XML_PARSER_BUFFER_SIZE + nbchar;
4995 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4996 if (buf == NULL) {
4997 xmlErrMemory(ctxt, NULL);
4998 ctxt->instate = state;
4999 return;
5000 }
5001 len = 0;
5002 } else if (len + nbchar + 1 >= size) {
5003 xmlChar *new_buf;
5004 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5005 new_buf = (xmlChar *) xmlRealloc(buf,
5006 size * sizeof(xmlChar));
5007 if (new_buf == NULL) {
5008 xmlFree (buf);
5009 xmlErrMemory(ctxt, NULL);
5010 ctxt->instate = state;
5011 return;
5012 }
5013 buf = new_buf;
5014 }
5015 memcpy(&buf[len], ctxt->input->cur, nbchar);
5016 len += nbchar;
5017 buf[len] = 0;
5018 }
5019 }
5020 if ((len > XML_MAX_TEXT_LENGTH) &&
5021 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5022 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5023 "Comment too big found", NULL);
5024 xmlFree (buf);
5025 return;
5026 }
5027 ctxt->input->cur = in;
5028 if (*in == 0xA) {
5029 in++;
5030 ctxt->input->line++; ctxt->input->col = 1;
5031 }
5032 if (*in == 0xD) {
5033 in++;
5034 if (*in == 0xA) {
5035 ctxt->input->cur = in;
5036 in++;
5037 ctxt->input->line++; ctxt->input->col = 1;
5038 continue; /* while */
5039 }
5040 in--;
5041 }
5042 SHRINK;
5043 GROW;
5044 if (ctxt->instate == XML_PARSER_EOF) {
5045 xmlFree(buf);
5046 return;
5047 }
5048 in = ctxt->input->cur;
5049 if (*in == '-') {
5050 if (in[1] == '-') {
5051 if (in[2] == '>') {
5052 if (ctxt->input->id != inputid) {
5053 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5054 "comment doesn't start and stop in the same entity\n");
5055 }
5056 SKIP(3);
5057 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5058 (!ctxt->disableSAX)) {
5059 if (buf != NULL)
5060 ctxt->sax->comment(ctxt->userData, buf);
5061 else
5062 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5063 }
5064 if (buf != NULL)
5065 xmlFree(buf);
5066 if (ctxt->instate != XML_PARSER_EOF)
5067 ctxt->instate = state;
5068 return;
5069 }
5070 if (buf != NULL) {
5071 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5072 "Double hyphen within comment: "
5073 "<!--%.50s\n",
5074 buf);
5075 } else
5076 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5077 "Double hyphen within comment\n", NULL);
5078 in++;
5079 ctxt->input->col++;
5080 }
5081 in++;
5082 ctxt->input->col++;
5083 goto get_more;
5084 }
5085 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5086 xmlParseCommentComplex(ctxt, buf, len, size);
5087 ctxt->instate = state;
5088 return;
5089 }
5090
5091
5092 /**
5093 * xmlParsePITarget:
5094 * @ctxt: an XML parser context
5095 *
5096 * parse the name of a PI
5097 *
5098 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5099 *
5100 * Returns the PITarget name or NULL
5101 */
5102
5103 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5104 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5105 const xmlChar *name;
5106
5107 name = xmlParseName(ctxt);
5108 if ((name != NULL) &&
5109 ((name[0] == 'x') || (name[0] == 'X')) &&
5110 ((name[1] == 'm') || (name[1] == 'M')) &&
5111 ((name[2] == 'l') || (name[2] == 'L'))) {
5112 int i;
5113 if ((name[0] == 'x') && (name[1] == 'm') &&
5114 (name[2] == 'l') && (name[3] == 0)) {
5115 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5116 "XML declaration allowed only at the start of the document\n");
5117 return(name);
5118 } else if (name[3] == 0) {
5119 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5120 return(name);
5121 }
5122 for (i = 0;;i++) {
5123 if (xmlW3CPIs[i] == NULL) break;
5124 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5125 return(name);
5126 }
5127 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5128 "xmlParsePITarget: invalid name prefix 'xml'\n",
5129 NULL, NULL);
5130 }
5131 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5132 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5133 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5134 }
5135 return(name);
5136 }
5137
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The catalog URL found is added to the catalogs of the parsing context
 * so it can be used if a resolution is needed further down in the
 * document.
 *
 * The value must look like: S? 'catalog' S? '=' S? quoted-URL S?
 * A value with no '=' after 'catalog' is ignored silently; any other
 * deviation only produces a warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* The pseudo attribute name. */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* The '=' sign. */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* The quoted value; it must end with the quote that opened it. */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the value. */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5199
5200 /**
5201 * xmlParsePI:
5202 * @ctxt: an XML parser context
5203 *
5204 * parse an XML Processing Instruction.
5205 *
5206 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5207 *
5208 * The processing is transfered to SAX once parsed.
5209 */
5210
5211 void
xmlParsePI(xmlParserCtxtPtr ctxt)5212 xmlParsePI(xmlParserCtxtPtr ctxt) {
5213 xmlChar *buf = NULL;
5214 size_t len = 0;
5215 size_t size = XML_PARSER_BUFFER_SIZE;
5216 int cur, l;
5217 const xmlChar *target;
5218 xmlParserInputState state;
5219 int count = 0;
5220
5221 if ((RAW == '<') && (NXT(1) == '?')) {
5222 xmlParserInputPtr input = ctxt->input;
5223 state = ctxt->instate;
5224 ctxt->instate = XML_PARSER_PI;
5225 /*
5226 * this is a Processing Instruction.
5227 */
5228 SKIP(2);
5229 SHRINK;
5230
5231 /*
5232 * Parse the target name and check for special support like
5233 * namespace.
5234 */
5235 target = xmlParsePITarget(ctxt);
5236 if (target != NULL) {
5237 if ((RAW == '?') && (NXT(1) == '>')) {
5238 if (input != ctxt->input) {
5239 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5240 "PI declaration doesn't start and stop in the same entity\n");
5241 }
5242 SKIP(2);
5243
5244 /*
5245 * SAX: PI detected.
5246 */
5247 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5248 (ctxt->sax->processingInstruction != NULL))
5249 ctxt->sax->processingInstruction(ctxt->userData,
5250 target, NULL);
5251 if (ctxt->instate != XML_PARSER_EOF)
5252 ctxt->instate = state;
5253 return;
5254 }
5255 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5256 if (buf == NULL) {
5257 xmlErrMemory(ctxt, NULL);
5258 ctxt->instate = state;
5259 return;
5260 }
5261 cur = CUR;
5262 if (!IS_BLANK(cur)) {
5263 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5264 "ParsePI: PI %s space expected\n", target);
5265 }
5266 SKIP_BLANKS;
5267 cur = CUR_CHAR(l);
5268 while (IS_CHAR(cur) && /* checked */
5269 ((cur != '?') || (NXT(1) != '>'))) {
5270 if (len + 5 >= size) {
5271 xmlChar *tmp;
5272 size_t new_size = size * 2;
5273 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5274 if (tmp == NULL) {
5275 xmlErrMemory(ctxt, NULL);
5276 xmlFree(buf);
5277 ctxt->instate = state;
5278 return;
5279 }
5280 buf = tmp;
5281 size = new_size;
5282 }
5283 count++;
5284 if (count > 50) {
5285 GROW;
5286 if (ctxt->instate == XML_PARSER_EOF) {
5287 xmlFree(buf);
5288 return;
5289 }
5290 count = 0;
5291 if ((len > XML_MAX_TEXT_LENGTH) &&
5292 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5293 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5294 "PI %s too big found", target);
5295 xmlFree(buf);
5296 ctxt->instate = state;
5297 return;
5298 }
5299 }
5300 COPY_BUF(l,buf,len,cur);
5301 NEXTL(l);
5302 cur = CUR_CHAR(l);
5303 if (cur == 0) {
5304 SHRINK;
5305 GROW;
5306 cur = CUR_CHAR(l);
5307 }
5308 }
5309 if ((len > XML_MAX_TEXT_LENGTH) &&
5310 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5311 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5312 "PI %s too big found", target);
5313 xmlFree(buf);
5314 ctxt->instate = state;
5315 return;
5316 }
5317 buf[len] = 0;
5318 if (cur != '?') {
5319 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5320 "ParsePI: PI %s never end ...\n", target);
5321 } else {
5322 if (input != ctxt->input) {
5323 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5324 "PI declaration doesn't start and stop in the same entity\n");
5325 }
5326 SKIP(2);
5327
5328 #ifdef LIBXML_CATALOG_ENABLED
5329 if (((state == XML_PARSER_MISC) ||
5330 (state == XML_PARSER_START)) &&
5331 (xmlStrEqual(target, XML_CATALOG_PI))) {
5332 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5333 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5334 (allow == XML_CATA_ALLOW_ALL))
5335 xmlParseCatalogPI(ctxt, buf);
5336 }
5337 #endif
5338
5339
5340 /*
5341 * SAX: PI detected.
5342 */
5343 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5344 (ctxt->sax->processingInstruction != NULL))
5345 ctxt->sax->processingInstruction(ctxt->userData,
5346 target, buf);
5347 }
5348 xmlFree(buf);
5349 } else {
5350 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5351 }
5352 if (ctxt->instate != XML_PARSER_EOF)
5353 ctxt->instate = state;
5354 }
5355 }
5356
5357 /**
5358 * xmlParseNotationDecl:
5359 * @ctxt: an XML parser context
5360 *
5361 * parse a notation declaration
5362 *
5363 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5364 *
5365 * Hence there is actually 3 choices:
5366 * 'PUBLIC' S PubidLiteral
5367 * 'PUBLIC' S PubidLiteral S SystemLiteral
5368 * and 'SYSTEM' S SystemLiteral
5369 *
5370 * See the NOTE on xmlParseExternalID().
5371 */
5372
5373 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5374 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5375 const xmlChar *name;
5376 xmlChar *Pubid;
5377 xmlChar *Systemid;
5378
5379 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5380 xmlParserInputPtr input = ctxt->input;
5381 SHRINK;
5382 SKIP(10);
5383 if (!IS_BLANK_CH(CUR)) {
5384 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5385 "Space required after '<!NOTATION'\n");
5386 return;
5387 }
5388 SKIP_BLANKS;
5389
5390 name = xmlParseName(ctxt);
5391 if (name == NULL) {
5392 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5393 return;
5394 }
5395 if (!IS_BLANK_CH(CUR)) {
5396 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5397 "Space required after the NOTATION name'\n");
5398 return;
5399 }
5400 if (xmlStrchr(name, ':') != NULL) {
5401 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5402 "colons are forbidden from notation names '%s'\n",
5403 name, NULL, NULL);
5404 }
5405 SKIP_BLANKS;
5406
5407 /*
5408 * Parse the IDs.
5409 */
5410 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5411 SKIP_BLANKS;
5412
5413 if (RAW == '>') {
5414 if (input != ctxt->input) {
5415 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5416 "Notation declaration doesn't start and stop in the same entity\n");
5417 }
5418 NEXT;
5419 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5420 (ctxt->sax->notationDecl != NULL))
5421 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5422 } else {
5423 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5424 }
5425 if (Systemid != NULL) xmlFree(Systemid);
5426 if (Pubid != NULL) xmlFree(Pubid);
5427 }
5428 }
5429
5430 /**
5431 * xmlParseEntityDecl:
5432 * @ctxt: an XML parser context
5433 *
5434 * parse <!ENTITY declarations
5435 *
5436 * [70] EntityDecl ::= GEDecl | PEDecl
5437 *
5438 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5439 *
5440 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5441 *
5442 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5443 *
5444 * [74] PEDef ::= EntityValue | ExternalID
5445 *
5446 * [76] NDataDecl ::= S 'NDATA' S Name
5447 *
5448 * [ VC: Notation Declared ]
5449 * The Name must match the declared name of a notation.
5450 */
5451
5452 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5453 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5454 const xmlChar *name = NULL;
5455 xmlChar *value = NULL;
5456 xmlChar *URI = NULL, *literal = NULL;
5457 const xmlChar *ndata = NULL;
5458 int isParameter = 0;
5459 xmlChar *orig = NULL;
5460 int skipped;
5461
5462 /* GROW; done in the caller */
5463 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5464 xmlParserInputPtr input = ctxt->input;
5465 SHRINK;
5466 SKIP(8);
5467 skipped = SKIP_BLANKS;
5468 if (skipped == 0) {
5469 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5470 "Space required after '<!ENTITY'\n");
5471 }
5472
5473 if (RAW == '%') {
5474 NEXT;
5475 skipped = SKIP_BLANKS;
5476 if (skipped == 0) {
5477 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5478 "Space required after '%'\n");
5479 }
5480 isParameter = 1;
5481 }
5482
5483 name = xmlParseName(ctxt);
5484 if (name == NULL) {
5485 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5486 "xmlParseEntityDecl: no name\n");
5487 return;
5488 }
5489 if (xmlStrchr(name, ':') != NULL) {
5490 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5491 "colons are forbidden from entities names '%s'\n",
5492 name, NULL, NULL);
5493 }
5494 skipped = SKIP_BLANKS;
5495 if (skipped == 0) {
5496 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5497 "Space required after the entity name\n");
5498 }
5499
5500 ctxt->instate = XML_PARSER_ENTITY_DECL;
5501 /*
5502 * handle the various case of definitions...
5503 */
5504 if (isParameter) {
5505 if ((RAW == '"') || (RAW == '\'')) {
5506 value = xmlParseEntityValue(ctxt, &orig);
5507 if (value) {
5508 if ((ctxt->sax != NULL) &&
5509 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5510 ctxt->sax->entityDecl(ctxt->userData, name,
5511 XML_INTERNAL_PARAMETER_ENTITY,
5512 NULL, NULL, value);
5513 }
5514 } else {
5515 URI = xmlParseExternalID(ctxt, &literal, 1);
5516 if ((URI == NULL) && (literal == NULL)) {
5517 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5518 }
5519 if (URI) {
5520 xmlURIPtr uri;
5521
5522 uri = xmlParseURI((const char *) URI);
5523 if (uri == NULL) {
5524 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5525 "Invalid URI: %s\n", URI);
5526 /*
5527 * This really ought to be a well formedness error
5528 * but the XML Core WG decided otherwise c.f. issue
5529 * E26 of the XML erratas.
5530 */
5531 } else {
5532 if (uri->fragment != NULL) {
5533 /*
5534 * Okay this is foolish to block those but not
5535 * invalid URIs.
5536 */
5537 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5538 } else {
5539 if ((ctxt->sax != NULL) &&
5540 (!ctxt->disableSAX) &&
5541 (ctxt->sax->entityDecl != NULL))
5542 ctxt->sax->entityDecl(ctxt->userData, name,
5543 XML_EXTERNAL_PARAMETER_ENTITY,
5544 literal, URI, NULL);
5545 }
5546 xmlFreeURI(uri);
5547 }
5548 }
5549 }
5550 } else {
5551 if ((RAW == '"') || (RAW == '\'')) {
5552 value = xmlParseEntityValue(ctxt, &orig);
5553 if ((ctxt->sax != NULL) &&
5554 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5555 ctxt->sax->entityDecl(ctxt->userData, name,
5556 XML_INTERNAL_GENERAL_ENTITY,
5557 NULL, NULL, value);
5558 /*
5559 * For expat compatibility in SAX mode.
5560 */
5561 if ((ctxt->myDoc == NULL) ||
5562 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5563 if (ctxt->myDoc == NULL) {
5564 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5565 if (ctxt->myDoc == NULL) {
5566 xmlErrMemory(ctxt, "New Doc failed");
5567 return;
5568 }
5569 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5570 }
5571 if (ctxt->myDoc->intSubset == NULL)
5572 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5573 BAD_CAST "fake", NULL, NULL);
5574
5575 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5576 NULL, NULL, value);
5577 }
5578 } else {
5579 URI = xmlParseExternalID(ctxt, &literal, 1);
5580 if ((URI == NULL) && (literal == NULL)) {
5581 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5582 }
5583 if (URI) {
5584 xmlURIPtr uri;
5585
5586 uri = xmlParseURI((const char *)URI);
5587 if (uri == NULL) {
5588 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5589 "Invalid URI: %s\n", URI);
5590 /*
5591 * This really ought to be a well formedness error
5592 * but the XML Core WG decided otherwise c.f. issue
5593 * E26 of the XML erratas.
5594 */
5595 } else {
5596 if (uri->fragment != NULL) {
5597 /*
5598 * Okay this is foolish to block those but not
5599 * invalid URIs.
5600 */
5601 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5602 }
5603 xmlFreeURI(uri);
5604 }
5605 }
5606 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5607 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5608 "Space required before 'NDATA'\n");
5609 }
5610 SKIP_BLANKS;
5611 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5612 SKIP(5);
5613 if (!IS_BLANK_CH(CUR)) {
5614 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5615 "Space required after 'NDATA'\n");
5616 }
5617 SKIP_BLANKS;
5618 ndata = xmlParseName(ctxt);
5619 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5620 (ctxt->sax->unparsedEntityDecl != NULL))
5621 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5622 literal, URI, ndata);
5623 } else {
5624 if ((ctxt->sax != NULL) &&
5625 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5626 ctxt->sax->entityDecl(ctxt->userData, name,
5627 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5628 literal, URI, NULL);
5629 /*
5630 * For expat compatibility in SAX mode.
5631 * assuming the entity repalcement was asked for
5632 */
5633 if ((ctxt->replaceEntities != 0) &&
5634 ((ctxt->myDoc == NULL) ||
5635 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5636 if (ctxt->myDoc == NULL) {
5637 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5638 if (ctxt->myDoc == NULL) {
5639 xmlErrMemory(ctxt, "New Doc failed");
5640 return;
5641 }
5642 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5643 }
5644
5645 if (ctxt->myDoc->intSubset == NULL)
5646 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5647 BAD_CAST "fake", NULL, NULL);
5648 xmlSAX2EntityDecl(ctxt, name,
5649 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5650 literal, URI, NULL);
5651 }
5652 }
5653 }
5654 }
5655 if (ctxt->instate == XML_PARSER_EOF)
5656 return;
5657 SKIP_BLANKS;
5658 if (RAW != '>') {
5659 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5660 "xmlParseEntityDecl: entity %s not terminated\n", name);
5661 } else {
5662 if (input != ctxt->input) {
5663 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5664 "Entity declaration doesn't start and stop in the same entity\n");
5665 }
5666 NEXT;
5667 }
5668 if (orig != NULL) {
5669 /*
5670 * Ugly mechanism to save the raw entity value.
5671 */
5672 xmlEntityPtr cur = NULL;
5673
5674 if (isParameter) {
5675 if ((ctxt->sax != NULL) &&
5676 (ctxt->sax->getParameterEntity != NULL))
5677 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5678 } else {
5679 if ((ctxt->sax != NULL) &&
5680 (ctxt->sax->getEntity != NULL))
5681 cur = ctxt->sax->getEntity(ctxt->userData, name);
5682 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5683 cur = xmlSAX2GetEntity(ctxt, name);
5684 }
5685 }
5686 if (cur != NULL) {
5687 if (cur->orig != NULL)
5688 xmlFree(orig);
5689 else
5690 cur->orig = orig;
5691 } else
5692 xmlFree(orig);
5693 }
5694 if (value != NULL) xmlFree(value);
5695 if (URI != NULL) xmlFree(URI);
5696 if (literal != NULL) xmlFree(literal);
5697 }
5698 }
5699
5700 /**
5701 * xmlParseDefaultDecl:
5702 * @ctxt: an XML parser context
5703 * @value: Receive a possible fixed default value for the attribute
5704 *
5705 * Parse an attribute default declaration
5706 *
5707 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5708 *
5709 * [ VC: Required Attribute ]
5710 * if the default declaration is the keyword #REQUIRED, then the
5711 * attribute must be specified for all elements of the type in the
5712 * attribute-list declaration.
5713 *
5714 * [ VC: Attribute Default Legal ]
5715 * The declared default value must meet the lexical constraints of
5716 * the declared attribute type c.f. xmlValidateAttributeDecl()
5717 *
5718 * [ VC: Fixed Attribute Default ]
5719 * if an attribute has a default value declared with the #FIXED
5720 * keyword, instances of that attribute must match the default value.
5721 *
5722 * [ WFC: No < in Attribute Values ]
5723 * handled in xmlParseAttValue()
5724 *
5725 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5726 * or XML_ATTRIBUTE_FIXED.
5727 */
5728
5729 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5730 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5731 int val;
5732 xmlChar *ret;
5733
5734 *value = NULL;
5735 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5736 SKIP(9);
5737 return(XML_ATTRIBUTE_REQUIRED);
5738 }
5739 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5740 SKIP(8);
5741 return(XML_ATTRIBUTE_IMPLIED);
5742 }
5743 val = XML_ATTRIBUTE_NONE;
5744 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5745 SKIP(6);
5746 val = XML_ATTRIBUTE_FIXED;
5747 if (!IS_BLANK_CH(CUR)) {
5748 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5749 "Space required after '#FIXED'\n");
5750 }
5751 SKIP_BLANKS;
5752 }
5753 ret = xmlParseAttValue(ctxt);
5754 ctxt->instate = XML_PARSER_DTD;
5755 if (ret == NULL) {
5756 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5757 "Attribute default value declaration error\n");
5758 } else
5759 *value = ret;
5760 return(val);
5761 }
5762
5763 /**
5764 * xmlParseNotationType:
5765 * @ctxt: an XML parser context
5766 *
5767 * parse an Notation attribute type.
5768 *
5769 * Note: the leading 'NOTATION' S part has already being parsed...
5770 *
5771 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5772 *
5773 * [ VC: Notation Attributes ]
5774 * Values of this type must match one of the notation names included
5775 * in the declaration; all notation names in the declaration must be declared.
5776 *
5777 * Returns: the notation attribute tree built while parsing
5778 */
5779
5780 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5781 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5782 const xmlChar *name;
5783 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5784
5785 if (RAW != '(') {
5786 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5787 return(NULL);
5788 }
5789 SHRINK;
5790 do {
5791 NEXT;
5792 SKIP_BLANKS;
5793 name = xmlParseName(ctxt);
5794 if (name == NULL) {
5795 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5796 "Name expected in NOTATION declaration\n");
5797 xmlFreeEnumeration(ret);
5798 return(NULL);
5799 }
5800 tmp = ret;
5801 while (tmp != NULL) {
5802 if (xmlStrEqual(name, tmp->name)) {
5803 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5804 "standalone: attribute notation value token %s duplicated\n",
5805 name, NULL);
5806 if (!xmlDictOwns(ctxt->dict, name))
5807 xmlFree((xmlChar *) name);
5808 break;
5809 }
5810 tmp = tmp->next;
5811 }
5812 if (tmp == NULL) {
5813 cur = xmlCreateEnumeration(name);
5814 if (cur == NULL) {
5815 xmlFreeEnumeration(ret);
5816 return(NULL);
5817 }
5818 if (last == NULL) ret = last = cur;
5819 else {
5820 last->next = cur;
5821 last = cur;
5822 }
5823 }
5824 SKIP_BLANKS;
5825 } while (RAW == '|');
5826 if (RAW != ')') {
5827 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5828 xmlFreeEnumeration(ret);
5829 return(NULL);
5830 }
5831 NEXT;
5832 return(ret);
5833 }
5834
5835 /**
5836 * xmlParseEnumerationType:
5837 * @ctxt: an XML parser context
5838 *
5839 * parse an Enumeration attribute type.
5840 *
5841 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5842 *
5843 * [ VC: Enumeration ]
5844 * Values of this type must match one of the Nmtoken tokens in
5845 * the declaration
5846 *
5847 * Returns: the enumeration attribute tree built while parsing
5848 */
5849
5850 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5851 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5852 xmlChar *name;
5853 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5854
5855 if (RAW != '(') {
5856 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5857 return(NULL);
5858 }
5859 SHRINK;
5860 do {
5861 NEXT;
5862 SKIP_BLANKS;
5863 name = xmlParseNmtoken(ctxt);
5864 if (name == NULL) {
5865 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5866 return(ret);
5867 }
5868 tmp = ret;
5869 while (tmp != NULL) {
5870 if (xmlStrEqual(name, tmp->name)) {
5871 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5872 "standalone: attribute enumeration value token %s duplicated\n",
5873 name, NULL);
5874 if (!xmlDictOwns(ctxt->dict, name))
5875 xmlFree(name);
5876 break;
5877 }
5878 tmp = tmp->next;
5879 }
5880 if (tmp == NULL) {
5881 cur = xmlCreateEnumeration(name);
5882 if (!xmlDictOwns(ctxt->dict, name))
5883 xmlFree(name);
5884 if (cur == NULL) {
5885 xmlFreeEnumeration(ret);
5886 return(NULL);
5887 }
5888 if (last == NULL) ret = last = cur;
5889 else {
5890 last->next = cur;
5891 last = cur;
5892 }
5893 }
5894 SKIP_BLANKS;
5895 } while (RAW == '|');
5896 if (RAW != ')') {
5897 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5898 return(ret);
5899 }
5900 NEXT;
5901 return(ret);
5902 }
5903
5904 /**
5905 * xmlParseEnumeratedType:
5906 * @ctxt: an XML parser context
5907 * @tree: the enumeration tree built while parsing
5908 *
5909 * parse an Enumerated attribute type.
5910 *
5911 * [57] EnumeratedType ::= NotationType | Enumeration
5912 *
5913 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5914 *
5915 *
5916 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5917 */
5918
5919 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5920 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5921 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5922 SKIP(8);
5923 if (!IS_BLANK_CH(CUR)) {
5924 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5925 "Space required after 'NOTATION'\n");
5926 return(0);
5927 }
5928 SKIP_BLANKS;
5929 *tree = xmlParseNotationType(ctxt);
5930 if (*tree == NULL) return(0);
5931 return(XML_ATTRIBUTE_NOTATION);
5932 }
5933 *tree = xmlParseEnumerationType(ctxt);
5934 if (*tree == NULL) return(0);
5935 return(XML_ATTRIBUTE_ENUMERATION);
5936 }
5937
5938 /**
5939 * xmlParseAttributeType:
5940 * @ctxt: an XML parser context
5941 * @tree: the enumeration tree built while parsing
5942 *
5943 * parse the Attribute list def for an element
5944 *
5945 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5946 *
5947 * [55] StringType ::= 'CDATA'
5948 *
5949 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5950 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5951 *
5952 * Validity constraints for attribute values syntax are checked in
5953 * xmlValidateAttributeValue()
5954 *
5955 * [ VC: ID ]
5956 * Values of type ID must match the Name production. A name must not
5957 * appear more than once in an XML document as a value of this type;
5958 * i.e., ID values must uniquely identify the elements which bear them.
5959 *
5960 * [ VC: One ID per Element Type ]
5961 * No element type may have more than one ID attribute specified.
5962 *
5963 * [ VC: ID Attribute Default ]
5964 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5965 *
5966 * [ VC: IDREF ]
5967 * Values of type IDREF must match the Name production, and values
5968 * of type IDREFS must match Names; each IDREF Name must match the value
5969 * of an ID attribute on some element in the XML document; i.e. IDREF
5970 * values must match the value of some ID attribute.
5971 *
5972 * [ VC: Entity Name ]
5973 * Values of type ENTITY must match the Name production, values
5974 * of type ENTITIES must match Names; each Entity Name must match the
5975 * name of an unparsed entity declared in the DTD.
5976 *
5977 * [ VC: Name Token ]
5978 * Values of type NMTOKEN must match the Nmtoken production; values
5979 * of type NMTOKENS must match Nmtokens.
5980 *
5981 * Returns the attribute type
5982 */
5983 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5984 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5985 SHRINK;
5986 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5987 SKIP(5);
5988 return(XML_ATTRIBUTE_CDATA);
5989 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5990 SKIP(6);
5991 return(XML_ATTRIBUTE_IDREFS);
5992 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5993 SKIP(5);
5994 return(XML_ATTRIBUTE_IDREF);
5995 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5996 SKIP(2);
5997 return(XML_ATTRIBUTE_ID);
5998 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5999 SKIP(6);
6000 return(XML_ATTRIBUTE_ENTITY);
6001 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6002 SKIP(8);
6003 return(XML_ATTRIBUTE_ENTITIES);
6004 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6005 SKIP(8);
6006 return(XML_ATTRIBUTE_NMTOKENS);
6007 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6008 SKIP(7);
6009 return(XML_ATTRIBUTE_NMTOKEN);
6010 }
6011 return(xmlParseEnumeratedType(ctxt, tree));
6012 }
6013
6014 /**
6015 * xmlParseAttributeListDecl:
6016 * @ctxt: an XML parser context
6017 *
6018 * : parse the Attribute list def for an element
6019 *
6020 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6021 *
6022 * [53] AttDef ::= S Name S AttType S DefaultDecl
6023 *
6024 */
6025 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)6026 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6027 const xmlChar *elemName;
6028 const xmlChar *attrName;
6029 xmlEnumerationPtr tree;
6030
6031 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6032 xmlParserInputPtr input = ctxt->input;
6033
6034 SKIP(9);
6035 if (!IS_BLANK_CH(CUR)) {
6036 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6037 "Space required after '<!ATTLIST'\n");
6038 }
6039 SKIP_BLANKS;
6040 elemName = xmlParseName(ctxt);
6041 if (elemName == NULL) {
6042 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6043 "ATTLIST: no name for Element\n");
6044 return;
6045 }
6046 SKIP_BLANKS;
6047 GROW;
6048 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6049 const xmlChar *check = CUR_PTR;
6050 int type;
6051 int def;
6052 xmlChar *defaultValue = NULL;
6053
6054 GROW;
6055 tree = NULL;
6056 attrName = xmlParseName(ctxt);
6057 if (attrName == NULL) {
6058 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6059 "ATTLIST: no name for Attribute\n");
6060 break;
6061 }
6062 GROW;
6063 if (!IS_BLANK_CH(CUR)) {
6064 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6065 "Space required after the attribute name\n");
6066 break;
6067 }
6068 SKIP_BLANKS;
6069
6070 type = xmlParseAttributeType(ctxt, &tree);
6071 if (type <= 0) {
6072 break;
6073 }
6074
6075 GROW;
6076 if (!IS_BLANK_CH(CUR)) {
6077 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6078 "Space required after the attribute type\n");
6079 if (tree != NULL)
6080 xmlFreeEnumeration(tree);
6081 break;
6082 }
6083 SKIP_BLANKS;
6084
6085 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6086 if (def <= 0) {
6087 if (defaultValue != NULL)
6088 xmlFree(defaultValue);
6089 if (tree != NULL)
6090 xmlFreeEnumeration(tree);
6091 break;
6092 }
6093 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6094 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6095
6096 GROW;
6097 if (RAW != '>') {
6098 if (!IS_BLANK_CH(CUR)) {
6099 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100 "Space required after the attribute default value\n");
6101 if (defaultValue != NULL)
6102 xmlFree(defaultValue);
6103 if (tree != NULL)
6104 xmlFreeEnumeration(tree);
6105 break;
6106 }
6107 SKIP_BLANKS;
6108 }
6109 if (check == CUR_PTR) {
6110 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6111 "in xmlParseAttributeListDecl\n");
6112 if (defaultValue != NULL)
6113 xmlFree(defaultValue);
6114 if (tree != NULL)
6115 xmlFreeEnumeration(tree);
6116 break;
6117 }
6118 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6119 (ctxt->sax->attributeDecl != NULL))
6120 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6121 type, def, defaultValue, tree);
6122 else if (tree != NULL)
6123 xmlFreeEnumeration(tree);
6124
6125 if ((ctxt->sax2) && (defaultValue != NULL) &&
6126 (def != XML_ATTRIBUTE_IMPLIED) &&
6127 (def != XML_ATTRIBUTE_REQUIRED)) {
6128 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6129 }
6130 if (ctxt->sax2) {
6131 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6132 }
6133 if (defaultValue != NULL)
6134 xmlFree(defaultValue);
6135 GROW;
6136 }
6137 if (RAW == '>') {
6138 if (input != ctxt->input) {
6139 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6140 "Attribute list declaration doesn't start and stop in the same entity\n",
6141 NULL, NULL);
6142 }
6143 NEXT;
6144 }
6145 }
6146 }
6147
6148 /**
6149 * xmlParseElementMixedContentDecl:
6150 * @ctxt: an XML parser context
6151 * @inputchk: the input used for the current entity, needed for boundary checks
6152 *
6153 * parse the declaration for a Mixed Element content
6154 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6155 *
6156 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6157 * '(' S? '#PCDATA' S? ')'
6158 *
6159 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6160 *
6161 * [ VC: No Duplicate Types ]
6162 * The same name must not appear more than once in a single
6163 * mixed-content declaration.
6164 *
6165 * returns: the list of the xmlElementContentPtr describing the element choices
6166 */
6167 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6168 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6169 xmlElementContentPtr ret = NULL, cur = NULL, n;
6170 const xmlChar *elem = NULL;
6171
6172 GROW;
6173 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6174 SKIP(7);
6175 SKIP_BLANKS;
6176 SHRINK;
6177 if (RAW == ')') {
6178 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6179 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6180 "Element content declaration doesn't start and stop in the same entity\n",
6181 NULL, NULL);
6182 }
6183 NEXT;
6184 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6185 if (ret == NULL)
6186 return(NULL);
6187 if (RAW == '*') {
6188 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6189 NEXT;
6190 }
6191 return(ret);
6192 }
6193 if ((RAW == '(') || (RAW == '|')) {
6194 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6195 if (ret == NULL) return(NULL);
6196 }
6197 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6198 NEXT;
6199 if (elem == NULL) {
6200 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6201 if (ret == NULL) return(NULL);
6202 ret->c1 = cur;
6203 if (cur != NULL)
6204 cur->parent = ret;
6205 cur = ret;
6206 } else {
6207 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6208 if (n == NULL) return(NULL);
6209 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6210 if (n->c1 != NULL)
6211 n->c1->parent = n;
6212 cur->c2 = n;
6213 if (n != NULL)
6214 n->parent = cur;
6215 cur = n;
6216 }
6217 SKIP_BLANKS;
6218 elem = xmlParseName(ctxt);
6219 if (elem == NULL) {
6220 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6221 "xmlParseElementMixedContentDecl : Name expected\n");
6222 xmlFreeDocElementContent(ctxt->myDoc, cur);
6223 return(NULL);
6224 }
6225 SKIP_BLANKS;
6226 GROW;
6227 }
6228 if ((RAW == ')') && (NXT(1) == '*')) {
6229 if (elem != NULL) {
6230 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6231 XML_ELEMENT_CONTENT_ELEMENT);
6232 if (cur->c2 != NULL)
6233 cur->c2->parent = cur;
6234 }
6235 if (ret != NULL)
6236 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6237 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6238 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6239 "Element content declaration doesn't start and stop in the same entity\n",
6240 NULL, NULL);
6241 }
6242 SKIP(2);
6243 } else {
6244 xmlFreeDocElementContent(ctxt->myDoc, ret);
6245 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6246 return(NULL);
6247 }
6248
6249 } else {
6250 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6251 }
6252 return(ret);
6253 }
6254
6255 /**
6256 * xmlParseElementChildrenContentDeclPriv:
6257 * @ctxt: an XML parser context
6258 * @inputchk: the input used for the current entity, needed for boundary checks
6259 * @depth: the level of recursion
6260 *
6261 * parse the declaration for a Mixed Element content
6262 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6263 *
6264 *
6265 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6266 *
6267 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6268 *
6269 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6270 *
6271 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6272 *
6273 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6274 * TODO Parameter-entity replacement text must be properly nested
6275 * with parenthesized groups. That is to say, if either of the
6276 * opening or closing parentheses in a choice, seq, or Mixed
6277 * construct is contained in the replacement text for a parameter
6278 * entity, both must be contained in the same replacement text. For
6279 * interoperability, if a parameter-entity reference appears in a
6280 * choice, seq, or Mixed construct, its replacement text should not
6281 * be empty, and neither the first nor last non-blank character of
6282 * the replacement text should be a connector (| or ,).
6283 *
6284 * Returns the tree of xmlElementContentPtr describing the element
6285 * hierarchy.
6286 */
6287 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6288 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6289 int depth) {
6290 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6291 const xmlChar *elem;
6292 xmlChar type = 0;
6293
6294 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6295 (depth > 2048)) {
6296 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6297 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6298 depth);
6299 return(NULL);
6300 }
6301 SKIP_BLANKS;
6302 GROW;
6303 if (RAW == '(') {
6304 int inputid = ctxt->input->id;
6305
6306 /* Recurse on first child */
6307 NEXT;
6308 SKIP_BLANKS;
6309 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6310 depth + 1);
6311 SKIP_BLANKS;
6312 GROW;
6313 } else {
6314 elem = xmlParseName(ctxt);
6315 if (elem == NULL) {
6316 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6317 return(NULL);
6318 }
6319 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6320 if (cur == NULL) {
6321 xmlErrMemory(ctxt, NULL);
6322 return(NULL);
6323 }
6324 GROW;
6325 if (RAW == '?') {
6326 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6327 NEXT;
6328 } else if (RAW == '*') {
6329 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6330 NEXT;
6331 } else if (RAW == '+') {
6332 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6333 NEXT;
6334 } else {
6335 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6336 }
6337 GROW;
6338 }
6339 SKIP_BLANKS;
6340 SHRINK;
6341 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6342 /*
6343 * Each loop we parse one separator and one element.
6344 */
6345 if (RAW == ',') {
6346 if (type == 0) type = CUR;
6347
6348 /*
6349 * Detect "Name | Name , Name" error
6350 */
6351 else if (type != CUR) {
6352 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6353 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6354 type);
6355 if ((last != NULL) && (last != ret))
6356 xmlFreeDocElementContent(ctxt->myDoc, last);
6357 if (ret != NULL)
6358 xmlFreeDocElementContent(ctxt->myDoc, ret);
6359 return(NULL);
6360 }
6361 NEXT;
6362
6363 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6364 if (op == NULL) {
6365 if ((last != NULL) && (last != ret))
6366 xmlFreeDocElementContent(ctxt->myDoc, last);
6367 xmlFreeDocElementContent(ctxt->myDoc, ret);
6368 return(NULL);
6369 }
6370 if (last == NULL) {
6371 op->c1 = ret;
6372 if (ret != NULL)
6373 ret->parent = op;
6374 ret = cur = op;
6375 } else {
6376 cur->c2 = op;
6377 if (op != NULL)
6378 op->parent = cur;
6379 op->c1 = last;
6380 if (last != NULL)
6381 last->parent = op;
6382 cur =op;
6383 last = NULL;
6384 }
6385 } else if (RAW == '|') {
6386 if (type == 0) type = CUR;
6387
6388 /*
6389 * Detect "Name , Name | Name" error
6390 */
6391 else if (type != CUR) {
6392 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6393 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6394 type);
6395 if ((last != NULL) && (last != ret))
6396 xmlFreeDocElementContent(ctxt->myDoc, last);
6397 if (ret != NULL)
6398 xmlFreeDocElementContent(ctxt->myDoc, ret);
6399 return(NULL);
6400 }
6401 NEXT;
6402
6403 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6404 if (op == NULL) {
6405 if ((last != NULL) && (last != ret))
6406 xmlFreeDocElementContent(ctxt->myDoc, last);
6407 if (ret != NULL)
6408 xmlFreeDocElementContent(ctxt->myDoc, ret);
6409 return(NULL);
6410 }
6411 if (last == NULL) {
6412 op->c1 = ret;
6413 if (ret != NULL)
6414 ret->parent = op;
6415 ret = cur = op;
6416 } else {
6417 cur->c2 = op;
6418 if (op != NULL)
6419 op->parent = cur;
6420 op->c1 = last;
6421 if (last != NULL)
6422 last->parent = op;
6423 cur =op;
6424 last = NULL;
6425 }
6426 } else {
6427 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6428 if ((last != NULL) && (last != ret))
6429 xmlFreeDocElementContent(ctxt->myDoc, last);
6430 if (ret != NULL)
6431 xmlFreeDocElementContent(ctxt->myDoc, ret);
6432 return(NULL);
6433 }
6434 GROW;
6435 SKIP_BLANKS;
6436 GROW;
6437 if (RAW == '(') {
6438 int inputid = ctxt->input->id;
6439 /* Recurse on second child */
6440 NEXT;
6441 SKIP_BLANKS;
6442 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6443 depth + 1);
6444 SKIP_BLANKS;
6445 } else {
6446 elem = xmlParseName(ctxt);
6447 if (elem == NULL) {
6448 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6449 if (ret != NULL)
6450 xmlFreeDocElementContent(ctxt->myDoc, ret);
6451 return(NULL);
6452 }
6453 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6454 if (last == NULL) {
6455 if (ret != NULL)
6456 xmlFreeDocElementContent(ctxt->myDoc, ret);
6457 return(NULL);
6458 }
6459 if (RAW == '?') {
6460 last->ocur = XML_ELEMENT_CONTENT_OPT;
6461 NEXT;
6462 } else if (RAW == '*') {
6463 last->ocur = XML_ELEMENT_CONTENT_MULT;
6464 NEXT;
6465 } else if (RAW == '+') {
6466 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6467 NEXT;
6468 } else {
6469 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6470 }
6471 }
6472 SKIP_BLANKS;
6473 GROW;
6474 }
6475 if ((cur != NULL) && (last != NULL)) {
6476 cur->c2 = last;
6477 if (last != NULL)
6478 last->parent = cur;
6479 }
6480 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6481 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6482 "Element content declaration doesn't start and stop in the same entity\n",
6483 NULL, NULL);
6484 }
6485 NEXT;
6486 if (RAW == '?') {
6487 if (ret != NULL) {
6488 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6489 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6490 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6491 else
6492 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6493 }
6494 NEXT;
6495 } else if (RAW == '*') {
6496 if (ret != NULL) {
6497 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6498 cur = ret;
6499 /*
6500 * Some normalization:
6501 * (a | b* | c?)* == (a | b | c)*
6502 */
6503 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6504 if ((cur->c1 != NULL) &&
6505 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6506 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6507 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6508 if ((cur->c2 != NULL) &&
6509 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6510 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6511 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6512 cur = cur->c2;
6513 }
6514 }
6515 NEXT;
6516 } else if (RAW == '+') {
6517 if (ret != NULL) {
6518 int found = 0;
6519
6520 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6521 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6522 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6523 else
6524 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6525 /*
6526 * Some normalization:
6527 * (a | b*)+ == (a | b)*
6528 * (a | b?)+ == (a | b)*
6529 */
6530 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6531 if ((cur->c1 != NULL) &&
6532 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6533 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6534 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6535 found = 1;
6536 }
6537 if ((cur->c2 != NULL) &&
6538 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6539 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6540 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6541 found = 1;
6542 }
6543 cur = cur->c2;
6544 }
6545 if (found)
6546 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6547 }
6548 NEXT;
6549 }
6550 return(ret);
6551 }
6552
6553 /**
6554 * xmlParseElementChildrenContentDecl:
6555 * @ctxt: an XML parser context
6556 * @inputchk: the input used for the current entity, needed for boundary checks
6557 *
6558 * parse the declaration for a Mixed Element content
6559 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6560 *
6561 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6562 *
6563 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6564 *
6565 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6566 *
6567 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6568 *
6569 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6570 * TODO Parameter-entity replacement text must be properly nested
6571 * with parenthesized groups. That is to say, if either of the
6572 * opening or closing parentheses in a choice, seq, or Mixed
6573 * construct is contained in the replacement text for a parameter
6574 * entity, both must be contained in the same replacement text. For
6575 * interoperability, if a parameter-entity reference appears in a
6576 * choice, seq, or Mixed construct, its replacement text should not
6577 * be empty, and neither the first nor last non-blank character of
6578 * the replacement text should be a connector (| or ,).
6579 *
6580 * Returns the tree of xmlElementContentPtr describing the element
6581 * hierarchy.
6582 */
6583 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6584 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6585 /* stub left for API/ABI compat */
6586 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6587 }
6588
6589 /**
6590 * xmlParseElementContentDecl:
6591 * @ctxt: an XML parser context
6592 * @name: the name of the element being defined.
6593 * @result: the Element Content pointer will be stored here if any
6594 *
6595 * parse the declaration for an Element content either Mixed or Children,
6596 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6597 *
6598 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6599 *
6600 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6601 */
6602
6603 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6604 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6605 xmlElementContentPtr *result) {
6606
6607 xmlElementContentPtr tree = NULL;
6608 int inputid = ctxt->input->id;
6609 int res;
6610
6611 *result = NULL;
6612
6613 if (RAW != '(') {
6614 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6615 "xmlParseElementContentDecl : %s '(' expected\n", name);
6616 return(-1);
6617 }
6618 NEXT;
6619 GROW;
6620 if (ctxt->instate == XML_PARSER_EOF)
6621 return(-1);
6622 SKIP_BLANKS;
6623 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6624 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6625 res = XML_ELEMENT_TYPE_MIXED;
6626 } else {
6627 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6628 res = XML_ELEMENT_TYPE_ELEMENT;
6629 }
6630 SKIP_BLANKS;
6631 *result = tree;
6632 return(res);
6633 }
6634
6635 /**
6636 * xmlParseElementDecl:
6637 * @ctxt: an XML parser context
6638 *
6639 * parse an Element declaration.
6640 *
6641 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6642 *
6643 * [ VC: Unique Element Type Declaration ]
6644 * No element type may be declared more than once
6645 *
6646 * Returns the type of the element, or -1 in case of error
6647 */
6648 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6649 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6650 const xmlChar *name;
6651 int ret = -1;
6652 xmlElementContentPtr content = NULL;
6653
6654 /* GROW; done in the caller */
6655 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6656 xmlParserInputPtr input = ctxt->input;
6657
6658 SKIP(9);
6659 if (!IS_BLANK_CH(CUR)) {
6660 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6661 "Space required after 'ELEMENT'\n");
6662 }
6663 SKIP_BLANKS;
6664 name = xmlParseName(ctxt);
6665 if (name == NULL) {
6666 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6667 "xmlParseElementDecl: no name for Element\n");
6668 return(-1);
6669 }
6670 while ((RAW == 0) && (ctxt->inputNr > 1))
6671 xmlPopInput(ctxt);
6672 if (!IS_BLANK_CH(CUR)) {
6673 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6674 "Space required after the element name\n");
6675 }
6676 SKIP_BLANKS;
6677 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6678 SKIP(5);
6679 /*
6680 * Element must always be empty.
6681 */
6682 ret = XML_ELEMENT_TYPE_EMPTY;
6683 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6684 (NXT(2) == 'Y')) {
6685 SKIP(3);
6686 /*
6687 * Element is a generic container.
6688 */
6689 ret = XML_ELEMENT_TYPE_ANY;
6690 } else if (RAW == '(') {
6691 ret = xmlParseElementContentDecl(ctxt, name, &content);
6692 } else {
6693 /*
6694 * [ WFC: PEs in Internal Subset ] error handling.
6695 */
6696 if ((RAW == '%') && (ctxt->external == 0) &&
6697 (ctxt->inputNr == 1)) {
6698 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6699 "PEReference: forbidden within markup decl in internal subset\n");
6700 } else {
6701 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6702 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6703 }
6704 return(-1);
6705 }
6706
6707 SKIP_BLANKS;
6708 /*
6709 * Pop-up of finished entities.
6710 */
6711 while ((RAW == 0) && (ctxt->inputNr > 1))
6712 xmlPopInput(ctxt);
6713 SKIP_BLANKS;
6714
6715 if (RAW != '>') {
6716 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6717 if (content != NULL) {
6718 xmlFreeDocElementContent(ctxt->myDoc, content);
6719 }
6720 } else {
6721 if (input != ctxt->input) {
6722 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6723 "Element declaration doesn't start and stop in the same entity\n");
6724 }
6725
6726 NEXT;
6727 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6728 (ctxt->sax->elementDecl != NULL)) {
6729 if (content != NULL)
6730 content->parent = NULL;
6731 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6732 content);
6733 if ((content != NULL) && (content->parent == NULL)) {
6734 /*
6735 * this is a trick: if xmlAddElementDecl is called,
6736 * instead of copying the full tree it is plugged directly
6737 * if called from the parser. Avoid duplicating the
6738 * interfaces or change the API/ABI
6739 */
6740 xmlFreeDocElementContent(ctxt->myDoc, content);
6741 }
6742 } else if (content != NULL) {
6743 xmlFreeDocElementContent(ctxt->myDoc, content);
6744 }
6745 }
6746 }
6747 return(ret);
6748 }
6749
6750 /**
6751 * xmlParseConditionalSections
6752 * @ctxt: an XML parser context
6753 *
6754 * [61] conditionalSect ::= includeSect | ignoreSect
6755 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6756 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6757 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6758 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6759 */
6760
6761 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6762 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6763 int id = ctxt->input->id;
6764
6765 SKIP(3);
6766 SKIP_BLANKS;
6767 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6768 SKIP(7);
6769 SKIP_BLANKS;
6770 if (RAW != '[') {
6771 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6772 } else {
6773 if (ctxt->input->id != id) {
6774 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6775 "All markup of the conditional section is not in the same entity\n",
6776 NULL, NULL);
6777 }
6778 NEXT;
6779 }
6780 if (xmlParserDebugEntities) {
6781 if ((ctxt->input != NULL) && (ctxt->input->filename))
6782 xmlGenericError(xmlGenericErrorContext,
6783 "%s(%d): ", ctxt->input->filename,
6784 ctxt->input->line);
6785 xmlGenericError(xmlGenericErrorContext,
6786 "Entering INCLUDE Conditional Section\n");
6787 }
6788
6789 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6790 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6791 const xmlChar *check = CUR_PTR;
6792 unsigned int cons = ctxt->input->consumed;
6793
6794 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6795 xmlParseConditionalSections(ctxt);
6796 } else if (IS_BLANK_CH(CUR)) {
6797 NEXT;
6798 } else if (RAW == '%') {
6799 xmlParsePEReference(ctxt);
6800 } else
6801 xmlParseMarkupDecl(ctxt);
6802
6803 /*
6804 * Pop-up of finished entities.
6805 */
6806 while ((RAW == 0) && (ctxt->inputNr > 1))
6807 xmlPopInput(ctxt);
6808
6809 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6810 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6811 break;
6812 }
6813 }
6814 if (xmlParserDebugEntities) {
6815 if ((ctxt->input != NULL) && (ctxt->input->filename))
6816 xmlGenericError(xmlGenericErrorContext,
6817 "%s(%d): ", ctxt->input->filename,
6818 ctxt->input->line);
6819 xmlGenericError(xmlGenericErrorContext,
6820 "Leaving INCLUDE Conditional Section\n");
6821 }
6822
6823 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6824 int state;
6825 xmlParserInputState instate;
6826 int depth = 0;
6827
6828 SKIP(6);
6829 SKIP_BLANKS;
6830 if (RAW != '[') {
6831 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6832 } else {
6833 if (ctxt->input->id != id) {
6834 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6835 "All markup of the conditional section is not in the same entity\n",
6836 NULL, NULL);
6837 }
6838 NEXT;
6839 }
6840 if (xmlParserDebugEntities) {
6841 if ((ctxt->input != NULL) && (ctxt->input->filename))
6842 xmlGenericError(xmlGenericErrorContext,
6843 "%s(%d): ", ctxt->input->filename,
6844 ctxt->input->line);
6845 xmlGenericError(xmlGenericErrorContext,
6846 "Entering IGNORE Conditional Section\n");
6847 }
6848
6849 /*
6850 * Parse up to the end of the conditional section
6851 * But disable SAX event generating DTD building in the meantime
6852 */
6853 state = ctxt->disableSAX;
6854 instate = ctxt->instate;
6855 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6856 ctxt->instate = XML_PARSER_IGNORE;
6857
6858 while (((depth >= 0) && (RAW != 0)) &&
6859 (ctxt->instate != XML_PARSER_EOF)) {
6860 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6861 depth++;
6862 SKIP(3);
6863 continue;
6864 }
6865 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6866 if (--depth >= 0) SKIP(3);
6867 continue;
6868 }
6869 NEXT;
6870 continue;
6871 }
6872
6873 ctxt->disableSAX = state;
6874 ctxt->instate = instate;
6875
6876 if (xmlParserDebugEntities) {
6877 if ((ctxt->input != NULL) && (ctxt->input->filename))
6878 xmlGenericError(xmlGenericErrorContext,
6879 "%s(%d): ", ctxt->input->filename,
6880 ctxt->input->line);
6881 xmlGenericError(xmlGenericErrorContext,
6882 "Leaving IGNORE Conditional Section\n");
6883 }
6884
6885 } else {
6886 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6887 }
6888
6889 if (RAW == 0)
6890 SHRINK;
6891
6892 if (RAW == 0) {
6893 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6894 } else {
6895 if (ctxt->input->id != id) {
6896 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6897 "All markup of the conditional section is not in the same entity\n",
6898 NULL, NULL);
6899 }
6900 SKIP(3);
6901 }
6902 }
6903
6904 /**
6905 * xmlParseMarkupDecl:
6906 * @ctxt: an XML parser context
6907 *
6908 * parse Markup declarations
6909 *
6910 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6911 * NotationDecl | PI | Comment
6912 *
6913 * [ VC: Proper Declaration/PE Nesting ]
6914 * Parameter-entity replacement text must be properly nested with
6915 * markup declarations. That is to say, if either the first character
6916 * or the last character of a markup declaration (markupdecl above) is
6917 * contained in the replacement text for a parameter-entity reference,
6918 * both must be contained in the same replacement text.
6919 *
6920 * [ WFC: PEs in Internal Subset ]
6921 * In the internal DTD subset, parameter-entity references can occur
6922 * only where markup declarations can occur, not within markup declarations.
6923 * (This does not apply to references that occur in external parameter
6924 * entities or to the external subset.)
6925 */
6926 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6927 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6928 GROW;
6929 if (CUR == '<') {
6930 if (NXT(1) == '!') {
6931 switch (NXT(2)) {
6932 case 'E':
6933 if (NXT(3) == 'L')
6934 xmlParseElementDecl(ctxt);
6935 else if (NXT(3) == 'N')
6936 xmlParseEntityDecl(ctxt);
6937 break;
6938 case 'A':
6939 xmlParseAttributeListDecl(ctxt);
6940 break;
6941 case 'N':
6942 xmlParseNotationDecl(ctxt);
6943 break;
6944 case '-':
6945 xmlParseComment(ctxt);
6946 break;
6947 default:
6948 /* there is an error but it will be detected later */
6949 break;
6950 }
6951 } else if (NXT(1) == '?') {
6952 xmlParsePI(ctxt);
6953 }
6954 }
6955 /*
6956 * This is only for internal subset. On external entities,
6957 * the replacement is done before parsing stage
6958 */
6959 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6960 xmlParsePEReference(ctxt);
6961
6962 /*
6963 * Conditional sections are allowed from entities included
6964 * by PE References in the internal subset.
6965 */
6966 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6967 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6968 xmlParseConditionalSections(ctxt);
6969 }
6970 }
6971
6972 ctxt->instate = XML_PARSER_DTD;
6973 }
6974
6975 /**
6976 * xmlParseTextDecl:
6977 * @ctxt: an XML parser context
6978 *
6979 * parse an XML declaration header for external entities
6980 *
6981 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6982 */
6983
6984 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6985 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6986 xmlChar *version;
6987 const xmlChar *encoding;
6988
6989 /*
6990 * We know that '<?xml' is here.
6991 */
6992 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6993 SKIP(5);
6994 } else {
6995 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6996 return;
6997 }
6998
6999 if (!IS_BLANK_CH(CUR)) {
7000 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7001 "Space needed after '<?xml'\n");
7002 }
7003 SKIP_BLANKS;
7004
7005 /*
7006 * We may have the VersionInfo here.
7007 */
7008 version = xmlParseVersionInfo(ctxt);
7009 if (version == NULL)
7010 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7011 else {
7012 if (!IS_BLANK_CH(CUR)) {
7013 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7014 "Space needed here\n");
7015 }
7016 }
7017 ctxt->input->version = version;
7018
7019 /*
7020 * We must have the encoding declaration
7021 */
7022 encoding = xmlParseEncodingDecl(ctxt);
7023 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7024 /*
7025 * The XML REC instructs us to stop parsing right here
7026 */
7027 return;
7028 }
7029 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7030 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7031 "Missing encoding in text declaration\n");
7032 }
7033
7034 SKIP_BLANKS;
7035 if ((RAW == '?') && (NXT(1) == '>')) {
7036 SKIP(2);
7037 } else if (RAW == '>') {
7038 /* Deprecated old WD ... */
7039 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7040 NEXT;
7041 } else {
7042 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7043 MOVETO_ENDTAG(CUR_PTR);
7044 NEXT;
7045 }
7046 }
7047
7048 /**
7049 * xmlParseExternalSubset:
7050 * @ctxt: an XML parser context
7051 * @ExternalID: the external identifier
7052 * @SystemID: the system identifier (or URL)
7053 *
7054 * parse Markup declarations from an external subset
7055 *
7056 * [30] extSubset ::= textDecl? extSubsetDecl
7057 *
7058 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7059 */
7060 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7061 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7062 const xmlChar *SystemID) {
7063 xmlDetectSAX2(ctxt);
7064 GROW;
7065
7066 if ((ctxt->encoding == NULL) &&
7067 (ctxt->input->end - ctxt->input->cur >= 4)) {
7068 xmlChar start[4];
7069 xmlCharEncoding enc;
7070
7071 start[0] = RAW;
7072 start[1] = NXT(1);
7073 start[2] = NXT(2);
7074 start[3] = NXT(3);
7075 enc = xmlDetectCharEncoding(start, 4);
7076 if (enc != XML_CHAR_ENCODING_NONE)
7077 xmlSwitchEncoding(ctxt, enc);
7078 }
7079
7080 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7081 xmlParseTextDecl(ctxt);
7082 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7083 /*
7084 * The XML REC instructs us to stop parsing right here
7085 */
7086 ctxt->instate = XML_PARSER_EOF;
7087 return;
7088 }
7089 }
7090 if (ctxt->myDoc == NULL) {
7091 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7092 if (ctxt->myDoc == NULL) {
7093 xmlErrMemory(ctxt, "New Doc failed");
7094 return;
7095 }
7096 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7097 }
7098 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7099 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7100
7101 ctxt->instate = XML_PARSER_DTD;
7102 ctxt->external = 1;
7103 while (((RAW == '<') && (NXT(1) == '?')) ||
7104 ((RAW == '<') && (NXT(1) == '!')) ||
7105 (RAW == '%') || IS_BLANK_CH(CUR)) {
7106 const xmlChar *check = CUR_PTR;
7107 unsigned int cons = ctxt->input->consumed;
7108
7109 GROW;
7110 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7111 xmlParseConditionalSections(ctxt);
7112 } else if (IS_BLANK_CH(CUR)) {
7113 NEXT;
7114 } else if (RAW == '%') {
7115 xmlParsePEReference(ctxt);
7116 } else
7117 xmlParseMarkupDecl(ctxt);
7118
7119 /*
7120 * Pop-up of finished entities.
7121 */
7122 while ((RAW == 0) && (ctxt->inputNr > 1))
7123 xmlPopInput(ctxt);
7124
7125 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7126 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7127 break;
7128 }
7129 }
7130
7131 if (RAW != 0) {
7132 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7133 }
7134
7135 }
7136
7137 /**
7138 * xmlParseReference:
7139 * @ctxt: an XML parser context
7140 *
7141 * Parse and handle a reference found in content. Depending on the SAX
7142 * interface, this may end up in a call to characters() if this is a
7143 * CharRef or a predefined entity, or in a call to reference(); entity
 * content may also be parsed and attached to, or copied into, the tree
7144 * being built under ctxt->node when entities are substituted.
7145 *
7146 * [67] Reference ::= EntityRef | CharRef
 *
 * Nothing is returned. The function does nothing when the current
 * character is not '&'. It also returns early, without any SAX callback,
 * when xmlParseCharRef() yields 0, when xmlParseEntityRef() yields NULL,
 * or when the document is flagged as not well-formed once the entity
 * reference has been parsed.
7147 */
7148 void
xmlParseReference(xmlParserCtxtPtr ctxt)7149 xmlParseReference(xmlParserCtxtPtr ctxt) {
7150 xmlEntityPtr ent;
7151 xmlChar *val;
/* value of ent->checked on entry, before this call may update it */
7152 int was_checked;
/* node list produced by parsing the entity content, if any */
7153 xmlNodePtr list = NULL;
7154 xmlParserErrors ret = XML_ERR_OK;
7155
7156
7157 if (RAW != '&')
7158 return;
7159
7160 /*
7161 * Simple case of a CharRef
7162 */
7163 if (NXT(1) == '#') {
7164 int i = 0;
7165 xmlChar out[10];
/*
 * Remember the character following "&#" before the reference is
 * consumed: it tells below whether the reference was hexadecimal.
 */
7166 int hex = NXT(2);
7167 int value = xmlParseCharRef(ctxt);
7168
/* a zero value from xmlParseCharRef(): nothing to deliver */
7169 if (value == 0)
7170 return;
7171 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7172 /*
7173 * So we are using non-UTF-8 buffers
7174 * Check that the char fit on 8bits, if not
7175 * generate a CharRef.
7176 */
7177 if (value <= 0xFF) {
7178 out[0] = value;
7179 out[1] = 0;
7180 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7181 (!ctxt->disableSAX))
7182 ctxt->sax->characters(ctxt->userData, out, 1);
7183 } else {
/*
 * Rebuild the reference name ("#x..." or "#...") in the radix
 * that was used in the input and report it as a reference.
 */
7184 if ((hex == 'x') || (hex == 'X'))
7185 snprintf((char *)out, sizeof(out), "#x%X", value);
7186 else
7187 snprintf((char *)out, sizeof(out), "#%d", value);
7188 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7189 (!ctxt->disableSAX))
7190 ctxt->sax->reference(ctxt->userData, out);
7191 }
7192 } else {
7193 /*
7194 * Just encode the value in UTF-8
7195 */
7196 COPY_BUF(0 ,out, i, value);
7197 out[i] = 0;
7198 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7199 (!ctxt->disableSAX))
7200 ctxt->sax->characters(ctxt->userData, out, i);
7201 }
7202 return;
7203 }
7204
7205 /*
7206 * We are seeing an entity reference
7207 */
7208 ent = xmlParseEntityRef(ctxt);
7209 if (ent == NULL) return;
7210 if (!ctxt->wellFormed)
7211 return;
/*
 * Snapshot taken before ent->checked may be rewritten below; it is used
 * later to decide whether the content must be parsed again in order to
 * generate SAX callbacks.
 */
7212 was_checked = ent->checked;
7213
7214 /* special case of predefined entities */
7215 if ((ent->name == NULL) ||
7216 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7217 val = ent->content;
7218 if (val == NULL) return;
7219 /*
7220 * inline the entity.
7221 */
7222 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223 (!ctxt->disableSAX))
7224 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7225 return;
7226 }
7227
7228 /*
7229 * The first reference to the entity trigger a parsing phase
7230 * where the ent->children is filled with the result from
7231 * the parsing.
7232 * Note: external parsed entities will not be loaded, it is not
7233 * required for a non-validating parser, unless the parsing option
7234 * of validating, or substituting entities were given. Doing so is
7235 * far more secure as the parser will only process data coming from
7236 * the document entity by default.
 *
 * The content is parsed when the entity was never checked, or when it
 * has no children while XML_PARSE_NOENT is set; an external parsed
 * entity additionally requires XML_PARSE_NOENT or XML_PARSE_DTDVALID.
7237 */
7238 if (((ent->checked == 0) ||
7239 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7240 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7241 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
/* entity counter before the nested parse, used to compute ent->checked */
7242 unsigned long oldnbent = ctxt->nbentities;
7243
7244 /*
7245 * This is a bit hackish but this seems the best
7246 * way to make sure both SAX and DOM entity support
7247 * behaves okay.
 * When the user data is the parser context itself, NULL is handed
 * to the nested parse instead.
7248 */
7249 void *user_data;
7250 if (ctxt->userData == ctxt)
7251 user_data = NULL;
7252 else
7253 user_data = ctxt->userData;
7254
7255 /*
7256 * Check that this entity is well formed
7257 * 4.3.2: An internal general parsed entity is well-formed
7258 * if its replacement text matches the production labeled
7259 * content.
7260 */
7261 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7262 ctxt->depth++;
7263 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7264 user_data, &list);
7265 ctxt->depth--;
7266
7267 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7268 ctxt->depth++;
7269 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7270 user_data, ctxt->depth, ent->URI,
7271 ent->ExternalID, &list);
7272 ctxt->depth--;
7273 } else {
7274 ret = XML_ERR_ENTITY_PE_INTERNAL;
7275 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7276 "invalid entity type found\n", NULL);
7277 }
7278
7279 /*
7280 * Store the number of entities needing parsing for this entity
7281 * content and do checkings
 * The count (plus one) is stored multiplied by two; the low bit is
 * set when the entity content contains a '<' character.
7282 */
7283 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7284 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7285 ent->checked |= 1;
7286 if (ret == XML_ERR_ENTITY_LOOP) {
7287 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7288 xmlFreeNodeList(list);
7289 return;
7290 }
/* a non-zero result from the entity check aborts the processing */
7291 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7292 xmlFreeNodeList(list);
7293 return;
7294 }
7295
7296 if ((ret == XML_ERR_OK) && (list != NULL)) {
/*
 * Attach the parsed list to the entity, but only for general
 * entities which do not have children yet; otherwise the list
 * is discarded.
 */
7297 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7298 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7299 (ent->children == NULL)) {
7300 ent->children = list;
7301 if (ctxt->replaceEntities) {
7302 /*
7303 * Prune it directly in the generated document
7304 * except for single text nodes.
 * (also excepted: any list when parsing for the reader)
7305 */
7306 if (((list->type == XML_TEXT_NODE) &&
7307 (list->next == NULL)) ||
7308 (ctxt->parseMode == XML_PARSE_READER)) {
7309 list->parent = (xmlNodePtr) ent;
7310 list = NULL;
7311 ent->owner = 1;
7312 } else {
/*
 * The nodes are reparented to the current node and the
 * entity is flagged as not owning them (owner = 0).
 */
7313 ent->owner = 0;
7314 while (list != NULL) {
7315 list->parent = (xmlNodePtr) ctxt->node;
7316 list->doc = ctxt->myDoc;
7317 if (list->next == NULL)
7318 ent->last = list;
7319 list = list->next;
7320 }
/* the walk above ended with list == NULL: point it back at the head */
7321 list = ent->children;
7322 #ifdef LIBXML_LEGACY_ENABLED
7323 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7324 xmlAddEntityReference(ent, list, NULL);
7325 #endif /* LIBXML_LEGACY_ENABLED */
7326 }
7327 } else {
/*
 * No substitution: the entity owns the nodes, which are
 * parented to it and moved to the entity's document.
 */
7328 ent->owner = 1;
7329 while (list != NULL) {
7330 list->parent = (xmlNodePtr) ent;
7331 xmlSetTreeDoc(list, ent->doc);
7332 if (list->next == NULL)
7333 ent->last = list;
7334 list = list->next;
7335 }
7336 }
7337 } else {
7338 xmlFreeNodeList(list);
7339 list = NULL;
7340 }
7341 } else if ((ret != XML_ERR_OK) &&
7342 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7343 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7344 "Entity '%s' failed to parse\n", ent->name);
7345 xmlParserEntityCheck(ctxt, 0, ent, 0);
7346 } else if (list != NULL) {
7347 xmlFreeNodeList(list);
7348 list = NULL;
7349 }
/*
 * Keep ent->checked non-zero so that the "never checked" test
 * guarding this block does not match this entity again.
 */
7350 if (ent->checked == 0)
7351 ent->checked = 2;
7352 } else if (ent->checked != 1) {
/*
 * Content not parsed this time: account for the nested entity
 * references recorded in ent->checked by a previous parse.
 */
7353 ctxt->nbentities += ent->checked / 2;
7354 }
7355
7356 /*
7357 * Now that the entity content has been gathered
7358 * provide it to the application, this can take different forms based
7359 * on the parsing modes.
7360 */
7361 if (ent->children == NULL) {
7362 /*
7363 * Probably running in SAX mode and the callbacks don't
7364 * build the entity content. So unless we already went
7365 * though parsing for first checking go though the entity
7366 * content to generate callbacks associated to the entity
7367 */
7368 if (was_checked != 0) {
7369 void *user_data;
7370 /*
7371 * This is a bit hackish but this seems the best
7372 * way to make sure both SAX and DOM entity support
7373 * behaves okay.
7374 */
7375 if (ctxt->userData == ctxt)
7376 user_data = NULL;
7377 else
7378 user_data = ctxt->userData;
7379
/* same parse as above, but without collecting a node list */
7380 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7381 ctxt->depth++;
7382 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7383 ent->content, user_data, NULL);
7384 ctxt->depth--;
7385 } else if (ent->etype ==
7386 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7387 ctxt->depth++;
7388 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7389 ctxt->sax, user_data, ctxt->depth,
7390 ent->URI, ent->ExternalID, NULL);
7391 ctxt->depth--;
7392 } else {
7393 ret = XML_ERR_ENTITY_PE_INTERNAL;
7394 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7395 "invalid entity type found\n", NULL);
7396 }
7397 if (ret == XML_ERR_ENTITY_LOOP) {
7398 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7399 return;
7400 }
7401 }
7402 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7403 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7404 /*
7405 * Entity reference callback comes second, it's somewhat
7406 * superfluous but a compatibility to historical behaviour
7407 */
7408 ctxt->sax->reference(ctxt->userData, ent->name);
7409 }
7410 return;
7411 }
7412
7413 /*
7414 * From here on the entity has children (the NULL case returned
 * above). When entities are not substituted, only report the
 * reference through SAX.
7415 */
7416 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7417 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7418 /*
7419 * Create a node.
7420 */
7421 ctxt->sax->reference(ctxt->userData, ent->name);
7422 return;
7423 }
7424
/*
 * NOTE(review): ent->children cannot be NULL at this point since that
 * case returned above, so only the replaceEntities part of this test
 * looks effective -- confirm before simplifying.
 */
7425 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7426 /*
7427 * There is a problem on the handling of _private for entities
7428 * (bug 155816): Should we copy the content of the field from
7429 * the entity (possibly overwriting some value set by the user
7430 * when a copy is created), should we leave it alone, or should
7431 * we try to take care of different situations? The problem
7432 * is exacerbated by the usage of this field by the xmlReader.
7433 * To fix this bug, we look at _private on the created node
7434 * and, if it's NULL, we copy in whatever was in the entity.
7435 * If it's not NULL we leave it alone. This is somewhat of a
7436 * hack - maybe we should have further tests to determine
7437 * what to do.
7438 */
7439 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7440 /*
7441 * Seems we are generating the DOM content, do
7442 * a simple tree copy for all references except the first
7443 * In the first occurrence list contains the replacement.
 *
 * Three cases follow:
 *  - copies of the entity children are added under ctxt->node,
 *    the entity keeping its own nodes;
 *  - the original children are moved under ctxt->node and
 *    copies of them become the entity children;
 *  - the entity children are linked under ctxt->node as is.
7444 */
7445 if (((list == NULL) && (ent->owner == 0)) ||
7446 (ctxt->parseMode == XML_PARSE_READER)) {
7447 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7448
7449 /*
7450 * We are copying here, make sure there is no abuse
7451 */
7452 ctxt->sizeentcopy += ent->length + 5;
7453 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7454 return;
7455
7456 /*
7457 * when operating on a reader, the entities definitions
7458 * are always owning the entities subtree.
7459 if (ctxt->parseMode == XML_PARSE_READER)
7460 ent->owner = 1;
7461 */
7462
7463 cur = ent->children;
7464 while (cur != NULL) {
7465 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7466 if (nw != NULL) {
7467 if (nw->_private == NULL)
7468 nw->_private = cur->_private;
7469 if (firstChild == NULL){
7470 firstChild = nw;
7471 }
7472 nw = xmlAddChild(ctxt->node, nw);
7473 }
/* stop after ent->last rather than following its next link */
7474 if (cur == ent->last) {
7475 /*
7476 * needed to detect some strange empty
7477 * node cases in the reader tests
7478 */
7479 if ((ctxt->parseMode == XML_PARSE_READER) &&
7480 (nw != NULL) &&
7481 (nw->type == XML_ELEMENT_NODE) &&
7482 (nw->children == NULL))
7483 nw->extra = 1;
7484
7485 break;
7486 }
7487 cur = cur->next;
7488 }
7489 #ifdef LIBXML_LEGACY_ENABLED
7490 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7491 xmlAddEntityReference(ent, firstChild, nw);
7492 #endif /* LIBXML_LEGACY_ENABLED */
7493 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7494 xmlNodePtr nw = NULL, cur, next, last,
7495 firstChild = NULL;
7496
7497 /*
7498 * We are copying here, make sure there is no abuse
7499 */
7500 ctxt->sizeentcopy += ent->length + 5;
7501 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7502 return;
7503
7504 /*
7505 * Copy the entity child list and make it the new
7506 * entity child list. The goal is to make sure any
7507 * ID or REF referenced will be the one from the
7508 * document content and not the entity copy.
7509 */
/*
 * The entity child list is detached first; each original node
 * is then unlinked, copied, the copy added back to the entity
 * and the original added to the current node.
 */
7510 cur = ent->children;
7511 ent->children = NULL;
7512 last = ent->last;
7513 ent->last = NULL;
7514 while (cur != NULL) {
7515 next = cur->next;
7516 cur->next = NULL;
7517 cur->parent = NULL;
7518 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7519 if (nw != NULL) {
7520 if (nw->_private == NULL)
7521 nw->_private = cur->_private;
7522 if (firstChild == NULL){
7523 firstChild = cur;
7524 }
7525 xmlAddChild((xmlNodePtr) ent, nw);
7526 xmlAddChild(ctxt->node, cur);
7527 }
7528 if (cur == last)
7529 break;
7530 cur = next;
7531 }
/* the entity now holds the copies: flag it as owning them */
7532 if (ent->owner == 0)
7533 ent->owner = 1;
7534 #ifdef LIBXML_LEGACY_ENABLED
7535 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7536 xmlAddEntityReference(ent, firstChild, nw);
7537 #endif /* LIBXML_LEGACY_ENABLED */
7538 } else {
7539 const xmlChar *nbktext;
7540
7541 /*
7542 * the name change is to avoid coalescing of the
7543 * node with a possible previous text one which
7544 * would make ent->children a dangling pointer
7545 */
7546 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7547 -1);
7548 if (ent->children->type == XML_TEXT_NODE)
7549 ent->children->name = nbktext;
7550 if ((ent->last != ent->children) &&
7551 (ent->last->type == XML_TEXT_NODE))
7552 ent->last->name = nbktext;
7553 xmlAddChildList(ctxt->node, ent->children);
7554 }
7555
7556 /*
7557 * This is to avoid a nasty side effect, see
7558 * characters() in SAX.c
7559 */
7560 ctxt->nodemem = 0;
7561 ctxt->nodelen = 0;
7562 return;
7563 }
7564 }
7565 }
7566
7567 /**
7568 * xmlParseEntityRef:
7569 * @ctxt: an XML parser context
7570 *
7571 * parse ENTITY references declarations
7572 *
7573 * [68] EntityRef ::= '&' Name ';'
7574 *
7575 * [ WFC: Entity Declared ]
7576 * In a document without any DTD, a document with only an internal DTD
7577 * subset which contains no parameter entity references, or a document
7578 * with "standalone='yes'", the Name given in the entity reference
7579 * must match that in an entity declaration, except that well-formed
7580 * documents need not declare any of the following entities: amp, lt,
7581 * gt, apos, quot. The declaration of a parameter entity must precede
7582 * any reference to it. Similarly, the declaration of a general entity
7583 * must precede any reference to it which appears in a default value in an
7584 * attribute-list declaration. Note that if entities are declared in the
7585 * external subset or in external parameter entities, a non-validating
7586 * processor is not obligated to read and process their declarations;
7587 * for such documents, the rule that an entity must be declared is a
7588 * well-formedness constraint only if standalone='yes'.
7589 *
7590 * [ WFC: Parsed Entity ]
7591 * An entity reference must not contain the name of an unparsed entity
7592 *
7593 * Returns the xmlEntityPtr if found, or NULL otherwise.
7594 */
7595 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7596 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7597 const xmlChar *name;
7598 xmlEntityPtr ent = NULL;
7599
7600 GROW;
7601 if (ctxt->instate == XML_PARSER_EOF)
7602 return(NULL);
7603
7604 if (RAW != '&')
7605 return(NULL);
7606 NEXT;
7607 name = xmlParseName(ctxt);
7608 if (name == NULL) {
7609 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7610 "xmlParseEntityRef: no name\n");
7611 return(NULL);
7612 }
7613 if (RAW != ';') {
7614 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7615 return(NULL);
7616 }
7617 NEXT;
7618
7619 /*
7620 * Predefined entities override any extra definition
7621 */
7622 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7623 ent = xmlGetPredefinedEntity(name);
7624 if (ent != NULL)
7625 return(ent);
7626 }
7627
7628 /*
7629 * Increase the number of entity references parsed
7630 */
7631 ctxt->nbentities++;
7632
7633 /*
7634 * Ask first SAX for entity resolution, otherwise try the
7635 * entities which may have stored in the parser context.
7636 */
7637 if (ctxt->sax != NULL) {
7638 if (ctxt->sax->getEntity != NULL)
7639 ent = ctxt->sax->getEntity(ctxt->userData, name);
7640 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7641 (ctxt->options & XML_PARSE_OLDSAX))
7642 ent = xmlGetPredefinedEntity(name);
7643 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7644 (ctxt->userData==ctxt)) {
7645 ent = xmlSAX2GetEntity(ctxt, name);
7646 }
7647 }
7648 if (ctxt->instate == XML_PARSER_EOF)
7649 return(NULL);
7650 /*
7651 * [ WFC: Entity Declared ]
7652 * In a document without any DTD, a document with only an
7653 * internal DTD subset which contains no parameter entity
7654 * references, or a document with "standalone='yes'", the
7655 * Name given in the entity reference must match that in an
7656 * entity declaration, except that well-formed documents
7657 * need not declare any of the following entities: amp, lt,
7658 * gt, apos, quot.
7659 * The declaration of a parameter entity must precede any
7660 * reference to it.
7661 * Similarly, the declaration of a general entity must
7662 * precede any reference to it which appears in a default
7663 * value in an attribute-list declaration. Note that if
7664 * entities are declared in the external subset or in
7665 * external parameter entities, a non-validating processor
7666 * is not obligated to read and process their declarations;
7667 * for such documents, the rule that an entity must be
7668 * declared is a well-formedness constraint only if
7669 * standalone='yes'.
7670 */
7671 if (ent == NULL) {
7672 if ((ctxt->standalone == 1) ||
7673 ((ctxt->hasExternalSubset == 0) &&
7674 (ctxt->hasPErefs == 0))) {
7675 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7676 "Entity '%s' not defined\n", name);
7677 } else {
7678 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7679 "Entity '%s' not defined\n", name);
7680 if ((ctxt->inSubset == 0) &&
7681 (ctxt->sax != NULL) &&
7682 (ctxt->sax->reference != NULL)) {
7683 ctxt->sax->reference(ctxt->userData, name);
7684 }
7685 }
7686 xmlParserEntityCheck(ctxt, 0, ent, 0);
7687 ctxt->valid = 0;
7688 }
7689
7690 /*
7691 * [ WFC: Parsed Entity ]
7692 * An entity reference must not contain the name of an
7693 * unparsed entity
7694 */
7695 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7696 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7697 "Entity reference to unparsed entity %s\n", name);
7698 }
7699
7700 /*
7701 * [ WFC: No External Entity References ]
7702 * Attribute values cannot contain direct or indirect
7703 * entity references to external entities.
7704 */
7705 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7706 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7707 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7708 "Attribute references external entity '%s'\n", name);
7709 }
7710 /*
7711 * [ WFC: No < in Attribute Values ]
7712 * The replacement text of any entity referred to directly or
7713 * indirectly in an attribute value (other than "<") must
7714 * not contain a <.
7715 */
7716 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7717 (ent != NULL) &&
7718 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7719 if (((ent->checked & 1) || (ent->checked == 0)) &&
7720 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7721 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7722 "'<' in entity '%s' is not allowed in attributes values\n", name);
7723 }
7724 }
7725
7726 /*
7727 * Internal check, no parameter entities here ...
7728 */
7729 else {
7730 switch (ent->etype) {
7731 case XML_INTERNAL_PARAMETER_ENTITY:
7732 case XML_EXTERNAL_PARAMETER_ENTITY:
7733 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7734 "Attempt to reference the parameter entity '%s'\n",
7735 name);
7736 break;
7737 default:
7738 break;
7739 }
7740 }
7741
7742 /*
7743 * [ WFC: No Recursion ]
7744 * A parsed entity must not contain a recursive reference
7745 * to itself, either directly or indirectly.
7746 * Done somewhere else
7747 */
7748 return(ent);
7749 }
7750
7751 /**
7752 * xmlParseStringEntityRef:
7753 * @ctxt: an XML parser context
7754 * @str: a pointer to an index in the string
7755 *
7756 * parse ENTITY references declarations, but this version parses it from
7757 * a string value.
7758 *
7759 * [68] EntityRef ::= '&' Name ';'
7760 *
7761 * [ WFC: Entity Declared ]
7762 * In a document without any DTD, a document with only an internal DTD
7763 * subset which contains no parameter entity references, or a document
7764 * with "standalone='yes'", the Name given in the entity reference
7765 * must match that in an entity declaration, except that well-formed
7766 * documents need not declare any of the following entities: amp, lt,
7767 * gt, apos, quot. The declaration of a parameter entity must precede
7768 * any reference to it. Similarly, the declaration of a general entity
7769 * must precede any reference to it which appears in a default value in an
7770 * attribute-list declaration. Note that if entities are declared in the
7771 * external subset or in external parameter entities, a non-validating
7772 * processor is not obligated to read and process their declarations;
7773 * for such documents, the rule that an entity must be declared is a
7774 * well-formedness constraint only if standalone='yes'.
7775 *
7776 * [ WFC: Parsed Entity ]
7777 * An entity reference must not contain the name of an unparsed entity
7778 *
7779 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7780 * is updated to the current location in the string.
7781 */
7782 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7783 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7784 xmlChar *name;
7785 const xmlChar *ptr;
7786 xmlChar cur;
7787 xmlEntityPtr ent = NULL;
7788
7789 if ((str == NULL) || (*str == NULL))
7790 return(NULL);
7791 ptr = *str;
7792 cur = *ptr;
7793 if (cur != '&')
7794 return(NULL);
7795
7796 ptr++;
7797 name = xmlParseStringName(ctxt, &ptr);
7798 if (name == NULL) {
7799 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7800 "xmlParseStringEntityRef: no name\n");
7801 *str = ptr;
7802 return(NULL);
7803 }
7804 if (*ptr != ';') {
7805 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7806 xmlFree(name);
7807 *str = ptr;
7808 return(NULL);
7809 }
7810 ptr++;
7811
7812
7813 /*
7814 * Predefined entities override any extra definition
7815 */
7816 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7817 ent = xmlGetPredefinedEntity(name);
7818 if (ent != NULL) {
7819 xmlFree(name);
7820 *str = ptr;
7821 return(ent);
7822 }
7823 }
7824
7825 /*
7826 * Increate the number of entity references parsed
7827 */
7828 ctxt->nbentities++;
7829
7830 /*
7831 * Ask first SAX for entity resolution, otherwise try the
7832 * entities which may have stored in the parser context.
7833 */
7834 if (ctxt->sax != NULL) {
7835 if (ctxt->sax->getEntity != NULL)
7836 ent = ctxt->sax->getEntity(ctxt->userData, name);
7837 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7838 ent = xmlGetPredefinedEntity(name);
7839 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7840 ent = xmlSAX2GetEntity(ctxt, name);
7841 }
7842 }
7843 if (ctxt->instate == XML_PARSER_EOF) {
7844 xmlFree(name);
7845 return(NULL);
7846 }
7847
7848 /*
7849 * [ WFC: Entity Declared ]
7850 * In a document without any DTD, a document with only an
7851 * internal DTD subset which contains no parameter entity
7852 * references, or a document with "standalone='yes'", the
7853 * Name given in the entity reference must match that in an
7854 * entity declaration, except that well-formed documents
7855 * need not declare any of the following entities: amp, lt,
7856 * gt, apos, quot.
7857 * The declaration of a parameter entity must precede any
7858 * reference to it.
7859 * Similarly, the declaration of a general entity must
7860 * precede any reference to it which appears in a default
7861 * value in an attribute-list declaration. Note that if
7862 * entities are declared in the external subset or in
7863 * external parameter entities, a non-validating processor
7864 * is not obligated to read and process their declarations;
7865 * for such documents, the rule that an entity must be
7866 * declared is a well-formedness constraint only if
7867 * standalone='yes'.
7868 */
7869 if (ent == NULL) {
7870 if ((ctxt->standalone == 1) ||
7871 ((ctxt->hasExternalSubset == 0) &&
7872 (ctxt->hasPErefs == 0))) {
7873 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7874 "Entity '%s' not defined\n", name);
7875 } else {
7876 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7877 "Entity '%s' not defined\n",
7878 name);
7879 }
7880 xmlParserEntityCheck(ctxt, 0, ent, 0);
7881 /* TODO ? check regressions ctxt->valid = 0; */
7882 }
7883
7884 /*
7885 * [ WFC: Parsed Entity ]
7886 * An entity reference must not contain the name of an
7887 * unparsed entity
7888 */
7889 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7890 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7891 "Entity reference to unparsed entity %s\n", name);
7892 }
7893
7894 /*
7895 * [ WFC: No External Entity References ]
7896 * Attribute values cannot contain direct or indirect
7897 * entity references to external entities.
7898 */
7899 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7900 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7901 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7902 "Attribute references external entity '%s'\n", name);
7903 }
7904 /*
7905 * [ WFC: No < in Attribute Values ]
7906 * The replacement text of any entity referred to directly or
7907 * indirectly in an attribute value (other than "<") must
7908 * not contain a <.
7909 */
7910 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7911 (ent != NULL) && (ent->content != NULL) &&
7912 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7913 (xmlStrchr(ent->content, '<'))) {
7914 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7915 "'<' in entity '%s' is not allowed in attributes values\n",
7916 name);
7917 }
7918
7919 /*
7920 * Internal check, no parameter entities here ...
7921 */
7922 else {
7923 switch (ent->etype) {
7924 case XML_INTERNAL_PARAMETER_ENTITY:
7925 case XML_EXTERNAL_PARAMETER_ENTITY:
7926 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7927 "Attempt to reference the parameter entity '%s'\n",
7928 name);
7929 break;
7930 default:
7931 break;
7932 }
7933 }
7934
7935 /*
7936 * [ WFC: No Recursion ]
7937 * A parsed entity must not contain a recursive reference
7938 * to itself, either directly or indirectly.
7939 * Done somewhere else
7940 */
7941
7942 xmlFree(name);
7943 *str = ptr;
7944 return(ent);
7945 }
7946
7947 /**
7948 * xmlParsePEReference:
7949 * @ctxt: an XML parser context
7950 *
7951 * parse PEReference declarations
7952 * The entity content is handled directly by pushing it's content as
7953 * a new input stream.
7954 *
7955 * [69] PEReference ::= '%' Name ';'
7956 *
7957 * [ WFC: No Recursion ]
7958 * A parsed entity must not contain a recursive
7959 * reference to itself, either directly or indirectly.
7960 *
7961 * [ WFC: Entity Declared ]
7962 * In a document without any DTD, a document with only an internal DTD
7963 * subset which contains no parameter entity references, or a document
7964 * with "standalone='yes'", ... ... The declaration of a parameter
7965 * entity must precede any reference to it...
7966 *
7967 * [ VC: Entity Declared ]
7968 * In a document with an external subset or external parameter entities
7969 * with "standalone='no'", ... ... The declaration of a parameter entity
7970 * must precede any reference to it...
7971 *
7972 * [ WFC: In DTD ]
7973 * Parameter-entity references may only appear in the DTD.
7974 * NOTE: misleading but this is handled.
7975 */
7976 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7977 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7978 {
7979 const xmlChar *name;
7980 xmlEntityPtr entity = NULL;
7981 xmlParserInputPtr input;
7982
7983 if (RAW != '%')
7984 return;
7985 NEXT;
7986 name = xmlParseName(ctxt);
7987 if (name == NULL) {
7988 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7989 "xmlParsePEReference: no name\n");
7990 return;
7991 }
7992 if (RAW != ';') {
7993 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7994 return;
7995 }
7996
7997 NEXT;
7998
7999 /*
8000 * Increate the number of entity references parsed
8001 */
8002 ctxt->nbentities++;
8003
8004 /*
8005 * Request the entity from SAX
8006 */
8007 if ((ctxt->sax != NULL) &&
8008 (ctxt->sax->getParameterEntity != NULL))
8009 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8010 if (ctxt->instate == XML_PARSER_EOF)
8011 return;
8012 if (entity == NULL) {
8013 /*
8014 * [ WFC: Entity Declared ]
8015 * In a document without any DTD, a document with only an
8016 * internal DTD subset which contains no parameter entity
8017 * references, or a document with "standalone='yes'", ...
8018 * ... The declaration of a parameter entity must precede
8019 * any reference to it...
8020 */
8021 if ((ctxt->standalone == 1) ||
8022 ((ctxt->hasExternalSubset == 0) &&
8023 (ctxt->hasPErefs == 0))) {
8024 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8025 "PEReference: %%%s; not found\n",
8026 name);
8027 } else {
8028 /*
8029 * [ VC: Entity Declared ]
8030 * In a document with an external subset or external
8031 * parameter entities with "standalone='no'", ...
8032 * ... The declaration of a parameter entity must
8033 * precede any reference to it...
8034 */
8035 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8036 "PEReference: %%%s; not found\n",
8037 name, NULL);
8038 ctxt->valid = 0;
8039 }
8040 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8041 } else {
8042 /*
8043 * Internal checking in case the entity quest barfed
8044 */
8045 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8046 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8047 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8048 "Internal: %%%s; is not a parameter entity\n",
8049 name, NULL);
8050 } else if (ctxt->input->free != deallocblankswrapper) {
8051 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8052 if (xmlPushInput(ctxt, input) < 0)
8053 return;
8054 } else {
8055 /*
8056 * TODO !!!
8057 * handle the extra spaces added before and after
8058 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8059 */
8060 input = xmlNewEntityInputStream(ctxt, entity);
8061 if (xmlPushInput(ctxt, input) < 0)
8062 return;
8063 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8064 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8065 (IS_BLANK_CH(NXT(5)))) {
8066 xmlParseTextDecl(ctxt);
8067 if (ctxt->errNo ==
8068 XML_ERR_UNSUPPORTED_ENCODING) {
8069 /*
8070 * The XML REC instructs us to stop parsing
8071 * right here
8072 */
8073 ctxt->instate = XML_PARSER_EOF;
8074 return;
8075 }
8076 }
8077 }
8078 }
8079 ctxt->hasPErefs = 1;
8080 }
8081
8082 /**
8083 * xmlLoadEntityContent:
8084 * @ctxt: an XML parser context
8085 * @entity: an unloaded system entity
8086 *
8087 * Load the original content of the given system entity from the
8088 * ExternalID/SystemID given. This is to be used for Included in Literal
8089 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8090 *
8091 * Returns 0 in case of success and -1 in case of failure
8092 */
8093 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8094 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8095 xmlParserInputPtr input;
8096 xmlBufferPtr buf;
8097 int l, c;
8098 int count = 0;
8099
8100 if ((ctxt == NULL) || (entity == NULL) ||
8101 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8102 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8103 (entity->content != NULL)) {
8104 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8105 "xmlLoadEntityContent parameter error");
8106 return(-1);
8107 }
8108
8109 if (xmlParserDebugEntities)
8110 xmlGenericError(xmlGenericErrorContext,
8111 "Reading %s entity content input\n", entity->name);
8112
8113 buf = xmlBufferCreate();
8114 if (buf == NULL) {
8115 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8116 "xmlLoadEntityContent parameter error");
8117 return(-1);
8118 }
8119
8120 input = xmlNewEntityInputStream(ctxt, entity);
8121 if (input == NULL) {
8122 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8123 "xmlLoadEntityContent input error");
8124 xmlBufferFree(buf);
8125 return(-1);
8126 }
8127
8128 /*
8129 * Push the entity as the current input, read char by char
8130 * saving to the buffer until the end of the entity or an error
8131 */
8132 if (xmlPushInput(ctxt, input) < 0) {
8133 xmlBufferFree(buf);
8134 return(-1);
8135 }
8136
8137 GROW;
8138 c = CUR_CHAR(l);
8139 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8140 (IS_CHAR(c))) {
8141 xmlBufferAdd(buf, ctxt->input->cur, l);
8142 if (count++ > XML_PARSER_CHUNK_SIZE) {
8143 count = 0;
8144 GROW;
8145 if (ctxt->instate == XML_PARSER_EOF) {
8146 xmlBufferFree(buf);
8147 return(-1);
8148 }
8149 }
8150 NEXTL(l);
8151 c = CUR_CHAR(l);
8152 if (c == 0) {
8153 count = 0;
8154 GROW;
8155 if (ctxt->instate == XML_PARSER_EOF) {
8156 xmlBufferFree(buf);
8157 return(-1);
8158 }
8159 c = CUR_CHAR(l);
8160 }
8161 }
8162
8163 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8164 xmlPopInput(ctxt);
8165 } else if (!IS_CHAR(c)) {
8166 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8167 "xmlLoadEntityContent: invalid char value %d\n",
8168 c);
8169 xmlBufferFree(buf);
8170 return(-1);
8171 }
8172 entity->content = buf->content;
8173 buf->content = NULL;
8174 xmlBufferFree(buf);
8175
8176 return(0);
8177 }
8178
8179 /**
8180 * xmlParseStringPEReference:
8181 * @ctxt: an XML parser context
8182 * @str: a pointer to an index in the string
8183 *
8184 * parse PEReference declarations
8185 *
8186 * [69] PEReference ::= '%' Name ';'
8187 *
8188 * [ WFC: No Recursion ]
8189 * A parsed entity must not contain a recursive
8190 * reference to itself, either directly or indirectly.
8191 *
8192 * [ WFC: Entity Declared ]
8193 * In a document without any DTD, a document with only an internal DTD
8194 * subset which contains no parameter entity references, or a document
8195 * with "standalone='yes'", ... ... The declaration of a parameter
8196 * entity must precede any reference to it...
8197 *
8198 * [ VC: Entity Declared ]
8199 * In a document with an external subset or external parameter entities
8200 * with "standalone='no'", ... ... The declaration of a parameter entity
8201 * must precede any reference to it...
8202 *
8203 * [ WFC: In DTD ]
8204 * Parameter-entity references may only appear in the DTD.
8205 * NOTE: misleading but this is handled.
8206 *
8207 * Returns the string of the entity content.
8208 * str is updated to the current value of the index
8209 */
8210 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8211 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8212 const xmlChar *ptr;
8213 xmlChar cur;
8214 xmlChar *name;
8215 xmlEntityPtr entity = NULL;
8216
8217 if ((str == NULL) || (*str == NULL)) return(NULL);
8218 ptr = *str;
8219 cur = *ptr;
8220 if (cur != '%')
8221 return(NULL);
8222 ptr++;
8223 name = xmlParseStringName(ctxt, &ptr);
8224 if (name == NULL) {
8225 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8226 "xmlParseStringPEReference: no name\n");
8227 *str = ptr;
8228 return(NULL);
8229 }
8230 cur = *ptr;
8231 if (cur != ';') {
8232 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8233 xmlFree(name);
8234 *str = ptr;
8235 return(NULL);
8236 }
8237 ptr++;
8238
8239 /*
8240 * Increate the number of entity references parsed
8241 */
8242 ctxt->nbentities++;
8243
8244 /*
8245 * Request the entity from SAX
8246 */
8247 if ((ctxt->sax != NULL) &&
8248 (ctxt->sax->getParameterEntity != NULL))
8249 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8250 if (ctxt->instate == XML_PARSER_EOF) {
8251 xmlFree(name);
8252 return(NULL);
8253 }
8254 if (entity == NULL) {
8255 /*
8256 * [ WFC: Entity Declared ]
8257 * In a document without any DTD, a document with only an
8258 * internal DTD subset which contains no parameter entity
8259 * references, or a document with "standalone='yes'", ...
8260 * ... The declaration of a parameter entity must precede
8261 * any reference to it...
8262 */
8263 if ((ctxt->standalone == 1) ||
8264 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8265 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8266 "PEReference: %%%s; not found\n", name);
8267 } else {
8268 /*
8269 * [ VC: Entity Declared ]
8270 * In a document with an external subset or external
8271 * parameter entities with "standalone='no'", ...
8272 * ... The declaration of a parameter entity must
8273 * precede any reference to it...
8274 */
8275 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8276 "PEReference: %%%s; not found\n",
8277 name, NULL);
8278 ctxt->valid = 0;
8279 }
8280 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8281 } else {
8282 /*
8283 * Internal checking in case the entity quest barfed
8284 */
8285 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8286 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8287 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8288 "%%%s; is not a parameter entity\n",
8289 name, NULL);
8290 }
8291 }
8292 ctxt->hasPErefs = 1;
8293 xmlFree(name);
8294 *str = ptr;
8295 return(entity);
8296 }
8297
8298 /**
8299 * xmlParseDocTypeDecl:
8300 * @ctxt: an XML parser context
8301 *
8302 * parse a DOCTYPE declaration
8303 *
8304 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8305 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8306 *
8307 * [ VC: Root Element Type ]
8308 * The Name in the document type declaration must match the element
8309 * type of the root element.
8310 */
8311
8312 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8313 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8314 const xmlChar *name = NULL;
8315 xmlChar *ExternalID = NULL;
8316 xmlChar *URI = NULL;
8317
8318 /*
8319 * We know that '<!DOCTYPE' has been detected.
8320 */
8321 SKIP(9);
8322
8323 SKIP_BLANKS;
8324
8325 /*
8326 * Parse the DOCTYPE name.
8327 */
8328 name = xmlParseName(ctxt);
8329 if (name == NULL) {
8330 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8331 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8332 }
8333 ctxt->intSubName = name;
8334
8335 SKIP_BLANKS;
8336
8337 /*
8338 * Check for SystemID and ExternalID
8339 */
8340 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8341
8342 if ((URI != NULL) || (ExternalID != NULL)) {
8343 ctxt->hasExternalSubset = 1;
8344 }
8345 ctxt->extSubURI = URI;
8346 ctxt->extSubSystem = ExternalID;
8347
8348 SKIP_BLANKS;
8349
8350 /*
8351 * Create and update the internal subset.
8352 */
8353 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8354 (!ctxt->disableSAX))
8355 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8356 if (ctxt->instate == XML_PARSER_EOF)
8357 return;
8358
8359 /*
8360 * Is there any internal subset declarations ?
8361 * they are handled separately in xmlParseInternalSubset()
8362 */
8363 if (RAW == '[')
8364 return;
8365
8366 /*
8367 * We should be at the end of the DOCTYPE declaration.
8368 */
8369 if (RAW != '>') {
8370 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8371 }
8372 NEXT;
8373 }
8374
8375 /**
8376 * xmlParseInternalSubset:
8377 * @ctxt: an XML parser context
8378 *
8379 * parse the internal subset declaration
8380 *
8381 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8382 */
8383
8384 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8385 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8386 /*
8387 * Is there any DTD definition ?
8388 */
8389 if (RAW == '[') {
8390 ctxt->instate = XML_PARSER_DTD;
8391 NEXT;
8392 /*
8393 * Parse the succession of Markup declarations and
8394 * PEReferences.
8395 * Subsequence (markupdecl | PEReference | S)*
8396 */
8397 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8398 const xmlChar *check = CUR_PTR;
8399 unsigned int cons = ctxt->input->consumed;
8400
8401 SKIP_BLANKS;
8402 xmlParseMarkupDecl(ctxt);
8403 xmlParsePEReference(ctxt);
8404
8405 /*
8406 * Pop-up of finished entities.
8407 */
8408 while ((RAW == 0) && (ctxt->inputNr > 1))
8409 xmlPopInput(ctxt);
8410
8411 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8412 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8413 "xmlParseInternalSubset: error detected in Markup declaration\n");
8414 break;
8415 }
8416 }
8417 if (RAW == ']') {
8418 NEXT;
8419 SKIP_BLANKS;
8420 }
8421 }
8422
8423 /*
8424 * We should be at the end of the DOCTYPE declaration.
8425 */
8426 if (RAW != '>') {
8427 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8428 }
8429 NEXT;
8430 }
8431
8432 #ifdef LIBXML_SAX1_ENABLED
8433 /**
8434 * xmlParseAttribute:
8435 * @ctxt: an XML parser context
8436 * @value: a xmlChar ** used to store the value of the attribute
8437 *
8438 * parse an attribute
8439 *
8440 * [41] Attribute ::= Name Eq AttValue
8441 *
8442 * [ WFC: No External Entity References ]
8443 * Attribute values cannot contain direct or indirect entity references
8444 * to external entities.
8445 *
8446 * [ WFC: No < in Attribute Values ]
8447 * The replacement text of any entity referred to directly or indirectly in
8448 * an attribute value (other than "<") must not contain a <.
8449 *
8450 * [ VC: Attribute Value Type ]
8451 * The attribute must have been declared; the value must be of the type
8452 * declared for it.
8453 *
8454 * [25] Eq ::= S? '=' S?
8455 *
8456 * With namespace:
8457 *
8458 * [NS 11] Attribute ::= QName Eq AttValue
8459 *
8460 * Also the case QName == xmlns:??? is handled independently as a namespace
8461 * definition.
8462 *
8463 * Returns the attribute name, and the value in *value.
8464 */
8465
8466 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8467 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8468 const xmlChar *name;
8469 xmlChar *val;
8470
8471 *value = NULL;
8472 GROW;
8473 name = xmlParseName(ctxt);
8474 if (name == NULL) {
8475 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8476 "error parsing attribute name\n");
8477 return(NULL);
8478 }
8479
8480 /*
8481 * read the value
8482 */
8483 SKIP_BLANKS;
8484 if (RAW == '=') {
8485 NEXT;
8486 SKIP_BLANKS;
8487 val = xmlParseAttValue(ctxt);
8488 ctxt->instate = XML_PARSER_CONTENT;
8489 } else {
8490 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8491 "Specification mandate value for attribute %s\n", name);
8492 return(NULL);
8493 }
8494
8495 /*
8496 * Check that xml:lang conforms to the specification
8497 * No more registered as an error, just generate a warning now
8498 * since this was deprecated in XML second edition
8499 */
8500 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8501 if (!xmlCheckLanguageID(val)) {
8502 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8503 "Malformed value for xml:lang : %s\n",
8504 val, NULL);
8505 }
8506 }
8507
8508 /*
8509 * Check that xml:space conforms to the specification
8510 */
8511 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8512 if (xmlStrEqual(val, BAD_CAST "default"))
8513 *(ctxt->space) = 0;
8514 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8515 *(ctxt->space) = 1;
8516 else {
8517 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8518 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8519 val, NULL);
8520 }
8521 }
8522
8523 *value = val;
8524 return(name);
8525 }
8526
8527 /**
8528 * xmlParseStartTag:
8529 * @ctxt: an XML parser context
8530 *
8531 * parse a start of tag either for rule element or
8532 * EmptyElement. In both case we don't parse the tag closing chars.
8533 *
8534 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8535 *
8536 * [ WFC: Unique Att Spec ]
8537 * No attribute name may appear more than once in the same start-tag or
8538 * empty-element tag.
8539 *
8540 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8541 *
8542 * [ WFC: Unique Att Spec ]
8543 * No attribute name may appear more than once in the same start-tag or
8544 * empty-element tag.
8545 *
8546 * With namespace:
8547 *
8548 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8549 *
8550 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8551 *
8552 * Returns the element name parsed
8553 */
8554
8555 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8556 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8557 const xmlChar *name;
8558 const xmlChar *attname;
8559 xmlChar *attvalue;
8560 const xmlChar **atts = ctxt->atts;
8561 int nbatts = 0;
8562 int maxatts = ctxt->maxatts;
8563 int i;
8564
8565 if (RAW != '<') return(NULL);
8566 NEXT1;
8567
8568 name = xmlParseName(ctxt);
8569 if (name == NULL) {
8570 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8571 "xmlParseStartTag: invalid element name\n");
8572 return(NULL);
8573 }
8574
8575 /*
8576 * Now parse the attributes, it ends up with the ending
8577 *
8578 * (S Attribute)* S?
8579 */
8580 SKIP_BLANKS;
8581 GROW;
8582
8583 while (((RAW != '>') &&
8584 ((RAW != '/') || (NXT(1) != '>')) &&
8585 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8586 const xmlChar *q = CUR_PTR;
8587 unsigned int cons = ctxt->input->consumed;
8588
8589 attname = xmlParseAttribute(ctxt, &attvalue);
8590 if ((attname != NULL) && (attvalue != NULL)) {
8591 /*
8592 * [ WFC: Unique Att Spec ]
8593 * No attribute name may appear more than once in the same
8594 * start-tag or empty-element tag.
8595 */
8596 for (i = 0; i < nbatts;i += 2) {
8597 if (xmlStrEqual(atts[i], attname)) {
8598 xmlErrAttributeDup(ctxt, NULL, attname);
8599 xmlFree(attvalue);
8600 goto failed;
8601 }
8602 }
8603 /*
8604 * Add the pair to atts
8605 */
8606 if (atts == NULL) {
8607 maxatts = 22; /* allow for 10 attrs by default */
8608 atts = (const xmlChar **)
8609 xmlMalloc(maxatts * sizeof(xmlChar *));
8610 if (atts == NULL) {
8611 xmlErrMemory(ctxt, NULL);
8612 if (attvalue != NULL)
8613 xmlFree(attvalue);
8614 goto failed;
8615 }
8616 ctxt->atts = atts;
8617 ctxt->maxatts = maxatts;
8618 } else if (nbatts + 4 > maxatts) {
8619 const xmlChar **n;
8620
8621 maxatts *= 2;
8622 n = (const xmlChar **) xmlRealloc((void *) atts,
8623 maxatts * sizeof(const xmlChar *));
8624 if (n == NULL) {
8625 xmlErrMemory(ctxt, NULL);
8626 if (attvalue != NULL)
8627 xmlFree(attvalue);
8628 goto failed;
8629 }
8630 atts = n;
8631 ctxt->atts = atts;
8632 ctxt->maxatts = maxatts;
8633 }
8634 atts[nbatts++] = attname;
8635 atts[nbatts++] = attvalue;
8636 atts[nbatts] = NULL;
8637 atts[nbatts + 1] = NULL;
8638 } else {
8639 if (attvalue != NULL)
8640 xmlFree(attvalue);
8641 }
8642
8643 failed:
8644
8645 GROW
8646 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8647 break;
8648 if (!IS_BLANK_CH(RAW)) {
8649 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8650 "attributes construct error\n");
8651 }
8652 SKIP_BLANKS;
8653 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8654 (attname == NULL) && (attvalue == NULL)) {
8655 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8656 "xmlParseStartTag: problem parsing attributes\n");
8657 break;
8658 }
8659 SHRINK;
8660 GROW;
8661 }
8662
8663 /*
8664 * SAX: Start of Element !
8665 */
8666 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8667 (!ctxt->disableSAX)) {
8668 if (nbatts > 0)
8669 ctxt->sax->startElement(ctxt->userData, name, atts);
8670 else
8671 ctxt->sax->startElement(ctxt->userData, name, NULL);
8672 }
8673
8674 if (atts != NULL) {
8675 /* Free only the content strings */
8676 for (i = 1;i < nbatts;i+=2)
8677 if (atts[i] != NULL)
8678 xmlFree((xmlChar *) atts[i]);
8679 }
8680 return(name);
8681 }
8682
8683 /**
8684 * xmlParseEndTag1:
8685 * @ctxt: an XML parser context
8686 * @line: line of the start tag
8687 * @nsNr: number of namespaces on the start tag
8688 *
8689 * parse an end of tag
8690 *
8691 * [42] ETag ::= '</' Name S? '>'
8692 *
8693 * With namespace
8694 *
8695 * [NS 9] ETag ::= '</' QName S? '>'
8696 */
8697
8698 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8699 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8700 const xmlChar *name;
8701
8702 GROW;
8703 if ((RAW != '<') || (NXT(1) != '/')) {
8704 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8705 "xmlParseEndTag: '</' not found\n");
8706 return;
8707 }
8708 SKIP(2);
8709
8710 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8711
8712 /*
8713 * We should definitely be at the ending "S? '>'" part
8714 */
8715 GROW;
8716 SKIP_BLANKS;
8717 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8718 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8719 } else
8720 NEXT1;
8721
8722 /*
8723 * [ WFC: Element Type Match ]
8724 * The Name in an element's end-tag must match the element type in the
8725 * start-tag.
8726 *
8727 */
8728 if (name != (xmlChar*)1) {
8729 if (name == NULL) name = BAD_CAST "unparseable";
8730 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8731 "Opening and ending tag mismatch: %s line %d and %s\n",
8732 ctxt->name, line, name);
8733 }
8734
8735 /*
8736 * SAX: End of Tag
8737 */
8738 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8739 (!ctxt->disableSAX))
8740 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8741
8742 namePop(ctxt);
8743 spacePop(ctxt);
8744 return;
8745 }
8746
8747 /**
8748 * xmlParseEndTag:
8749 * @ctxt: an XML parser context
8750 *
8751 * parse an end of tag
8752 *
8753 * [42] ETag ::= '</' Name S? '>'
8754 *
8755 * With namespace
8756 *
8757 * [NS 9] ETag ::= '</' QName S? '>'
8758 */
8759
8760 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8761 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8762 xmlParseEndTag1(ctxt, 0);
8763 }
8764 #endif /* LIBXML_SAX1_ENABLED */
8765
8766 /************************************************************************
8767 * *
8768 * SAX 2 specific operations *
8769 * *
8770 ************************************************************************/
8771
8772 /*
8773 * xmlGetNamespace:
8774 * @ctxt: an XML parser context
8775 * @prefix: the prefix to lookup
8776 *
8777 * Lookup the namespace name for the @prefix (which ca be NULL)
8778 * The prefix must come from the @ctxt->dict dictionnary
8779 *
8780 * Returns the namespace name or NULL if not bound
8781 */
8782 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8783 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8784 int i;
8785
8786 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8787 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8788 if (ctxt->nsTab[i] == prefix) {
8789 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8790 return(NULL);
8791 return(ctxt->nsTab[i + 1]);
8792 }
8793 return(NULL);
8794 }
8795
8796 /**
8797 * xmlParseQName:
8798 * @ctxt: an XML parser context
8799 * @prefix: pointer to store the prefix part
8800 *
8801 * parse an XML Namespace QName
8802 *
8803 * [6] QName ::= (Prefix ':')? LocalPart
8804 * [7] Prefix ::= NCName
8805 * [8] LocalPart ::= NCName
8806 *
8807 * Returns the Name parsed or NULL
8808 */
8809
8810 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8811 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8812 const xmlChar *l, *p;
8813
8814 GROW;
8815
8816 l = xmlParseNCName(ctxt);
8817 if (l == NULL) {
8818 if (CUR == ':') {
8819 l = xmlParseName(ctxt);
8820 if (l != NULL) {
8821 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8822 "Failed to parse QName '%s'\n", l, NULL, NULL);
8823 *prefix = NULL;
8824 return(l);
8825 }
8826 }
8827 return(NULL);
8828 }
8829 if (CUR == ':') {
8830 NEXT;
8831 p = l;
8832 l = xmlParseNCName(ctxt);
8833 if (l == NULL) {
8834 xmlChar *tmp;
8835
8836 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8837 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8838 l = xmlParseNmtoken(ctxt);
8839 if (l == NULL)
8840 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8841 else {
8842 tmp = xmlBuildQName(l, p, NULL, 0);
8843 xmlFree((char *)l);
8844 }
8845 p = xmlDictLookup(ctxt->dict, tmp, -1);
8846 if (tmp != NULL) xmlFree(tmp);
8847 *prefix = NULL;
8848 return(p);
8849 }
8850 if (CUR == ':') {
8851 xmlChar *tmp;
8852
8853 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8854 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8855 NEXT;
8856 tmp = (xmlChar *) xmlParseName(ctxt);
8857 if (tmp != NULL) {
8858 tmp = xmlBuildQName(tmp, l, NULL, 0);
8859 l = xmlDictLookup(ctxt->dict, tmp, -1);
8860 if (tmp != NULL) xmlFree(tmp);
8861 *prefix = p;
8862 return(l);
8863 }
8864 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8865 l = xmlDictLookup(ctxt->dict, tmp, -1);
8866 if (tmp != NULL) xmlFree(tmp);
8867 *prefix = p;
8868 return(l);
8869 }
8870 *prefix = p;
8871 } else
8872 *prefix = NULL;
8873 return(l);
8874 }
8875
8876 /**
8877 * xmlParseQNameAndCompare:
8878 * @ctxt: an XML parser context
8879 * @name: the localname
8880 * @prefix: the prefix, if any.
8881 *
8882 * parse an XML name and compares for match
8883 * (specialized for endtag parsing)
8884 *
8885 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8886 * and the name for mismatch
8887 */
8888
8889 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8890 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8891 xmlChar const *prefix) {
8892 const xmlChar *cmp;
8893 const xmlChar *in;
8894 const xmlChar *ret;
8895 const xmlChar *prefix2;
8896
8897 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8898
8899 GROW;
8900 in = ctxt->input->cur;
8901
8902 cmp = prefix;
8903 while (*in != 0 && *in == *cmp) {
8904 ++in;
8905 ++cmp;
8906 }
8907 if ((*cmp == 0) && (*in == ':')) {
8908 in++;
8909 cmp = name;
8910 while (*in != 0 && *in == *cmp) {
8911 ++in;
8912 ++cmp;
8913 }
8914 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8915 /* success */
8916 ctxt->input->cur = in;
8917 return((const xmlChar*) 1);
8918 }
8919 }
8920 /*
8921 * all strings coms from the dictionary, equality can be done directly
8922 */
8923 ret = xmlParseQName (ctxt, &prefix2);
8924 if ((ret == name) && (prefix == prefix2))
8925 return((const xmlChar*) 1);
8926 return ret;
8927 }
8928
8929 /**
8930 * xmlParseAttValueInternal:
8931 * @ctxt: an XML parser context
8932 * @len: attribute len result
8933 * @alloc: whether the attribute was reallocated as a new string
8934 * @normalize: if 1 then further non-CDATA normalization must be done
8935 *
8936 * parse a value for an attribute.
8937 * NOTE: if no normalization is needed, the routine will return pointers
8938 * directly from the data buffer.
8939 *
8940 * 3.3.3 Attribute-Value Normalization:
8941 * Before the value of an attribute is passed to the application or
8942 * checked for validity, the XML processor must normalize it as follows:
8943 * - a character reference is processed by appending the referenced
8944 * character to the attribute value
8945 * - an entity reference is processed by recursively processing the
8946 * replacement text of the entity
8947 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8948 * appending #x20 to the normalized value, except that only a single
8949 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8950 * parsed entity or the literal entity value of an internal parsed entity
8951 * - other characters are processed by appending them to the normalized value
8952 * If the declared value is not CDATA, then the XML processor must further
8953 * process the normalized attribute value by discarding any leading and
8954 * trailing space (#x20) characters, and by replacing sequences of space
8955 * (#x20) characters by a single space (#x20) character.
8956 * All attributes for which no declaration has been read should be treated
8957 * by a non-validating parser as if declared CDATA.
8958 *
8959 * Returns the AttValue parsed or NULL. The value has to be freed by the
8960 * caller if it was copied, this can be detected by val[*len] == 0.
8961 */
8962
8963 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8964 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8965 int normalize)
8966 {
8967 xmlChar limit = 0;
8968 const xmlChar *in = NULL, *start, *end, *last;
8969 xmlChar *ret = NULL;
8970 int line, col;
8971
8972 GROW;
8973 in = (xmlChar *) CUR_PTR;
8974 line = ctxt->input->line;
8975 col = ctxt->input->col;
8976 if (*in != '"' && *in != '\'') {
8977 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8978 return (NULL);
8979 }
8980 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8981
8982 /*
8983 * try to handle in this routine the most common case where no
8984 * allocation of a new string is required and where content is
8985 * pure ASCII.
8986 */
8987 limit = *in++;
8988 col++;
8989 end = ctxt->input->end;
8990 start = in;
8991 if (in >= end) {
8992 const xmlChar *oldbase = ctxt->input->base;
8993 GROW;
8994 if (oldbase != ctxt->input->base) {
8995 long delta = ctxt->input->base - oldbase;
8996 start = start + delta;
8997 in = in + delta;
8998 }
8999 end = ctxt->input->end;
9000 }
9001 if (normalize) {
9002 /*
9003 * Skip any leading spaces
9004 */
9005 while ((in < end) && (*in != limit) &&
9006 ((*in == 0x20) || (*in == 0x9) ||
9007 (*in == 0xA) || (*in == 0xD))) {
9008 if (*in == 0xA) {
9009 line++; col = 1;
9010 } else {
9011 col++;
9012 }
9013 in++;
9014 start = in;
9015 if (in >= end) {
9016 const xmlChar *oldbase = ctxt->input->base;
9017 GROW;
9018 if (ctxt->instate == XML_PARSER_EOF)
9019 return(NULL);
9020 if (oldbase != ctxt->input->base) {
9021 long delta = ctxt->input->base - oldbase;
9022 start = start + delta;
9023 in = in + delta;
9024 }
9025 end = ctxt->input->end;
9026 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9027 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9028 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9029 "AttValue length too long\n");
9030 return(NULL);
9031 }
9032 }
9033 }
9034 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9035 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9036 col++;
9037 if ((*in++ == 0x20) && (*in == 0x20)) break;
9038 if (in >= end) {
9039 const xmlChar *oldbase = ctxt->input->base;
9040 GROW;
9041 if (ctxt->instate == XML_PARSER_EOF)
9042 return(NULL);
9043 if (oldbase != ctxt->input->base) {
9044 long delta = ctxt->input->base - oldbase;
9045 start = start + delta;
9046 in = in + delta;
9047 }
9048 end = ctxt->input->end;
9049 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9050 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9051 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9052 "AttValue length too long\n");
9053 return(NULL);
9054 }
9055 }
9056 }
9057 last = in;
9058 /*
9059 * skip the trailing blanks
9060 */
9061 while ((last[-1] == 0x20) && (last > start)) last--;
9062 while ((in < end) && (*in != limit) &&
9063 ((*in == 0x20) || (*in == 0x9) ||
9064 (*in == 0xA) || (*in == 0xD))) {
9065 if (*in == 0xA) {
9066 line++, col = 1;
9067 } else {
9068 col++;
9069 }
9070 in++;
9071 if (in >= end) {
9072 const xmlChar *oldbase = ctxt->input->base;
9073 GROW;
9074 if (ctxt->instate == XML_PARSER_EOF)
9075 return(NULL);
9076 if (oldbase != ctxt->input->base) {
9077 long delta = ctxt->input->base - oldbase;
9078 start = start + delta;
9079 in = in + delta;
9080 last = last + delta;
9081 }
9082 end = ctxt->input->end;
9083 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9084 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9085 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9086 "AttValue length too long\n");
9087 return(NULL);
9088 }
9089 }
9090 }
9091 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9092 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9093 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9094 "AttValue length too long\n");
9095 return(NULL);
9096 }
9097 if (*in != limit) goto need_complex;
9098 } else {
9099 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9100 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9101 in++;
9102 col++;
9103 if (in >= end) {
9104 const xmlChar *oldbase = ctxt->input->base;
9105 GROW;
9106 if (ctxt->instate == XML_PARSER_EOF)
9107 return(NULL);
9108 if (oldbase != ctxt->input->base) {
9109 long delta = ctxt->input->base - oldbase;
9110 start = start + delta;
9111 in = in + delta;
9112 }
9113 end = ctxt->input->end;
9114 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9115 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9116 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9117 "AttValue length too long\n");
9118 return(NULL);
9119 }
9120 }
9121 }
9122 last = in;
9123 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9124 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9125 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9126 "AttValue length too long\n");
9127 return(NULL);
9128 }
9129 if (*in != limit) goto need_complex;
9130 }
9131 in++;
9132 col++;
9133 if (len != NULL) {
9134 *len = last - start;
9135 ret = (xmlChar *) start;
9136 } else {
9137 if (alloc) *alloc = 1;
9138 ret = xmlStrndup(start, last - start);
9139 }
9140 CUR_PTR = in;
9141 ctxt->input->line = line;
9142 ctxt->input->col = col;
9143 if (alloc) *alloc = 0;
9144 return ret;
9145 need_complex:
9146 if (alloc) *alloc = 1;
9147 return xmlParseAttValueComplex(ctxt, len, normalize);
9148 }
9149
9150 /**
9151 * xmlParseAttribute2:
9152 * @ctxt: an XML parser context
9153 * @pref: the element prefix
9154 * @elem: the element name
9155 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9156 * @value: a xmlChar ** used to store the value of the attribute
9157 * @len: an int * to save the length of the attribute
9158 * @alloc: an int * to indicate if the attribute was allocated
9159 *
9160 * parse an attribute in the new SAX2 framework.
9161 *
9162 * Returns the attribute name, and the value in *value, .
9163 */
9164
9165 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9166 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9167 const xmlChar * pref, const xmlChar * elem,
9168 const xmlChar ** prefix, xmlChar ** value,
9169 int *len, int *alloc)
9170 {
9171 const xmlChar *name;
9172 xmlChar *val, *internal_val = NULL;
9173 int normalize = 0;
9174
9175 *value = NULL;
9176 GROW;
9177 name = xmlParseQName(ctxt, prefix);
9178 if (name == NULL) {
9179 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9180 "error parsing attribute name\n");
9181 return (NULL);
9182 }
9183
9184 /*
9185 * get the type if needed
9186 */
9187 if (ctxt->attsSpecial != NULL) {
9188 int type;
9189
9190 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9191 pref, elem, *prefix, name);
9192 if (type != 0)
9193 normalize = 1;
9194 }
9195
9196 /*
9197 * read the value
9198 */
9199 SKIP_BLANKS;
9200 if (RAW == '=') {
9201 NEXT;
9202 SKIP_BLANKS;
9203 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9204 if (normalize) {
9205 /*
9206 * Sometimes a second normalisation pass for spaces is needed
9207 * but that only happens if charrefs or entities refernces
9208 * have been used in the attribute value, i.e. the attribute
9209 * value have been extracted in an allocated string already.
9210 */
9211 if (*alloc) {
9212 const xmlChar *val2;
9213
9214 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9215 if ((val2 != NULL) && (val2 != val)) {
9216 xmlFree(val);
9217 val = (xmlChar *) val2;
9218 }
9219 }
9220 }
9221 ctxt->instate = XML_PARSER_CONTENT;
9222 } else {
9223 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9224 "Specification mandate value for attribute %s\n",
9225 name);
9226 return (NULL);
9227 }
9228
9229 if (*prefix == ctxt->str_xml) {
9230 /*
9231 * Check that xml:lang conforms to the specification
9232 * No more registered as an error, just generate a warning now
9233 * since this was deprecated in XML second edition
9234 */
9235 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9236 internal_val = xmlStrndup(val, *len);
9237 if (!xmlCheckLanguageID(internal_val)) {
9238 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9239 "Malformed value for xml:lang : %s\n",
9240 internal_val, NULL);
9241 }
9242 }
9243
9244 /*
9245 * Check that xml:space conforms to the specification
9246 */
9247 if (xmlStrEqual(name, BAD_CAST "space")) {
9248 internal_val = xmlStrndup(val, *len);
9249 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9250 *(ctxt->space) = 0;
9251 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9252 *(ctxt->space) = 1;
9253 else {
9254 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9255 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9256 internal_val, NULL);
9257 }
9258 }
9259 if (internal_val) {
9260 xmlFree(internal_val);
9261 }
9262 }
9263
9264 *value = val;
9265 return (name);
9266 }
9267 /**
9268 * xmlParseStartTag2:
9269 * @ctxt: an XML parser context
9270 *
9271 * parse a start of tag either for rule element or
9272 * EmptyElement. In both case we don't parse the tag closing chars.
9273 * This routine is called when running SAX2 parsing
9274 *
9275 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9276 *
9277 * [ WFC: Unique Att Spec ]
9278 * No attribute name may appear more than once in the same start-tag or
9279 * empty-element tag.
9280 *
9281 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9282 *
9283 * [ WFC: Unique Att Spec ]
9284 * No attribute name may appear more than once in the same start-tag or
9285 * empty-element tag.
9286 *
9287 * With namespace:
9288 *
9289 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9290 *
9291 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9292 *
9293 * Returns the element name parsed
9294 */
9295
9296 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9297 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9298 const xmlChar **URI, int *tlen) {
9299 const xmlChar *localname;
9300 const xmlChar *prefix;
9301 const xmlChar *attname;
9302 const xmlChar *aprefix;
9303 const xmlChar *nsname;
9304 xmlChar *attvalue;
9305 const xmlChar **atts = ctxt->atts;
9306 int maxatts = ctxt->maxatts;
9307 int nratts, nbatts, nbdef;
9308 int i, j, nbNs, attval, oldline, oldcol;
9309 const xmlChar *base;
9310 unsigned long cur;
9311 int nsNr = ctxt->nsNr;
9312
9313 if (RAW != '<') return(NULL);
9314 NEXT1;
9315
9316 /*
9317 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9318 * point since the attribute values may be stored as pointers to
9319 * the buffer and calling SHRINK would destroy them !
9320 * The Shrinking is only possible once the full set of attribute
9321 * callbacks have been done.
9322 */
9323 reparse:
9324 SHRINK;
9325 base = ctxt->input->base;
9326 cur = ctxt->input->cur - ctxt->input->base;
9327 oldline = ctxt->input->line;
9328 oldcol = ctxt->input->col;
9329 nbatts = 0;
9330 nratts = 0;
9331 nbdef = 0;
9332 nbNs = 0;
9333 attval = 0;
9334 /* Forget any namespaces added during an earlier parse of this element. */
9335 ctxt->nsNr = nsNr;
9336
9337 localname = xmlParseQName(ctxt, &prefix);
9338 if (localname == NULL) {
9339 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9340 "StartTag: invalid element name\n");
9341 return(NULL);
9342 }
9343 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9344
9345 /*
9346 * Now parse the attributes, it ends up with the ending
9347 *
9348 * (S Attribute)* S?
9349 */
9350 SKIP_BLANKS;
9351 GROW;
9352 if (ctxt->input->base != base) goto base_changed;
9353
9354 while (((RAW != '>') &&
9355 ((RAW != '/') || (NXT(1) != '>')) &&
9356 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9357 const xmlChar *q = CUR_PTR;
9358 unsigned int cons = ctxt->input->consumed;
9359 int len = -1, alloc = 0;
9360
9361 attname = xmlParseAttribute2(ctxt, prefix, localname,
9362 &aprefix, &attvalue, &len, &alloc);
9363 if (ctxt->input->base != base) {
9364 if ((attvalue != NULL) && (alloc != 0))
9365 xmlFree(attvalue);
9366 attvalue = NULL;
9367 goto base_changed;
9368 }
9369 if ((attname != NULL) && (attvalue != NULL)) {
9370 if (len < 0) len = xmlStrlen(attvalue);
9371 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9372 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9373 xmlURIPtr uri;
9374
9375 if (URL == NULL) {
9376 xmlErrMemory(ctxt, "dictionary allocation failure");
9377 if ((attvalue != NULL) && (alloc != 0))
9378 xmlFree(attvalue);
9379 return(NULL);
9380 }
9381 if (*URL != 0) {
9382 uri = xmlParseURI((const char *) URL);
9383 if (uri == NULL) {
9384 xmlNsErr(ctxt, XML_WAR_NS_URI,
9385 "xmlns: '%s' is not a valid URI\n",
9386 URL, NULL, NULL);
9387 } else {
9388 if (uri->scheme == NULL) {
9389 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9390 "xmlns: URI %s is not absolute\n",
9391 URL, NULL, NULL);
9392 }
9393 xmlFreeURI(uri);
9394 }
9395 if (URL == ctxt->str_xml_ns) {
9396 if (attname != ctxt->str_xml) {
9397 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9398 "xml namespace URI cannot be the default namespace\n",
9399 NULL, NULL, NULL);
9400 }
9401 goto skip_default_ns;
9402 }
9403 if ((len == 29) &&
9404 (xmlStrEqual(URL,
9405 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9406 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9407 "reuse of the xmlns namespace name is forbidden\n",
9408 NULL, NULL, NULL);
9409 goto skip_default_ns;
9410 }
9411 }
9412 /*
9413 * check that it's not a defined namespace
9414 */
9415 for (j = 1;j <= nbNs;j++)
9416 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9417 break;
9418 if (j <= nbNs)
9419 xmlErrAttributeDup(ctxt, NULL, attname);
9420 else
9421 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9422 skip_default_ns:
9423 if (alloc != 0) xmlFree(attvalue);
9424 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9425 break;
9426 if (!IS_BLANK_CH(RAW)) {
9427 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9428 "attributes construct error\n");
9429 break;
9430 }
9431 SKIP_BLANKS;
9432 continue;
9433 }
9434 if (aprefix == ctxt->str_xmlns) {
9435 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9436 xmlURIPtr uri;
9437
9438 if (attname == ctxt->str_xml) {
9439 if (URL != ctxt->str_xml_ns) {
9440 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9441 "xml namespace prefix mapped to wrong URI\n",
9442 NULL, NULL, NULL);
9443 }
9444 /*
9445 * Do not keep a namespace definition node
9446 */
9447 goto skip_ns;
9448 }
9449 if (URL == ctxt->str_xml_ns) {
9450 if (attname != ctxt->str_xml) {
9451 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9452 "xml namespace URI mapped to wrong prefix\n",
9453 NULL, NULL, NULL);
9454 }
9455 goto skip_ns;
9456 }
9457 if (attname == ctxt->str_xmlns) {
9458 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459 "redefinition of the xmlns prefix is forbidden\n",
9460 NULL, NULL, NULL);
9461 goto skip_ns;
9462 }
9463 if ((len == 29) &&
9464 (xmlStrEqual(URL,
9465 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9466 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9467 "reuse of the xmlns namespace name is forbidden\n",
9468 NULL, NULL, NULL);
9469 goto skip_ns;
9470 }
9471 if ((URL == NULL) || (URL[0] == 0)) {
9472 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9473 "xmlns:%s: Empty XML namespace is not allowed\n",
9474 attname, NULL, NULL);
9475 goto skip_ns;
9476 } else {
9477 uri = xmlParseURI((const char *) URL);
9478 if (uri == NULL) {
9479 xmlNsErr(ctxt, XML_WAR_NS_URI,
9480 "xmlns:%s: '%s' is not a valid URI\n",
9481 attname, URL, NULL);
9482 } else {
9483 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9484 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9485 "xmlns:%s: URI %s is not absolute\n",
9486 attname, URL, NULL);
9487 }
9488 xmlFreeURI(uri);
9489 }
9490 }
9491
9492 /*
9493 * check that it's not a defined namespace
9494 */
9495 for (j = 1;j <= nbNs;j++)
9496 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9497 break;
9498 if (j <= nbNs)
9499 xmlErrAttributeDup(ctxt, aprefix, attname);
9500 else
9501 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9502 skip_ns:
9503 if (alloc != 0) xmlFree(attvalue);
9504 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9505 break;
9506 if (!IS_BLANK_CH(RAW)) {
9507 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9508 "attributes construct error\n");
9509 break;
9510 }
9511 SKIP_BLANKS;
9512 if (ctxt->input->base != base) goto base_changed;
9513 continue;
9514 }
9515
9516 /*
9517 * Add the pair to atts
9518 */
9519 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9520 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9521 if (attvalue[len] == 0)
9522 xmlFree(attvalue);
9523 goto failed;
9524 }
9525 maxatts = ctxt->maxatts;
9526 atts = ctxt->atts;
9527 }
9528 ctxt->attallocs[nratts++] = alloc;
9529 atts[nbatts++] = attname;
9530 atts[nbatts++] = aprefix;
9531 atts[nbatts++] = NULL; /* the URI will be fetched later */
9532 atts[nbatts++] = attvalue;
9533 attvalue += len;
9534 atts[nbatts++] = attvalue;
9535 /*
9536 * tag if some deallocation is needed
9537 */
9538 if (alloc != 0) attval = 1;
9539 } else {
9540 if ((attvalue != NULL) && (attvalue[len] == 0))
9541 xmlFree(attvalue);
9542 }
9543
9544 failed:
9545
9546 GROW
9547 if (ctxt->instate == XML_PARSER_EOF)
9548 break;
9549 if (ctxt->input->base != base) goto base_changed;
9550 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9551 break;
9552 if (!IS_BLANK_CH(RAW)) {
9553 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9554 "attributes construct error\n");
9555 break;
9556 }
9557 SKIP_BLANKS;
9558 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9559 (attname == NULL) && (attvalue == NULL)) {
9560 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9561 "xmlParseStartTag: problem parsing attributes\n");
9562 break;
9563 }
9564 GROW;
9565 if (ctxt->input->base != base) goto base_changed;
9566 }
9567
9568 /*
9569 * The attributes defaulting
9570 */
9571 if (ctxt->attsDefault != NULL) {
9572 xmlDefAttrsPtr defaults;
9573
9574 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9575 if (defaults != NULL) {
9576 for (i = 0;i < defaults->nbAttrs;i++) {
9577 attname = defaults->values[5 * i];
9578 aprefix = defaults->values[5 * i + 1];
9579
9580 /*
9581 * special work for namespaces defaulted defs
9582 */
9583 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9584 /*
9585 * check that it's not a defined namespace
9586 */
9587 for (j = 1;j <= nbNs;j++)
9588 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9589 break;
9590 if (j <= nbNs) continue;
9591
9592 nsname = xmlGetNamespace(ctxt, NULL);
9593 if (nsname != defaults->values[5 * i + 2]) {
9594 if (nsPush(ctxt, NULL,
9595 defaults->values[5 * i + 2]) > 0)
9596 nbNs++;
9597 }
9598 } else if (aprefix == ctxt->str_xmlns) {
9599 /*
9600 * check that it's not a defined namespace
9601 */
9602 for (j = 1;j <= nbNs;j++)
9603 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9604 break;
9605 if (j <= nbNs) continue;
9606
9607 nsname = xmlGetNamespace(ctxt, attname);
9608 if (nsname != defaults->values[2]) {
9609 if (nsPush(ctxt, attname,
9610 defaults->values[5 * i + 2]) > 0)
9611 nbNs++;
9612 }
9613 } else {
9614 /*
9615 * check that it's not a defined attribute
9616 */
9617 for (j = 0;j < nbatts;j+=5) {
9618 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9619 break;
9620 }
9621 if (j < nbatts) continue;
9622
9623 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9624 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9625 return(NULL);
9626 }
9627 maxatts = ctxt->maxatts;
9628 atts = ctxt->atts;
9629 }
9630 atts[nbatts++] = attname;
9631 atts[nbatts++] = aprefix;
9632 if (aprefix == NULL)
9633 atts[nbatts++] = NULL;
9634 else
9635 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9636 atts[nbatts++] = defaults->values[5 * i + 2];
9637 atts[nbatts++] = defaults->values[5 * i + 3];
9638 if ((ctxt->standalone == 1) &&
9639 (defaults->values[5 * i + 4] != NULL)) {
9640 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9641 "standalone: attribute %s on %s defaulted from external subset\n",
9642 attname, localname);
9643 }
9644 nbdef++;
9645 }
9646 }
9647 }
9648 }
9649
9650 /*
9651 * The attributes checkings
9652 */
9653 for (i = 0; i < nbatts;i += 5) {
9654 /*
9655 * The default namespace does not apply to attribute names.
9656 */
9657 if (atts[i + 1] != NULL) {
9658 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9659 if (nsname == NULL) {
9660 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9661 "Namespace prefix %s for %s on %s is not defined\n",
9662 atts[i + 1], atts[i], localname);
9663 }
9664 atts[i + 2] = nsname;
9665 } else
9666 nsname = NULL;
9667 /*
9668 * [ WFC: Unique Att Spec ]
9669 * No attribute name may appear more than once in the same
9670 * start-tag or empty-element tag.
9671 * As extended by the Namespace in XML REC.
9672 */
9673 for (j = 0; j < i;j += 5) {
9674 if (atts[i] == atts[j]) {
9675 if (atts[i+1] == atts[j+1]) {
9676 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9677 break;
9678 }
9679 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9680 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9681 "Namespaced Attribute %s in '%s' redefined\n",
9682 atts[i], nsname, NULL);
9683 break;
9684 }
9685 }
9686 }
9687 }
9688
9689 nsname = xmlGetNamespace(ctxt, prefix);
9690 if ((prefix != NULL) && (nsname == NULL)) {
9691 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9692 "Namespace prefix %s on %s is not defined\n",
9693 prefix, localname, NULL);
9694 }
9695 *pref = prefix;
9696 *URI = nsname;
9697
9698 /*
9699 * SAX: Start of Element !
9700 */
9701 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9702 (!ctxt->disableSAX)) {
9703 if (nbNs > 0)
9704 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9705 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9706 nbatts / 5, nbdef, atts);
9707 else
9708 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9709 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9710 }
9711
9712 /*
9713 * Free up attribute allocated strings if needed
9714 */
9715 if (attval != 0) {
9716 for (i = 3,j = 0; j < nratts;i += 5,j++)
9717 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9718 xmlFree((xmlChar *) atts[i]);
9719 }
9720
9721 return(localname);
9722
9723 base_changed:
9724 /*
9725 * the attribute strings are valid iif the base didn't changed
9726 */
9727 if (attval != 0) {
9728 for (i = 3,j = 0; j < nratts;i += 5,j++)
9729 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9730 xmlFree((xmlChar *) atts[i]);
9731 }
9732 ctxt->input->cur = ctxt->input->base + cur;
9733 ctxt->input->line = oldline;
9734 ctxt->input->col = oldcol;
9735 if (ctxt->wellFormed == 1) {
9736 goto reparse;
9737 }
9738 return(NULL);
9739 }
9740
9741 /**
9742 * xmlParseEndTag2:
9743 * @ctxt: an XML parser context
9744 * @line: line of the start tag
9745 * @nsNr: number of namespaces on the start tag
9746 *
9747 * parse an end of tag
9748 *
9749 * [42] ETag ::= '</' Name S? '>'
9750 *
9751 * With namespace
9752 *
9753 * [NS 9] ETag ::= '</' QName S? '>'
9754 */
9755
9756 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9757 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9758 const xmlChar *URI, int line, int nsNr, int tlen) {
9759 const xmlChar *name;
9760
9761 GROW;
9762 if ((RAW != '<') || (NXT(1) != '/')) {
9763 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9764 return;
9765 }
9766 SKIP(2);
9767
9768 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9769 if (ctxt->input->cur[tlen] == '>') {
9770 ctxt->input->cur += tlen + 1;
9771 ctxt->input->col += tlen + 1;
9772 goto done;
9773 }
9774 ctxt->input->cur += tlen;
9775 ctxt->input->col += tlen;
9776 name = (xmlChar*)1;
9777 } else {
9778 if (prefix == NULL)
9779 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9780 else
9781 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9782 }
9783
9784 /*
9785 * We should definitely be at the ending "S? '>'" part
9786 */
9787 GROW;
9788 if (ctxt->instate == XML_PARSER_EOF)
9789 return;
9790 SKIP_BLANKS;
9791 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9792 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9793 } else
9794 NEXT1;
9795
9796 /*
9797 * [ WFC: Element Type Match ]
9798 * The Name in an element's end-tag must match the element type in the
9799 * start-tag.
9800 *
9801 */
9802 if (name != (xmlChar*)1) {
9803 if (name == NULL) name = BAD_CAST "unparseable";
9804 if ((line == 0) && (ctxt->node != NULL))
9805 line = ctxt->node->line;
9806 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9807 "Opening and ending tag mismatch: %s line %d and %s\n",
9808 ctxt->name, line, name);
9809 }
9810
9811 /*
9812 * SAX: End of Tag
9813 */
9814 done:
9815 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9816 (!ctxt->disableSAX))
9817 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9818
9819 spacePop(ctxt);
9820 if (nsNr != 0)
9821 nsPop(ctxt, nsNr);
9822 return;
9823 }
9824
9825 /**
9826 * xmlParseCDSect:
9827 * @ctxt: an XML parser context
9828 *
9829 * Parse escaped pure raw content.
9830 *
9831 * [18] CDSect ::= CDStart CData CDEnd
9832 *
9833 * [19] CDStart ::= '<![CDATA['
9834 *
9835 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9836 *
9837 * [21] CDEnd ::= ']]>'
9838 */
9839 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9840 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9841 xmlChar *buf = NULL;
9842 int len = 0;
9843 int size = XML_PARSER_BUFFER_SIZE;
9844 int r, rl;
9845 int s, sl;
9846 int cur, l;
9847 int count = 0;
9848
9849 /* Check 2.6.0 was NXT(0) not RAW */
9850 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9851 SKIP(9);
9852 } else
9853 return;
9854
9855 ctxt->instate = XML_PARSER_CDATA_SECTION;
9856 r = CUR_CHAR(rl);
9857 if (!IS_CHAR(r)) {
9858 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9859 ctxt->instate = XML_PARSER_CONTENT;
9860 return;
9861 }
9862 NEXTL(rl);
9863 s = CUR_CHAR(sl);
9864 if (!IS_CHAR(s)) {
9865 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9866 ctxt->instate = XML_PARSER_CONTENT;
9867 return;
9868 }
9869 NEXTL(sl);
9870 cur = CUR_CHAR(l);
9871 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9872 if (buf == NULL) {
9873 xmlErrMemory(ctxt, NULL);
9874 return;
9875 }
9876 while (IS_CHAR(cur) &&
9877 ((r != ']') || (s != ']') || (cur != '>'))) {
9878 if (len + 5 >= size) {
9879 xmlChar *tmp;
9880
9881 if ((size > XML_MAX_TEXT_LENGTH) &&
9882 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9883 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9884 "CData section too big found", NULL);
9885 xmlFree (buf);
9886 return;
9887 }
9888 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9889 if (tmp == NULL) {
9890 xmlFree(buf);
9891 xmlErrMemory(ctxt, NULL);
9892 return;
9893 }
9894 buf = tmp;
9895 size *= 2;
9896 }
9897 COPY_BUF(rl,buf,len,r);
9898 r = s;
9899 rl = sl;
9900 s = cur;
9901 sl = l;
9902 count++;
9903 if (count > 50) {
9904 GROW;
9905 if (ctxt->instate == XML_PARSER_EOF) {
9906 xmlFree(buf);
9907 return;
9908 }
9909 count = 0;
9910 }
9911 NEXTL(l);
9912 cur = CUR_CHAR(l);
9913 }
9914 buf[len] = 0;
9915 ctxt->instate = XML_PARSER_CONTENT;
9916 if (cur != '>') {
9917 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9918 "CData section not finished\n%.50s\n", buf);
9919 xmlFree(buf);
9920 return;
9921 }
9922 NEXTL(l);
9923
9924 /*
9925 * OK the buffer is to be consumed as cdata.
9926 */
9927 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9928 if (ctxt->sax->cdataBlock != NULL)
9929 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9930 else if (ctxt->sax->characters != NULL)
9931 ctxt->sax->characters(ctxt->userData, buf, len);
9932 }
9933 xmlFree(buf);
9934 }
9935
9936 /**
9937 * xmlParseContent:
9938 * @ctxt: an XML parser context
9939 *
9940 * Parse a content:
9941 *
9942 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9943 */
9944
9945 void
xmlParseContent(xmlParserCtxtPtr ctxt)9946 xmlParseContent(xmlParserCtxtPtr ctxt) {
9947 GROW;
9948 while ((RAW != 0) &&
9949 ((RAW != '<') || (NXT(1) != '/')) &&
9950 (ctxt->instate != XML_PARSER_EOF)) {
9951 const xmlChar *test = CUR_PTR;
9952 unsigned int cons = ctxt->input->consumed;
9953 const xmlChar *cur = ctxt->input->cur;
9954
9955 /*
9956 * First case : a Processing Instruction.
9957 */
9958 if ((*cur == '<') && (cur[1] == '?')) {
9959 xmlParsePI(ctxt);
9960 }
9961
9962 /*
9963 * Second case : a CDSection
9964 */
9965 /* 2.6.0 test was *cur not RAW */
9966 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9967 xmlParseCDSect(ctxt);
9968 }
9969
9970 /*
9971 * Third case : a comment
9972 */
9973 else if ((*cur == '<') && (NXT(1) == '!') &&
9974 (NXT(2) == '-') && (NXT(3) == '-')) {
9975 xmlParseComment(ctxt);
9976 ctxt->instate = XML_PARSER_CONTENT;
9977 }
9978
9979 /*
9980 * Fourth case : a sub-element.
9981 */
9982 else if (*cur == '<') {
9983 xmlParseElement(ctxt);
9984 }
9985
9986 /*
9987 * Fifth case : a reference. If if has not been resolved,
9988 * parsing returns it's Name, create the node
9989 */
9990
9991 else if (*cur == '&') {
9992 xmlParseReference(ctxt);
9993 }
9994
9995 /*
9996 * Last case, text. Note that References are handled directly.
9997 */
9998 else {
9999 xmlParseCharData(ctxt, 0);
10000 }
10001
10002 GROW;
10003 /*
10004 * Pop-up of finished entities.
10005 */
10006 while ((RAW == 0) && (ctxt->inputNr > 1))
10007 xmlPopInput(ctxt);
10008 SHRINK;
10009
10010 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10011 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10012 "detected an error in element content\n");
10013 ctxt->instate = XML_PARSER_EOF;
10014 break;
10015 }
10016 }
10017 }
10018
10019 /**
10020 * xmlParseElement:
10021 * @ctxt: an XML parser context
10022 *
10023 * parse an XML element, this is highly recursive
10024 *
10025 * [39] element ::= EmptyElemTag | STag content ETag
10026 *
10027 * [ WFC: Element Type Match ]
10028 * The Name in an element's end-tag must match the element type in the
10029 * start-tag.
10030 *
10031 */
10032
10033 void
xmlParseElement(xmlParserCtxtPtr ctxt)10034 xmlParseElement(xmlParserCtxtPtr ctxt) {
10035 const xmlChar *name;
10036 const xmlChar *prefix = NULL;
10037 const xmlChar *URI = NULL;
10038 xmlParserNodeInfo node_info;
10039 int line, tlen = 0;
10040 xmlNodePtr ret;
10041 int nsNr = ctxt->nsNr;
10042
10043 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10044 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10045 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10046 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10047 xmlParserMaxDepth);
10048 ctxt->instate = XML_PARSER_EOF;
10049 return;
10050 }
10051
10052 /* Capture start position */
10053 if (ctxt->record_info) {
10054 node_info.begin_pos = ctxt->input->consumed +
10055 (CUR_PTR - ctxt->input->base);
10056 node_info.begin_line = ctxt->input->line;
10057 }
10058
10059 if (ctxt->spaceNr == 0)
10060 spacePush(ctxt, -1);
10061 else if (*ctxt->space == -2)
10062 spacePush(ctxt, -1);
10063 else
10064 spacePush(ctxt, *ctxt->space);
10065
10066 line = ctxt->input->line;
10067 #ifdef LIBXML_SAX1_ENABLED
10068 if (ctxt->sax2)
10069 #endif /* LIBXML_SAX1_ENABLED */
10070 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10071 #ifdef LIBXML_SAX1_ENABLED
10072 else
10073 name = xmlParseStartTag(ctxt);
10074 #endif /* LIBXML_SAX1_ENABLED */
10075 if (ctxt->instate == XML_PARSER_EOF)
10076 return;
10077 if (name == NULL) {
10078 spacePop(ctxt);
10079 return;
10080 }
10081 namePush(ctxt, name);
10082 ret = ctxt->node;
10083
10084 #ifdef LIBXML_VALID_ENABLED
10085 /*
10086 * [ VC: Root Element Type ]
10087 * The Name in the document type declaration must match the element
10088 * type of the root element.
10089 */
10090 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10091 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10092 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10093 #endif /* LIBXML_VALID_ENABLED */
10094
10095 /*
10096 * Check for an Empty Element.
10097 */
10098 if ((RAW == '/') && (NXT(1) == '>')) {
10099 SKIP(2);
10100 if (ctxt->sax2) {
10101 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10102 (!ctxt->disableSAX))
10103 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10104 #ifdef LIBXML_SAX1_ENABLED
10105 } else {
10106 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10107 (!ctxt->disableSAX))
10108 ctxt->sax->endElement(ctxt->userData, name);
10109 #endif /* LIBXML_SAX1_ENABLED */
10110 }
10111 namePop(ctxt);
10112 spacePop(ctxt);
10113 if (nsNr != ctxt->nsNr)
10114 nsPop(ctxt, ctxt->nsNr - nsNr);
10115 if ( ret != NULL && ctxt->record_info ) {
10116 node_info.end_pos = ctxt->input->consumed +
10117 (CUR_PTR - ctxt->input->base);
10118 node_info.end_line = ctxt->input->line;
10119 node_info.node = ret;
10120 xmlParserAddNodeInfo(ctxt, &node_info);
10121 }
10122 return;
10123 }
10124 if (RAW == '>') {
10125 NEXT1;
10126 } else {
10127 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10128 "Couldn't find end of Start Tag %s line %d\n",
10129 name, line, NULL);
10130
10131 /*
10132 * end of parsing of this node.
10133 */
10134 nodePop(ctxt);
10135 namePop(ctxt);
10136 spacePop(ctxt);
10137 if (nsNr != ctxt->nsNr)
10138 nsPop(ctxt, ctxt->nsNr - nsNr);
10139
10140 /*
10141 * Capture end position and add node
10142 */
10143 if ( ret != NULL && ctxt->record_info ) {
10144 node_info.end_pos = ctxt->input->consumed +
10145 (CUR_PTR - ctxt->input->base);
10146 node_info.end_line = ctxt->input->line;
10147 node_info.node = ret;
10148 xmlParserAddNodeInfo(ctxt, &node_info);
10149 }
10150 return;
10151 }
10152
10153 /*
10154 * Parse the content of the element:
10155 */
10156 xmlParseContent(ctxt);
10157 if (ctxt->instate == XML_PARSER_EOF)
10158 return;
10159 if (!IS_BYTE_CHAR(RAW)) {
10160 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10161 "Premature end of data in tag %s line %d\n",
10162 name, line, NULL);
10163
10164 /*
10165 * end of parsing of this node.
10166 */
10167 nodePop(ctxt);
10168 namePop(ctxt);
10169 spacePop(ctxt);
10170 if (nsNr != ctxt->nsNr)
10171 nsPop(ctxt, ctxt->nsNr - nsNr);
10172 return;
10173 }
10174
10175 /*
10176 * parse the end of tag: '</' should be here.
10177 */
10178 if (ctxt->sax2) {
10179 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10180 namePop(ctxt);
10181 }
10182 #ifdef LIBXML_SAX1_ENABLED
10183 else
10184 xmlParseEndTag1(ctxt, line);
10185 #endif /* LIBXML_SAX1_ENABLED */
10186
10187 /*
10188 * Capture end position and add node
10189 */
10190 if ( ret != NULL && ctxt->record_info ) {
10191 node_info.end_pos = ctxt->input->consumed +
10192 (CUR_PTR - ctxt->input->base);
10193 node_info.end_line = ctxt->input->line;
10194 node_info.node = ret;
10195 xmlParserAddNodeInfo(ctxt, &node_info);
10196 }
10197 }
10198
10199 /**
10200 * xmlParseVersionNum:
10201 * @ctxt: an XML parser context
10202 *
10203 * parse the XML version value.
10204 *
10205 * [26] VersionNum ::= '1.' [0-9]+
10206 *
10207 * In practice allow [0-9].[0-9]+ at that level
10208 *
10209 * Returns the string giving the XML version number, or NULL
10210 */
10211 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10212 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10213 xmlChar *buf = NULL;
10214 int len = 0;
10215 int size = 10;
10216 xmlChar cur;
10217
10218 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10219 if (buf == NULL) {
10220 xmlErrMemory(ctxt, NULL);
10221 return(NULL);
10222 }
10223 cur = CUR;
10224 if (!((cur >= '0') && (cur <= '9'))) {
10225 xmlFree(buf);
10226 return(NULL);
10227 }
10228 buf[len++] = cur;
10229 NEXT;
10230 cur=CUR;
10231 if (cur != '.') {
10232 xmlFree(buf);
10233 return(NULL);
10234 }
10235 buf[len++] = cur;
10236 NEXT;
10237 cur=CUR;
10238 while ((cur >= '0') && (cur <= '9')) {
10239 if (len + 1 >= size) {
10240 xmlChar *tmp;
10241
10242 size *= 2;
10243 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10244 if (tmp == NULL) {
10245 xmlFree(buf);
10246 xmlErrMemory(ctxt, NULL);
10247 return(NULL);
10248 }
10249 buf = tmp;
10250 }
10251 buf[len++] = cur;
10252 NEXT;
10253 cur=CUR;
10254 }
10255 buf[len] = 0;
10256 return(buf);
10257 }
10258
10259 /**
10260 * xmlParseVersionInfo:
10261 * @ctxt: an XML parser context
10262 *
10263 * parse the XML version.
10264 *
10265 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10266 *
10267 * [25] Eq ::= S? '=' S?
10268 *
10269 * Returns the version string, e.g. "1.0"
10270 */
10271
10272 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10273 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10274 xmlChar *version = NULL;
10275
10276 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10277 SKIP(7);
10278 SKIP_BLANKS;
10279 if (RAW != '=') {
10280 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10281 return(NULL);
10282 }
10283 NEXT;
10284 SKIP_BLANKS;
10285 if (RAW == '"') {
10286 NEXT;
10287 version = xmlParseVersionNum(ctxt);
10288 if (RAW != '"') {
10289 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10290 } else
10291 NEXT;
10292 } else if (RAW == '\''){
10293 NEXT;
10294 version = xmlParseVersionNum(ctxt);
10295 if (RAW != '\'') {
10296 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10297 } else
10298 NEXT;
10299 } else {
10300 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10301 }
10302 }
10303 return(version);
10304 }
10305
10306 /**
10307 * xmlParseEncName:
10308 * @ctxt: an XML parser context
10309 *
10310 * parse the XML encoding name
10311 *
10312 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10313 *
10314 * Returns the encoding name value or NULL
10315 */
10316 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10317 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10318 xmlChar *buf = NULL;
10319 int len = 0;
10320 int size = 10;
10321 xmlChar cur;
10322
10323 cur = CUR;
10324 if (((cur >= 'a') && (cur <= 'z')) ||
10325 ((cur >= 'A') && (cur <= 'Z'))) {
10326 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10327 if (buf == NULL) {
10328 xmlErrMemory(ctxt, NULL);
10329 return(NULL);
10330 }
10331
10332 buf[len++] = cur;
10333 NEXT;
10334 cur = CUR;
10335 while (((cur >= 'a') && (cur <= 'z')) ||
10336 ((cur >= 'A') && (cur <= 'Z')) ||
10337 ((cur >= '0') && (cur <= '9')) ||
10338 (cur == '.') || (cur == '_') ||
10339 (cur == '-')) {
10340 if (len + 1 >= size) {
10341 xmlChar *tmp;
10342
10343 size *= 2;
10344 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10345 if (tmp == NULL) {
10346 xmlErrMemory(ctxt, NULL);
10347 xmlFree(buf);
10348 return(NULL);
10349 }
10350 buf = tmp;
10351 }
10352 buf[len++] = cur;
10353 NEXT;
10354 cur = CUR;
10355 if (cur == 0) {
10356 SHRINK;
10357 GROW;
10358 cur = CUR;
10359 }
10360 }
10361 buf[len] = 0;
10362 } else {
10363 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10364 }
10365 return(buf);
10366 }
10367
10368 /**
10369 * xmlParseEncodingDecl:
10370 * @ctxt: an XML parser context
10371 *
10372 * parse the XML encoding declaration
10373 *
10374 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10375 *
10376 * this setups the conversion filters.
10377 *
10378 * Returns the encoding value or NULL
10379 */
10380
10381 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10382 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10383 xmlChar *encoding = NULL;
10384
10385 SKIP_BLANKS;
10386 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10387 SKIP(8);
10388 SKIP_BLANKS;
10389 if (RAW != '=') {
10390 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10391 return(NULL);
10392 }
10393 NEXT;
10394 SKIP_BLANKS;
10395 if (RAW == '"') {
10396 NEXT;
10397 encoding = xmlParseEncName(ctxt);
10398 if (RAW != '"') {
10399 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10400 } else
10401 NEXT;
10402 } else if (RAW == '\''){
10403 NEXT;
10404 encoding = xmlParseEncName(ctxt);
10405 if (RAW != '\'') {
10406 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10407 } else
10408 NEXT;
10409 } else {
10410 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10411 }
10412
10413 /*
10414 * Non standard parsing, allowing the user to ignore encoding
10415 */
10416 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10417 xmlFree((xmlChar *) encoding);
10418 return(NULL);
10419 }
10420
10421 /*
10422 * UTF-16 encoding stwich has already taken place at this stage,
10423 * more over the little-endian/big-endian selection is already done
10424 */
10425 if ((encoding != NULL) &&
10426 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10427 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10428 /*
10429 * If no encoding was passed to the parser, that we are
10430 * using UTF-16 and no decoder is present i.e. the
10431 * document is apparently UTF-8 compatible, then raise an
10432 * encoding mismatch fatal error
10433 */
10434 if ((ctxt->encoding == NULL) &&
10435 (ctxt->input->buf != NULL) &&
10436 (ctxt->input->buf->encoder == NULL)) {
10437 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10438 "Document labelled UTF-16 but has UTF-8 content\n");
10439 }
10440 if (ctxt->encoding != NULL)
10441 xmlFree((xmlChar *) ctxt->encoding);
10442 ctxt->encoding = encoding;
10443 }
10444 /*
10445 * UTF-8 encoding is handled natively
10446 */
10447 else if ((encoding != NULL) &&
10448 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10449 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10450 if (ctxt->encoding != NULL)
10451 xmlFree((xmlChar *) ctxt->encoding);
10452 ctxt->encoding = encoding;
10453 }
10454 else if (encoding != NULL) {
10455 xmlCharEncodingHandlerPtr handler;
10456
10457 if (ctxt->input->encoding != NULL)
10458 xmlFree((xmlChar *) ctxt->input->encoding);
10459 ctxt->input->encoding = encoding;
10460
10461 handler = xmlFindCharEncodingHandler((const char *) encoding);
10462 if (handler != NULL) {
10463 xmlSwitchToEncoding(ctxt, handler);
10464 } else {
10465 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10466 "Unsupported encoding %s\n", encoding);
10467 return(NULL);
10468 }
10469 }
10470 }
10471 return(encoding);
10472 }
10473
10474 /**
10475 * xmlParseSDDecl:
10476 * @ctxt: an XML parser context
10477 *
10478 * parse the XML standalone declaration
10479 *
10480 * [32] SDDecl ::= S 'standalone' Eq
10481 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10482 *
10483 * [ VC: Standalone Document Declaration ]
10484 * TODO The standalone document declaration must have the value "no"
10485 * if any external markup declarations contain declarations of:
10486 * - attributes with default values, if elements to which these
10487 * attributes apply appear in the document without specifications
10488 * of values for these attributes, or
10489 * - entities (other than amp, lt, gt, apos, quot), if references
10490 * to those entities appear in the document, or
10491 * - attributes with values subject to normalization, where the
10492 * attribute appears in the document with a value which will change
10493 * as a result of normalization, or
10494 * - element types with element content, if white space occurs directly
10495 * within any instance of those types.
10496 *
10497 * Returns:
10498 * 1 if standalone="yes"
10499 * 0 if standalone="no"
10500 * -2 if standalone attribute is missing or invalid
10501 * (A standalone value of -2 means that the XML declaration was found,
10502 * but no value was specified for the standalone attribute).
10503 */
10504
10505 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10506 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10507 int standalone = -2;
10508
10509 SKIP_BLANKS;
10510 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10511 SKIP(10);
10512 SKIP_BLANKS;
10513 if (RAW != '=') {
10514 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10515 return(standalone);
10516 }
10517 NEXT;
10518 SKIP_BLANKS;
10519 if (RAW == '\''){
10520 NEXT;
10521 if ((RAW == 'n') && (NXT(1) == 'o')) {
10522 standalone = 0;
10523 SKIP(2);
10524 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10525 (NXT(2) == 's')) {
10526 standalone = 1;
10527 SKIP(3);
10528 } else {
10529 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10530 }
10531 if (RAW != '\'') {
10532 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10533 } else
10534 NEXT;
10535 } else if (RAW == '"'){
10536 NEXT;
10537 if ((RAW == 'n') && (NXT(1) == 'o')) {
10538 standalone = 0;
10539 SKIP(2);
10540 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10541 (NXT(2) == 's')) {
10542 standalone = 1;
10543 SKIP(3);
10544 } else {
10545 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10546 }
10547 if (RAW != '"') {
10548 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10549 } else
10550 NEXT;
10551 } else {
10552 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10553 }
10554 }
10555 return(standalone);
10556 }
10557
10558 /**
10559 * xmlParseXMLDecl:
10560 * @ctxt: an XML parser context
10561 *
10562 * parse an XML declaration header
10563 *
10564 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10565 */
10566
10567 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10568 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10569 xmlChar *version;
10570
10571 /*
10572 * This value for standalone indicates that the document has an
10573 * XML declaration but it does not have a standalone attribute.
10574 * It will be overwritten later if a standalone attribute is found.
10575 */
10576 ctxt->input->standalone = -2;
10577
10578 /*
10579 * We know that '<?xml' is here.
10580 */
10581 SKIP(5);
10582
10583 if (!IS_BLANK_CH(RAW)) {
10584 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10585 "Blank needed after '<?xml'\n");
10586 }
10587 SKIP_BLANKS;
10588
10589 /*
10590 * We must have the VersionInfo here.
10591 */
10592 version = xmlParseVersionInfo(ctxt);
10593 if (version == NULL) {
10594 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10595 } else {
10596 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10597 /*
10598 * Changed here for XML-1.0 5th edition
10599 */
10600 if (ctxt->options & XML_PARSE_OLD10) {
10601 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10602 "Unsupported version '%s'\n",
10603 version);
10604 } else {
10605 if ((version[0] == '1') && ((version[1] == '.'))) {
10606 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10607 "Unsupported version '%s'\n",
10608 version, NULL);
10609 } else {
10610 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10611 "Unsupported version '%s'\n",
10612 version);
10613 }
10614 }
10615 }
10616 if (ctxt->version != NULL)
10617 xmlFree((void *) ctxt->version);
10618 ctxt->version = version;
10619 }
10620
10621 /*
10622 * We may have the encoding declaration
10623 */
10624 if (!IS_BLANK_CH(RAW)) {
10625 if ((RAW == '?') && (NXT(1) == '>')) {
10626 SKIP(2);
10627 return;
10628 }
10629 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10630 }
10631 xmlParseEncodingDecl(ctxt);
10632 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10633 /*
10634 * The XML REC instructs us to stop parsing right here
10635 */
10636 return;
10637 }
10638
10639 /*
10640 * We may have the standalone status.
10641 */
10642 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10643 if ((RAW == '?') && (NXT(1) == '>')) {
10644 SKIP(2);
10645 return;
10646 }
10647 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10648 }
10649
10650 /*
10651 * We can grow the input buffer freely at that point
10652 */
10653 GROW;
10654
10655 SKIP_BLANKS;
10656 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10657
10658 SKIP_BLANKS;
10659 if ((RAW == '?') && (NXT(1) == '>')) {
10660 SKIP(2);
10661 } else if (RAW == '>') {
10662 /* Deprecated old WD ... */
10663 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10664 NEXT;
10665 } else {
10666 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10667 MOVETO_ENDTAG(CUR_PTR);
10668 NEXT;
10669 }
10670 }
10671
10672 /**
10673 * xmlParseMisc:
10674 * @ctxt: an XML parser context
10675 *
10676 * parse an XML Misc* optional field.
10677 *
10678 * [27] Misc ::= Comment | PI | S
10679 */
10680
10681 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10682 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10683 while ((ctxt->instate != XML_PARSER_EOF) &&
10684 (((RAW == '<') && (NXT(1) == '?')) ||
10685 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10686 IS_BLANK_CH(CUR))) {
10687 if ((RAW == '<') && (NXT(1) == '?')) {
10688 xmlParsePI(ctxt);
10689 } else if (IS_BLANK_CH(CUR)) {
10690 NEXT;
10691 } else
10692 xmlParseComment(ctxt);
10693 }
10694 }
10695
10696 /**
10697 * xmlParseDocument:
10698 * @ctxt: an XML parser context
10699 *
10700 * parse an XML document (and build a tree if using the standard SAX
10701 * interface).
10702 *
10703 * [1] document ::= prolog element Misc*
10704 *
10705 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10706 *
10707 * Returns 0, -1 in case of error. the parser context is augmented
10708 * as a result of the parsing.
10709 */
10710
10711 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10712 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10713 xmlChar start[4];
10714 xmlCharEncoding enc;
10715
10716 xmlInitParser();
10717
10718 if ((ctxt == NULL) || (ctxt->input == NULL))
10719 return(-1);
10720
10721 GROW;
10722
10723 /*
10724 * SAX: detecting the level.
10725 */
10726 xmlDetectSAX2(ctxt);
10727
10728 /*
10729 * SAX: beginning of the document processing.
10730 */
10731 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10732 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10733 if (ctxt->instate == XML_PARSER_EOF)
10734 return(-1);
10735
10736 if ((ctxt->encoding == NULL) &&
10737 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10738 /*
10739 * Get the 4 first bytes and decode the charset
10740 * if enc != XML_CHAR_ENCODING_NONE
10741 * plug some encoding conversion routines.
10742 */
10743 start[0] = RAW;
10744 start[1] = NXT(1);
10745 start[2] = NXT(2);
10746 start[3] = NXT(3);
10747 enc = xmlDetectCharEncoding(&start[0], 4);
10748 if (enc != XML_CHAR_ENCODING_NONE) {
10749 xmlSwitchEncoding(ctxt, enc);
10750 }
10751 }
10752
10753
10754 if (CUR == 0) {
10755 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10756 }
10757
10758 /*
10759 * Check for the XMLDecl in the Prolog.
10760 * do not GROW here to avoid the detected encoder to decode more
10761 * than just the first line, unless the amount of data is really
10762 * too small to hold "<?xml version="1.0" encoding="foo"
10763 */
10764 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10765 GROW;
10766 }
10767 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10768
10769 /*
10770 * Note that we will switch encoding on the fly.
10771 */
10772 xmlParseXMLDecl(ctxt);
10773 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10774 /*
10775 * The XML REC instructs us to stop parsing right here
10776 */
10777 return(-1);
10778 }
10779 ctxt->standalone = ctxt->input->standalone;
10780 SKIP_BLANKS;
10781 } else {
10782 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10783 }
10784 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10785 ctxt->sax->startDocument(ctxt->userData);
10786 if (ctxt->instate == XML_PARSER_EOF)
10787 return(-1);
10788 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10789 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10790 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10791 }
10792
10793 /*
10794 * The Misc part of the Prolog
10795 */
10796 GROW;
10797 xmlParseMisc(ctxt);
10798
10799 /*
10800 * Then possibly doc type declaration(s) and more Misc
10801 * (doctypedecl Misc*)?
10802 */
10803 GROW;
10804 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10805
10806 ctxt->inSubset = 1;
10807 xmlParseDocTypeDecl(ctxt);
10808 if (RAW == '[') {
10809 ctxt->instate = XML_PARSER_DTD;
10810 xmlParseInternalSubset(ctxt);
10811 if (ctxt->instate == XML_PARSER_EOF)
10812 return(-1);
10813 }
10814
10815 /*
10816 * Create and update the external subset.
10817 */
10818 ctxt->inSubset = 2;
10819 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10820 (!ctxt->disableSAX))
10821 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10822 ctxt->extSubSystem, ctxt->extSubURI);
10823 if (ctxt->instate == XML_PARSER_EOF)
10824 return(-1);
10825 ctxt->inSubset = 0;
10826
10827 xmlCleanSpecialAttr(ctxt);
10828
10829 ctxt->instate = XML_PARSER_PROLOG;
10830 xmlParseMisc(ctxt);
10831 }
10832
10833 /*
10834 * Time to start parsing the tree itself
10835 */
10836 GROW;
10837 if (RAW != '<') {
10838 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10839 "Start tag expected, '<' not found\n");
10840 } else {
10841 ctxt->instate = XML_PARSER_CONTENT;
10842 xmlParseElement(ctxt);
10843 ctxt->instate = XML_PARSER_EPILOG;
10844
10845
10846 /*
10847 * The Misc part at the end
10848 */
10849 xmlParseMisc(ctxt);
10850
10851 if (RAW != 0) {
10852 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10853 }
10854 ctxt->instate = XML_PARSER_EOF;
10855 }
10856
10857 /*
10858 * SAX: end of the document processing.
10859 */
10860 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10861 ctxt->sax->endDocument(ctxt->userData);
10862
10863 /*
10864 * Remove locally kept entity definitions if the tree was not built
10865 */
10866 if ((ctxt->myDoc != NULL) &&
10867 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10868 xmlFreeDoc(ctxt->myDoc);
10869 ctxt->myDoc = NULL;
10870 }
10871
10872 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10873 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10874 if (ctxt->valid)
10875 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10876 if (ctxt->nsWellFormed)
10877 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10878 if (ctxt->options & XML_PARSE_OLD10)
10879 ctxt->myDoc->properties |= XML_DOC_OLD10;
10880 }
10881 if (! ctxt->wellFormed) {
10882 ctxt->valid = 0;
10883 return(-1);
10884 }
10885 return(0);
10886 }
10887
10888 /**
10889 * xmlParseExtParsedEnt:
10890 * @ctxt: an XML parser context
10891 *
10892 * parse a general parsed entity
10893 * An external general parsed entity is well-formed if it matches the
10894 * production labeled extParsedEnt.
10895 *
10896 * [78] extParsedEnt ::= TextDecl? content
10897 *
10898 * Returns 0, -1 in case of error. the parser context is augmented
10899 * as a result of the parsing.
10900 */
10901
10902 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10903 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10904 xmlChar start[4];
10905 xmlCharEncoding enc;
10906
10907 if ((ctxt == NULL) || (ctxt->input == NULL))
10908 return(-1);
10909
10910 xmlDefaultSAXHandlerInit();
10911
10912 xmlDetectSAX2(ctxt);
10913
10914 GROW;
10915
10916 /*
10917 * SAX: beginning of the document processing.
10918 */
10919 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10920 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10921
10922 /*
10923 * Get the 4 first bytes and decode the charset
10924 * if enc != XML_CHAR_ENCODING_NONE
10925 * plug some encoding conversion routines.
10926 */
10927 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10928 start[0] = RAW;
10929 start[1] = NXT(1);
10930 start[2] = NXT(2);
10931 start[3] = NXT(3);
10932 enc = xmlDetectCharEncoding(start, 4);
10933 if (enc != XML_CHAR_ENCODING_NONE) {
10934 xmlSwitchEncoding(ctxt, enc);
10935 }
10936 }
10937
10938
10939 if (CUR == 0) {
10940 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10941 }
10942
10943 /*
10944 * Check for the XMLDecl in the Prolog.
10945 */
10946 GROW;
10947 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10948
10949 /*
10950 * Note that we will switch encoding on the fly.
10951 */
10952 xmlParseXMLDecl(ctxt);
10953 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10954 /*
10955 * The XML REC instructs us to stop parsing right here
10956 */
10957 return(-1);
10958 }
10959 SKIP_BLANKS;
10960 } else {
10961 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10962 }
10963 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10964 ctxt->sax->startDocument(ctxt->userData);
10965 if (ctxt->instate == XML_PARSER_EOF)
10966 return(-1);
10967
10968 /*
10969 * Doing validity checking on chunk doesn't make sense
10970 */
10971 ctxt->instate = XML_PARSER_CONTENT;
10972 ctxt->validate = 0;
10973 ctxt->loadsubset = 0;
10974 ctxt->depth = 0;
10975
10976 xmlParseContent(ctxt);
10977 if (ctxt->instate == XML_PARSER_EOF)
10978 return(-1);
10979
10980 if ((RAW == '<') && (NXT(1) == '/')) {
10981 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10982 } else if (RAW != 0) {
10983 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10984 }
10985
10986 /*
10987 * SAX: end of the document processing.
10988 */
10989 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10990 ctxt->sax->endDocument(ctxt->userData);
10991
10992 if (! ctxt->wellFormed) return(-1);
10993 return(0);
10994 }
10995
10996 #ifdef LIBXML_PUSH_ENABLED
10997 /************************************************************************
10998 * *
10999 * Progressive parsing interfaces *
11000 * *
11001 ************************************************************************/
11002
11003 /**
11004 * xmlParseLookupSequence:
11005 * @ctxt: an XML parser context
11006 * @first: the first char to lookup
11007 * @next: the next char to lookup or zero
11008 * @third: the next char to lookup or zero
11009 *
11010 * Try to find if a sequence (first, next, third) or just (first next) or
11011 * (first) is available in the input stream.
11012 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11013 * to avoid rescanning sequences of bytes, it DOES change the state of the
11014 * parser, do not use liberally.
11015 *
11016 * Returns the index to the current parsing point if the full sequence
11017 * is available, -1 otherwise.
11018 */
11019 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)11020 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11021 xmlChar next, xmlChar third) {
11022 int base, len;
11023 xmlParserInputPtr in;
11024 const xmlChar *buf;
11025
11026 in = ctxt->input;
11027 if (in == NULL) return(-1);
11028 base = in->cur - in->base;
11029 if (base < 0) return(-1);
11030 if (ctxt->checkIndex > base)
11031 base = ctxt->checkIndex;
11032 if (in->buf == NULL) {
11033 buf = in->base;
11034 len = in->length;
11035 } else {
11036 buf = xmlBufContent(in->buf->buffer);
11037 len = xmlBufUse(in->buf->buffer);
11038 }
11039 /* take into account the sequence length */
11040 if (third) len -= 2;
11041 else if (next) len --;
11042 for (;base < len;base++) {
11043 if (buf[base] == first) {
11044 if (third != 0) {
11045 if ((buf[base + 1] != next) ||
11046 (buf[base + 2] != third)) continue;
11047 } else if (next != 0) {
11048 if (buf[base + 1] != next) continue;
11049 }
11050 ctxt->checkIndex = 0;
11051 #ifdef DEBUG_PUSH
11052 if (next == 0)
11053 xmlGenericError(xmlGenericErrorContext,
11054 "PP: lookup '%c' found at %d\n",
11055 first, base);
11056 else if (third == 0)
11057 xmlGenericError(xmlGenericErrorContext,
11058 "PP: lookup '%c%c' found at %d\n",
11059 first, next, base);
11060 else
11061 xmlGenericError(xmlGenericErrorContext,
11062 "PP: lookup '%c%c%c' found at %d\n",
11063 first, next, third, base);
11064 #endif
11065 return(base - (in->cur - in->base));
11066 }
11067 }
11068 ctxt->checkIndex = base;
11069 #ifdef DEBUG_PUSH
11070 if (next == 0)
11071 xmlGenericError(xmlGenericErrorContext,
11072 "PP: lookup '%c' failed\n", first);
11073 else if (third == 0)
11074 xmlGenericError(xmlGenericErrorContext,
11075 "PP: lookup '%c%c' failed\n", first, next);
11076 else
11077 xmlGenericError(xmlGenericErrorContext,
11078 "PP: lookup '%c%c%c' failed\n", first, next, third);
11079 #endif
11080 return(-1);
11081 }
11082
11083 /**
11084 * xmlParseGetLasts:
11085 * @ctxt: an XML parser context
11086 * @lastlt: pointer to store the last '<' from the input
11087 * @lastgt: pointer to store the last '>' from the input
11088 *
11089 * Lookup the last < and > in the current chunk
11090 */
11091 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)11092 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11093 const xmlChar **lastgt) {
11094 const xmlChar *tmp;
11095
11096 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11097 xmlGenericError(xmlGenericErrorContext,
11098 "Internal error: xmlParseGetLasts\n");
11099 return;
11100 }
11101 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11102 tmp = ctxt->input->end;
11103 tmp--;
11104 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11105 if (tmp < ctxt->input->base) {
11106 *lastlt = NULL;
11107 *lastgt = NULL;
11108 } else {
11109 *lastlt = tmp;
11110 tmp++;
11111 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11112 if (*tmp == '\'') {
11113 tmp++;
11114 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11115 if (tmp < ctxt->input->end) tmp++;
11116 } else if (*tmp == '"') {
11117 tmp++;
11118 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11119 if (tmp < ctxt->input->end) tmp++;
11120 } else
11121 tmp++;
11122 }
11123 if (tmp < ctxt->input->end)
11124 *lastgt = tmp;
11125 else {
11126 tmp = *lastlt;
11127 tmp--;
11128 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11129 if (tmp >= ctxt->input->base)
11130 *lastgt = tmp;
11131 else
11132 *lastgt = NULL;
11133 }
11134 }
11135 } else {
11136 *lastlt = NULL;
11137 *lastgt = NULL;
11138 }
11139 }
11140 /**
11141 * xmlCheckCdataPush:
11142 * @cur: pointer to the bock of characters
11143 * @len: length of the block in bytes
11144 *
11145 * Check that the block of characters is okay as SCdata content [20]
11146 *
11147 * Returns the number of bytes to pass if okay, a negative index where an
11148 * UTF-8 error occured otherwise
11149 */
11150 static int
xmlCheckCdataPush(const xmlChar * utf,int len)11151 xmlCheckCdataPush(const xmlChar *utf, int len) {
11152 int ix;
11153 unsigned char c;
11154 int codepoint;
11155
11156 if ((utf == NULL) || (len <= 0))
11157 return(0);
11158
11159 for (ix = 0; ix < len;) { /* string is 0-terminated */
11160 c = utf[ix];
11161 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11162 if (c >= 0x20)
11163 ix++;
11164 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11165 ix++;
11166 else
11167 return(-ix);
11168 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11169 if (ix + 2 > len) return(ix);
11170 if ((utf[ix+1] & 0xc0 ) != 0x80)
11171 return(-ix);
11172 codepoint = (utf[ix] & 0x1f) << 6;
11173 codepoint |= utf[ix+1] & 0x3f;
11174 if (!xmlIsCharQ(codepoint))
11175 return(-ix);
11176 ix += 2;
11177 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11178 if (ix + 3 > len) return(ix);
11179 if (((utf[ix+1] & 0xc0) != 0x80) ||
11180 ((utf[ix+2] & 0xc0) != 0x80))
11181 return(-ix);
11182 codepoint = (utf[ix] & 0xf) << 12;
11183 codepoint |= (utf[ix+1] & 0x3f) << 6;
11184 codepoint |= utf[ix+2] & 0x3f;
11185 if (!xmlIsCharQ(codepoint))
11186 return(-ix);
11187 ix += 3;
11188 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11189 if (ix + 4 > len) return(ix);
11190 if (((utf[ix+1] & 0xc0) != 0x80) ||
11191 ((utf[ix+2] & 0xc0) != 0x80) ||
11192 ((utf[ix+3] & 0xc0) != 0x80))
11193 return(-ix);
11194 codepoint = (utf[ix] & 0x7) << 18;
11195 codepoint |= (utf[ix+1] & 0x3f) << 12;
11196 codepoint |= (utf[ix+2] & 0x3f) << 6;
11197 codepoint |= utf[ix+3] & 0x3f;
11198 if (!xmlIsCharQ(codepoint))
11199 return(-ix);
11200 ix += 4;
11201 } else /* unknown encoding */
11202 return(-ix);
11203 }
11204 return(ix);
11205 }
11206
11207 /**
11208 * xmlParseTryOrFinish:
11209 * @ctxt: an XML parser context
11210 * @terminate: last chunk indicator
11211 *
11212 * Try to progress on parsing
11213 *
11214 * Returns zero if no parsing was possible
11215 */
11216 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11217 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11218 int ret = 0;
11219 int avail, tlen;
11220 xmlChar cur, next;
11221 const xmlChar *lastlt, *lastgt;
11222
11223 if (ctxt->input == NULL)
11224 return(0);
11225
11226 #ifdef DEBUG_PUSH
11227 switch (ctxt->instate) {
11228 case XML_PARSER_EOF:
11229 xmlGenericError(xmlGenericErrorContext,
11230 "PP: try EOF\n"); break;
11231 case XML_PARSER_START:
11232 xmlGenericError(xmlGenericErrorContext,
11233 "PP: try START\n"); break;
11234 case XML_PARSER_MISC:
11235 xmlGenericError(xmlGenericErrorContext,
11236 "PP: try MISC\n");break;
11237 case XML_PARSER_COMMENT:
11238 xmlGenericError(xmlGenericErrorContext,
11239 "PP: try COMMENT\n");break;
11240 case XML_PARSER_PROLOG:
11241 xmlGenericError(xmlGenericErrorContext,
11242 "PP: try PROLOG\n");break;
11243 case XML_PARSER_START_TAG:
11244 xmlGenericError(xmlGenericErrorContext,
11245 "PP: try START_TAG\n");break;
11246 case XML_PARSER_CONTENT:
11247 xmlGenericError(xmlGenericErrorContext,
11248 "PP: try CONTENT\n");break;
11249 case XML_PARSER_CDATA_SECTION:
11250 xmlGenericError(xmlGenericErrorContext,
11251 "PP: try CDATA_SECTION\n");break;
11252 case XML_PARSER_END_TAG:
11253 xmlGenericError(xmlGenericErrorContext,
11254 "PP: try END_TAG\n");break;
11255 case XML_PARSER_ENTITY_DECL:
11256 xmlGenericError(xmlGenericErrorContext,
11257 "PP: try ENTITY_DECL\n");break;
11258 case XML_PARSER_ENTITY_VALUE:
11259 xmlGenericError(xmlGenericErrorContext,
11260 "PP: try ENTITY_VALUE\n");break;
11261 case XML_PARSER_ATTRIBUTE_VALUE:
11262 xmlGenericError(xmlGenericErrorContext,
11263 "PP: try ATTRIBUTE_VALUE\n");break;
11264 case XML_PARSER_DTD:
11265 xmlGenericError(xmlGenericErrorContext,
11266 "PP: try DTD\n");break;
11267 case XML_PARSER_EPILOG:
11268 xmlGenericError(xmlGenericErrorContext,
11269 "PP: try EPILOG\n");break;
11270 case XML_PARSER_PI:
11271 xmlGenericError(xmlGenericErrorContext,
11272 "PP: try PI\n");break;
11273 case XML_PARSER_IGNORE:
11274 xmlGenericError(xmlGenericErrorContext,
11275 "PP: try IGNORE\n");break;
11276 }
11277 #endif
11278
11279 if ((ctxt->input != NULL) &&
11280 (ctxt->input->cur - ctxt->input->base > 4096)) {
11281 xmlSHRINK(ctxt);
11282 ctxt->checkIndex = 0;
11283 }
11284 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11285
11286 while (ctxt->instate != XML_PARSER_EOF) {
11287 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11288 return(0);
11289
11290
11291 /*
11292 * Pop-up of finished entities.
11293 */
11294 while ((RAW == 0) && (ctxt->inputNr > 1))
11295 xmlPopInput(ctxt);
11296
11297 if (ctxt->input == NULL) break;
11298 if (ctxt->input->buf == NULL)
11299 avail = ctxt->input->length -
11300 (ctxt->input->cur - ctxt->input->base);
11301 else {
11302 /*
11303 * If we are operating on converted input, try to flush
11304 * remaining chars to avoid them stalling in the non-converted
11305 * buffer. But do not do this in document start where
11306 * encoding="..." may not have been read and we work on a
11307 * guessed encoding.
11308 */
11309 if ((ctxt->instate != XML_PARSER_START) &&
11310 (ctxt->input->buf->raw != NULL) &&
11311 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11312 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11313 ctxt->input);
11314 size_t current = ctxt->input->cur - ctxt->input->base;
11315
11316 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11317 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11318 base, current);
11319 }
11320 avail = xmlBufUse(ctxt->input->buf->buffer) -
11321 (ctxt->input->cur - ctxt->input->base);
11322 }
11323 if (avail < 1)
11324 goto done;
11325 switch (ctxt->instate) {
11326 case XML_PARSER_EOF:
11327 /*
11328 * Document parsing is done !
11329 */
11330 goto done;
11331 case XML_PARSER_START:
11332 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11333 xmlChar start[4];
11334 xmlCharEncoding enc;
11335
11336 /*
11337 * Very first chars read from the document flow.
11338 */
11339 if (avail < 4)
11340 goto done;
11341
11342 /*
11343 * Get the 4 first bytes and decode the charset
11344 * if enc != XML_CHAR_ENCODING_NONE
11345 * plug some encoding conversion routines,
11346 * else xmlSwitchEncoding will set to (default)
11347 * UTF8.
11348 */
11349 start[0] = RAW;
11350 start[1] = NXT(1);
11351 start[2] = NXT(2);
11352 start[3] = NXT(3);
11353 enc = xmlDetectCharEncoding(start, 4);
11354 xmlSwitchEncoding(ctxt, enc);
11355 break;
11356 }
11357
11358 if (avail < 2)
11359 goto done;
11360 cur = ctxt->input->cur[0];
11361 next = ctxt->input->cur[1];
11362 if (cur == 0) {
11363 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11364 ctxt->sax->setDocumentLocator(ctxt->userData,
11365 &xmlDefaultSAXLocator);
11366 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11367 ctxt->instate = XML_PARSER_EOF;
11368 #ifdef DEBUG_PUSH
11369 xmlGenericError(xmlGenericErrorContext,
11370 "PP: entering EOF\n");
11371 #endif
11372 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11373 ctxt->sax->endDocument(ctxt->userData);
11374 goto done;
11375 }
11376 if ((cur == '<') && (next == '?')) {
11377 /* PI or XML decl */
11378 if (avail < 5) return(ret);
11379 if ((!terminate) &&
11380 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11381 return(ret);
11382 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11383 ctxt->sax->setDocumentLocator(ctxt->userData,
11384 &xmlDefaultSAXLocator);
11385 if ((ctxt->input->cur[2] == 'x') &&
11386 (ctxt->input->cur[3] == 'm') &&
11387 (ctxt->input->cur[4] == 'l') &&
11388 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11389 ret += 5;
11390 #ifdef DEBUG_PUSH
11391 xmlGenericError(xmlGenericErrorContext,
11392 "PP: Parsing XML Decl\n");
11393 #endif
11394 xmlParseXMLDecl(ctxt);
11395 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11396 /*
11397 * The XML REC instructs us to stop parsing right
11398 * here
11399 */
11400 ctxt->instate = XML_PARSER_EOF;
11401 return(0);
11402 }
11403 ctxt->standalone = ctxt->input->standalone;
11404 if ((ctxt->encoding == NULL) &&
11405 (ctxt->input->encoding != NULL))
11406 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11407 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11408 (!ctxt->disableSAX))
11409 ctxt->sax->startDocument(ctxt->userData);
11410 ctxt->instate = XML_PARSER_MISC;
11411 #ifdef DEBUG_PUSH
11412 xmlGenericError(xmlGenericErrorContext,
11413 "PP: entering MISC\n");
11414 #endif
11415 } else {
11416 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11417 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11418 (!ctxt->disableSAX))
11419 ctxt->sax->startDocument(ctxt->userData);
11420 ctxt->instate = XML_PARSER_MISC;
11421 #ifdef DEBUG_PUSH
11422 xmlGenericError(xmlGenericErrorContext,
11423 "PP: entering MISC\n");
11424 #endif
11425 }
11426 } else {
11427 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11428 ctxt->sax->setDocumentLocator(ctxt->userData,
11429 &xmlDefaultSAXLocator);
11430 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11431 if (ctxt->version == NULL) {
11432 xmlErrMemory(ctxt, NULL);
11433 break;
11434 }
11435 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11436 (!ctxt->disableSAX))
11437 ctxt->sax->startDocument(ctxt->userData);
11438 ctxt->instate = XML_PARSER_MISC;
11439 #ifdef DEBUG_PUSH
11440 xmlGenericError(xmlGenericErrorContext,
11441 "PP: entering MISC\n");
11442 #endif
11443 }
11444 break;
11445 case XML_PARSER_START_TAG: {
11446 const xmlChar *name;
11447 const xmlChar *prefix = NULL;
11448 const xmlChar *URI = NULL;
11449 int nsNr = ctxt->nsNr;
11450
11451 if ((avail < 2) && (ctxt->inputNr == 1))
11452 goto done;
11453 cur = ctxt->input->cur[0];
11454 if (cur != '<') {
11455 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11456 ctxt->instate = XML_PARSER_EOF;
11457 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11458 ctxt->sax->endDocument(ctxt->userData);
11459 goto done;
11460 }
11461 if (!terminate) {
11462 if (ctxt->progressive) {
11463 /* > can be found unescaped in attribute values */
11464 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11465 goto done;
11466 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11467 goto done;
11468 }
11469 }
11470 if (ctxt->spaceNr == 0)
11471 spacePush(ctxt, -1);
11472 else if (*ctxt->space == -2)
11473 spacePush(ctxt, -1);
11474 else
11475 spacePush(ctxt, *ctxt->space);
11476 #ifdef LIBXML_SAX1_ENABLED
11477 if (ctxt->sax2)
11478 #endif /* LIBXML_SAX1_ENABLED */
11479 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11480 #ifdef LIBXML_SAX1_ENABLED
11481 else
11482 name = xmlParseStartTag(ctxt);
11483 #endif /* LIBXML_SAX1_ENABLED */
11484 if (ctxt->instate == XML_PARSER_EOF)
11485 goto done;
11486 if (name == NULL) {
11487 spacePop(ctxt);
11488 ctxt->instate = XML_PARSER_EOF;
11489 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11490 ctxt->sax->endDocument(ctxt->userData);
11491 goto done;
11492 }
11493 #ifdef LIBXML_VALID_ENABLED
11494 /*
11495 * [ VC: Root Element Type ]
11496 * The Name in the document type declaration must match
11497 * the element type of the root element.
11498 */
11499 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11500 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11501 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11502 #endif /* LIBXML_VALID_ENABLED */
11503
11504 /*
11505 * Check for an Empty Element.
11506 */
11507 if ((RAW == '/') && (NXT(1) == '>')) {
11508 SKIP(2);
11509
11510 if (ctxt->sax2) {
11511 if ((ctxt->sax != NULL) &&
11512 (ctxt->sax->endElementNs != NULL) &&
11513 (!ctxt->disableSAX))
11514 ctxt->sax->endElementNs(ctxt->userData, name,
11515 prefix, URI);
11516 if (ctxt->nsNr - nsNr > 0)
11517 nsPop(ctxt, ctxt->nsNr - nsNr);
11518 #ifdef LIBXML_SAX1_ENABLED
11519 } else {
11520 if ((ctxt->sax != NULL) &&
11521 (ctxt->sax->endElement != NULL) &&
11522 (!ctxt->disableSAX))
11523 ctxt->sax->endElement(ctxt->userData, name);
11524 #endif /* LIBXML_SAX1_ENABLED */
11525 }
11526 if (ctxt->instate == XML_PARSER_EOF)
11527 goto done;
11528 spacePop(ctxt);
11529 if (ctxt->nameNr == 0) {
11530 ctxt->instate = XML_PARSER_EPILOG;
11531 } else {
11532 ctxt->instate = XML_PARSER_CONTENT;
11533 }
11534 ctxt->progressive = 1;
11535 break;
11536 }
11537 if (RAW == '>') {
11538 NEXT;
11539 } else {
11540 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11541 "Couldn't find end of Start Tag %s\n",
11542 name);
11543 nodePop(ctxt);
11544 spacePop(ctxt);
11545 }
11546 if (ctxt->sax2)
11547 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11548 #ifdef LIBXML_SAX1_ENABLED
11549 else
11550 namePush(ctxt, name);
11551 #endif /* LIBXML_SAX1_ENABLED */
11552
11553 ctxt->instate = XML_PARSER_CONTENT;
11554 ctxt->progressive = 1;
11555 break;
11556 }
11557 case XML_PARSER_CONTENT: {
11558 const xmlChar *test;
11559 unsigned int cons;
11560 if ((avail < 2) && (ctxt->inputNr == 1))
11561 goto done;
11562 cur = ctxt->input->cur[0];
11563 next = ctxt->input->cur[1];
11564
11565 test = CUR_PTR;
11566 cons = ctxt->input->consumed;
11567 if ((cur == '<') && (next == '/')) {
11568 ctxt->instate = XML_PARSER_END_TAG;
11569 break;
11570 } else if ((cur == '<') && (next == '?')) {
11571 if ((!terminate) &&
11572 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11573 ctxt->progressive = XML_PARSER_PI;
11574 goto done;
11575 }
11576 xmlParsePI(ctxt);
11577 ctxt->instate = XML_PARSER_CONTENT;
11578 ctxt->progressive = 1;
11579 } else if ((cur == '<') && (next != '!')) {
11580 ctxt->instate = XML_PARSER_START_TAG;
11581 break;
11582 } else if ((cur == '<') && (next == '!') &&
11583 (ctxt->input->cur[2] == '-') &&
11584 (ctxt->input->cur[3] == '-')) {
11585 int term;
11586
11587 if (avail < 4)
11588 goto done;
11589 ctxt->input->cur += 4;
11590 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11591 ctxt->input->cur -= 4;
11592 if ((!terminate) && (term < 0)) {
11593 ctxt->progressive = XML_PARSER_COMMENT;
11594 goto done;
11595 }
11596 xmlParseComment(ctxt);
11597 ctxt->instate = XML_PARSER_CONTENT;
11598 ctxt->progressive = 1;
11599 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11600 (ctxt->input->cur[2] == '[') &&
11601 (ctxt->input->cur[3] == 'C') &&
11602 (ctxt->input->cur[4] == 'D') &&
11603 (ctxt->input->cur[5] == 'A') &&
11604 (ctxt->input->cur[6] == 'T') &&
11605 (ctxt->input->cur[7] == 'A') &&
11606 (ctxt->input->cur[8] == '[')) {
11607 SKIP(9);
11608 ctxt->instate = XML_PARSER_CDATA_SECTION;
11609 break;
11610 } else if ((cur == '<') && (next == '!') &&
11611 (avail < 9)) {
11612 goto done;
11613 } else if (cur == '&') {
11614 if ((!terminate) &&
11615 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11616 goto done;
11617 xmlParseReference(ctxt);
11618 } else {
11619 /* TODO Avoid the extra copy, handle directly !!! */
11620 /*
11621 * Goal of the following test is:
11622 * - minimize calls to the SAX 'character' callback
11623 * when they are mergeable
11624 * - handle a problem for isBlank when we only parse
11625 * a sequence of blank chars and the next one is
11626 * not available to check against '<' presence.
11627 * - tries to homogenize the differences in SAX
11628 * callbacks between the push and pull versions
11629 * of the parser.
11630 */
11631 if ((ctxt->inputNr == 1) &&
11632 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11633 if (!terminate) {
11634 if (ctxt->progressive) {
11635 if ((lastlt == NULL) ||
11636 (ctxt->input->cur > lastlt))
11637 goto done;
11638 } else if (xmlParseLookupSequence(ctxt,
11639 '<', 0, 0) < 0) {
11640 goto done;
11641 }
11642 }
11643 }
11644 ctxt->checkIndex = 0;
11645 xmlParseCharData(ctxt, 0);
11646 }
11647 /*
11648 * Pop-up of finished entities.
11649 */
11650 while ((RAW == 0) && (ctxt->inputNr > 1))
11651 xmlPopInput(ctxt);
11652 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11653 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11654 "detected an error in element content\n");
11655 ctxt->instate = XML_PARSER_EOF;
11656 break;
11657 }
11658 break;
11659 }
11660 case XML_PARSER_END_TAG:
11661 if (avail < 2)
11662 goto done;
11663 if (!terminate) {
11664 if (ctxt->progressive) {
11665 /* > can be found unescaped in attribute values */
11666 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11667 goto done;
11668 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11669 goto done;
11670 }
11671 }
11672 if (ctxt->sax2) {
11673 xmlParseEndTag2(ctxt,
11674 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11675 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11676 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11677 nameNsPop(ctxt);
11678 }
11679 #ifdef LIBXML_SAX1_ENABLED
11680 else
11681 xmlParseEndTag1(ctxt, 0);
11682 #endif /* LIBXML_SAX1_ENABLED */
11683 if (ctxt->instate == XML_PARSER_EOF) {
11684 /* Nothing */
11685 } else if (ctxt->nameNr == 0) {
11686 ctxt->instate = XML_PARSER_EPILOG;
11687 } else {
11688 ctxt->instate = XML_PARSER_CONTENT;
11689 }
11690 break;
11691 case XML_PARSER_CDATA_SECTION: {
11692 /*
11693 * The Push mode need to have the SAX callback for
11694 * cdataBlock merge back contiguous callbacks.
11695 */
11696 int base;
11697
11698 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11699 if (base < 0) {
11700 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11701 int tmp;
11702
11703 tmp = xmlCheckCdataPush(ctxt->input->cur,
11704 XML_PARSER_BIG_BUFFER_SIZE);
11705 if (tmp < 0) {
11706 tmp = -tmp;
11707 ctxt->input->cur += tmp;
11708 goto encoding_error;
11709 }
11710 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11711 if (ctxt->sax->cdataBlock != NULL)
11712 ctxt->sax->cdataBlock(ctxt->userData,
11713 ctxt->input->cur, tmp);
11714 else if (ctxt->sax->characters != NULL)
11715 ctxt->sax->characters(ctxt->userData,
11716 ctxt->input->cur, tmp);
11717 }
11718 if (ctxt->instate == XML_PARSER_EOF)
11719 goto done;
11720 SKIPL(tmp);
11721 ctxt->checkIndex = 0;
11722 }
11723 goto done;
11724 } else {
11725 int tmp;
11726
11727 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11728 if ((tmp < 0) || (tmp != base)) {
11729 tmp = -tmp;
11730 ctxt->input->cur += tmp;
11731 goto encoding_error;
11732 }
11733 if ((ctxt->sax != NULL) && (base == 0) &&
11734 (ctxt->sax->cdataBlock != NULL) &&
11735 (!ctxt->disableSAX)) {
11736 /*
11737 * Special case to provide identical behaviour
11738 * between pull and push parsers on empty CDATA
11739 * sections
11740 */
11741 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11742 (!strncmp((const char *)&ctxt->input->cur[-9],
11743 "<![CDATA[", 9)))
11744 ctxt->sax->cdataBlock(ctxt->userData,
11745 BAD_CAST "", 0);
11746 } else if ((ctxt->sax != NULL) && (base > 0) &&
11747 (!ctxt->disableSAX)) {
11748 if (ctxt->sax->cdataBlock != NULL)
11749 ctxt->sax->cdataBlock(ctxt->userData,
11750 ctxt->input->cur, base);
11751 else if (ctxt->sax->characters != NULL)
11752 ctxt->sax->characters(ctxt->userData,
11753 ctxt->input->cur, base);
11754 }
11755 if (ctxt->instate == XML_PARSER_EOF)
11756 goto done;
11757 SKIPL(base + 3);
11758 ctxt->checkIndex = 0;
11759 ctxt->instate = XML_PARSER_CONTENT;
11760 #ifdef DEBUG_PUSH
11761 xmlGenericError(xmlGenericErrorContext,
11762 "PP: entering CONTENT\n");
11763 #endif
11764 }
11765 break;
11766 }
11767 case XML_PARSER_MISC:
11768 SKIP_BLANKS;
11769 if (ctxt->input->buf == NULL)
11770 avail = ctxt->input->length -
11771 (ctxt->input->cur - ctxt->input->base);
11772 else
11773 avail = xmlBufUse(ctxt->input->buf->buffer) -
11774 (ctxt->input->cur - ctxt->input->base);
11775 if (avail < 2)
11776 goto done;
11777 cur = ctxt->input->cur[0];
11778 next = ctxt->input->cur[1];
11779 if ((cur == '<') && (next == '?')) {
11780 if ((!terminate) &&
11781 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11782 ctxt->progressive = XML_PARSER_PI;
11783 goto done;
11784 }
11785 #ifdef DEBUG_PUSH
11786 xmlGenericError(xmlGenericErrorContext,
11787 "PP: Parsing PI\n");
11788 #endif
11789 xmlParsePI(ctxt);
11790 if (ctxt->instate == XML_PARSER_EOF)
11791 goto done;
11792 ctxt->instate = XML_PARSER_MISC;
11793 ctxt->progressive = 1;
11794 ctxt->checkIndex = 0;
11795 } else if ((cur == '<') && (next == '!') &&
11796 (ctxt->input->cur[2] == '-') &&
11797 (ctxt->input->cur[3] == '-')) {
11798 if ((!terminate) &&
11799 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11800 ctxt->progressive = XML_PARSER_COMMENT;
11801 goto done;
11802 }
11803 #ifdef DEBUG_PUSH
11804 xmlGenericError(xmlGenericErrorContext,
11805 "PP: Parsing Comment\n");
11806 #endif
11807 xmlParseComment(ctxt);
11808 if (ctxt->instate == XML_PARSER_EOF)
11809 goto done;
11810 ctxt->instate = XML_PARSER_MISC;
11811 ctxt->progressive = 1;
11812 ctxt->checkIndex = 0;
11813 } else if ((cur == '<') && (next == '!') &&
11814 (ctxt->input->cur[2] == 'D') &&
11815 (ctxt->input->cur[3] == 'O') &&
11816 (ctxt->input->cur[4] == 'C') &&
11817 (ctxt->input->cur[5] == 'T') &&
11818 (ctxt->input->cur[6] == 'Y') &&
11819 (ctxt->input->cur[7] == 'P') &&
11820 (ctxt->input->cur[8] == 'E')) {
11821 if ((!terminate) &&
11822 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11823 ctxt->progressive = XML_PARSER_DTD;
11824 goto done;
11825 }
11826 #ifdef DEBUG_PUSH
11827 xmlGenericError(xmlGenericErrorContext,
11828 "PP: Parsing internal subset\n");
11829 #endif
11830 ctxt->inSubset = 1;
11831 ctxt->progressive = 0;
11832 ctxt->checkIndex = 0;
11833 xmlParseDocTypeDecl(ctxt);
11834 if (ctxt->instate == XML_PARSER_EOF)
11835 goto done;
11836 if (RAW == '[') {
11837 ctxt->instate = XML_PARSER_DTD;
11838 #ifdef DEBUG_PUSH
11839 xmlGenericError(xmlGenericErrorContext,
11840 "PP: entering DTD\n");
11841 #endif
11842 } else {
11843 /*
11844 * Create and update the external subset.
11845 */
11846 ctxt->inSubset = 2;
11847 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11848 (ctxt->sax->externalSubset != NULL))
11849 ctxt->sax->externalSubset(ctxt->userData,
11850 ctxt->intSubName, ctxt->extSubSystem,
11851 ctxt->extSubURI);
11852 ctxt->inSubset = 0;
11853 xmlCleanSpecialAttr(ctxt);
11854 ctxt->instate = XML_PARSER_PROLOG;
11855 #ifdef DEBUG_PUSH
11856 xmlGenericError(xmlGenericErrorContext,
11857 "PP: entering PROLOG\n");
11858 #endif
11859 }
11860 } else if ((cur == '<') && (next == '!') &&
11861 (avail < 9)) {
11862 goto done;
11863 } else {
11864 ctxt->instate = XML_PARSER_START_TAG;
11865 ctxt->progressive = XML_PARSER_START_TAG;
11866 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11867 #ifdef DEBUG_PUSH
11868 xmlGenericError(xmlGenericErrorContext,
11869 "PP: entering START_TAG\n");
11870 #endif
11871 }
11872 break;
11873 case XML_PARSER_PROLOG:
11874 SKIP_BLANKS;
11875 if (ctxt->input->buf == NULL)
11876 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11877 else
11878 avail = xmlBufUse(ctxt->input->buf->buffer) -
11879 (ctxt->input->cur - ctxt->input->base);
11880 if (avail < 2)
11881 goto done;
11882 cur = ctxt->input->cur[0];
11883 next = ctxt->input->cur[1];
11884 if ((cur == '<') && (next == '?')) {
11885 if ((!terminate) &&
11886 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11887 ctxt->progressive = XML_PARSER_PI;
11888 goto done;
11889 }
11890 #ifdef DEBUG_PUSH
11891 xmlGenericError(xmlGenericErrorContext,
11892 "PP: Parsing PI\n");
11893 #endif
11894 xmlParsePI(ctxt);
11895 if (ctxt->instate == XML_PARSER_EOF)
11896 goto done;
11897 ctxt->instate = XML_PARSER_PROLOG;
11898 ctxt->progressive = 1;
11899 } else if ((cur == '<') && (next == '!') &&
11900 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11901 if ((!terminate) &&
11902 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11903 ctxt->progressive = XML_PARSER_COMMENT;
11904 goto done;
11905 }
11906 #ifdef DEBUG_PUSH
11907 xmlGenericError(xmlGenericErrorContext,
11908 "PP: Parsing Comment\n");
11909 #endif
11910 xmlParseComment(ctxt);
11911 if (ctxt->instate == XML_PARSER_EOF)
11912 goto done;
11913 ctxt->instate = XML_PARSER_PROLOG;
11914 ctxt->progressive = 1;
11915 } else if ((cur == '<') && (next == '!') &&
11916 (avail < 4)) {
11917 goto done;
11918 } else {
11919 ctxt->instate = XML_PARSER_START_TAG;
11920 if (ctxt->progressive == 0)
11921 ctxt->progressive = XML_PARSER_START_TAG;
11922 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11923 #ifdef DEBUG_PUSH
11924 xmlGenericError(xmlGenericErrorContext,
11925 "PP: entering START_TAG\n");
11926 #endif
11927 }
11928 break;
11929 case XML_PARSER_EPILOG:
11930 SKIP_BLANKS;
11931 if (ctxt->input->buf == NULL)
11932 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11933 else
11934 avail = xmlBufUse(ctxt->input->buf->buffer) -
11935 (ctxt->input->cur - ctxt->input->base);
11936 if (avail < 2)
11937 goto done;
11938 cur = ctxt->input->cur[0];
11939 next = ctxt->input->cur[1];
11940 if ((cur == '<') && (next == '?')) {
11941 if ((!terminate) &&
11942 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11943 ctxt->progressive = XML_PARSER_PI;
11944 goto done;
11945 }
11946 #ifdef DEBUG_PUSH
11947 xmlGenericError(xmlGenericErrorContext,
11948 "PP: Parsing PI\n");
11949 #endif
11950 xmlParsePI(ctxt);
11951 if (ctxt->instate == XML_PARSER_EOF)
11952 goto done;
11953 ctxt->instate = XML_PARSER_EPILOG;
11954 ctxt->progressive = 1;
11955 } else if ((cur == '<') && (next == '!') &&
11956 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11957 if ((!terminate) &&
11958 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11959 ctxt->progressive = XML_PARSER_COMMENT;
11960 goto done;
11961 }
11962 #ifdef DEBUG_PUSH
11963 xmlGenericError(xmlGenericErrorContext,
11964 "PP: Parsing Comment\n");
11965 #endif
11966 xmlParseComment(ctxt);
11967 if (ctxt->instate == XML_PARSER_EOF)
11968 goto done;
11969 ctxt->instate = XML_PARSER_EPILOG;
11970 ctxt->progressive = 1;
11971 } else if ((cur == '<') && (next == '!') &&
11972 (avail < 4)) {
11973 goto done;
11974 } else {
11975 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11976 ctxt->instate = XML_PARSER_EOF;
11977 #ifdef DEBUG_PUSH
11978 xmlGenericError(xmlGenericErrorContext,
11979 "PP: entering EOF\n");
11980 #endif
11981 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11982 ctxt->sax->endDocument(ctxt->userData);
11983 goto done;
11984 }
11985 break;
11986 case XML_PARSER_DTD: {
11987 /*
11988 * Sorry but progressive parsing of the internal subset
11989 * is not expected to be supported. We first check that
11990 * the full content of the internal subset is available and
11991 * the parsing is launched only at that point.
11992 * Internal subset ends up with "']' S? '>'" in an unescaped
11993 * section and not in a ']]>' sequence which are conditional
11994 * sections (whoever argued to keep that crap in XML deserve
11995 * a place in hell !).
11996 */
11997 int base, i;
11998 xmlChar *buf;
11999 xmlChar quote = 0;
12000 size_t use;
12001
12002 base = ctxt->input->cur - ctxt->input->base;
12003 if (base < 0) return(0);
12004 if (ctxt->checkIndex > base)
12005 base = ctxt->checkIndex;
12006 buf = xmlBufContent(ctxt->input->buf->buffer);
12007 use = xmlBufUse(ctxt->input->buf->buffer);
12008 for (;(unsigned int) base < use; base++) {
12009 if (quote != 0) {
12010 if (buf[base] == quote)
12011 quote = 0;
12012 continue;
12013 }
12014 if ((quote == 0) && (buf[base] == '<')) {
12015 int found = 0;
12016 /* special handling of comments */
12017 if (((unsigned int) base + 4 < use) &&
12018 (buf[base + 1] == '!') &&
12019 (buf[base + 2] == '-') &&
12020 (buf[base + 3] == '-')) {
12021 for (;(unsigned int) base + 3 < use; base++) {
12022 if ((buf[base] == '-') &&
12023 (buf[base + 1] == '-') &&
12024 (buf[base + 2] == '>')) {
12025 found = 1;
12026 base += 2;
12027 break;
12028 }
12029 }
12030 if (!found) {
12031 #if 0
12032 fprintf(stderr, "unfinished comment\n");
12033 #endif
12034 break; /* for */
12035 }
12036 continue;
12037 }
12038 }
12039 if (buf[base] == '"') {
12040 quote = '"';
12041 continue;
12042 }
12043 if (buf[base] == '\'') {
12044 quote = '\'';
12045 continue;
12046 }
12047 if (buf[base] == ']') {
12048 #if 0
12049 fprintf(stderr, "%c%c%c%c: ", buf[base],
12050 buf[base + 1], buf[base + 2], buf[base + 3]);
12051 #endif
12052 if ((unsigned int) base +1 >= use)
12053 break;
12054 if (buf[base + 1] == ']') {
12055 /* conditional crap, skip both ']' ! */
12056 base++;
12057 continue;
12058 }
12059 for (i = 1; (unsigned int) base + i < use; i++) {
12060 if (buf[base + i] == '>') {
12061 #if 0
12062 fprintf(stderr, "found\n");
12063 #endif
12064 goto found_end_int_subset;
12065 }
12066 if (!IS_BLANK_CH(buf[base + i])) {
12067 #if 0
12068 fprintf(stderr, "not found\n");
12069 #endif
12070 goto not_end_of_int_subset;
12071 }
12072 }
12073 #if 0
12074 fprintf(stderr, "end of stream\n");
12075 #endif
12076 break;
12077
12078 }
12079 not_end_of_int_subset:
12080 continue; /* for */
12081 }
12082 /*
12083 * We didn't found the end of the Internal subset
12084 */
12085 if (quote == 0)
12086 ctxt->checkIndex = base;
12087 else
12088 ctxt->checkIndex = 0;
12089 #ifdef DEBUG_PUSH
12090 if (next == 0)
12091 xmlGenericError(xmlGenericErrorContext,
12092 "PP: lookup of int subset end filed\n");
12093 #endif
12094 goto done;
12095
12096 found_end_int_subset:
12097 ctxt->checkIndex = 0;
12098 xmlParseInternalSubset(ctxt);
12099 if (ctxt->instate == XML_PARSER_EOF)
12100 goto done;
12101 ctxt->inSubset = 2;
12102 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12103 (ctxt->sax->externalSubset != NULL))
12104 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12105 ctxt->extSubSystem, ctxt->extSubURI);
12106 ctxt->inSubset = 0;
12107 xmlCleanSpecialAttr(ctxt);
12108 if (ctxt->instate == XML_PARSER_EOF)
12109 goto done;
12110 ctxt->instate = XML_PARSER_PROLOG;
12111 ctxt->checkIndex = 0;
12112 #ifdef DEBUG_PUSH
12113 xmlGenericError(xmlGenericErrorContext,
12114 "PP: entering PROLOG\n");
12115 #endif
12116 break;
12117 }
12118 case XML_PARSER_COMMENT:
12119 xmlGenericError(xmlGenericErrorContext,
12120 "PP: internal error, state == COMMENT\n");
12121 ctxt->instate = XML_PARSER_CONTENT;
12122 #ifdef DEBUG_PUSH
12123 xmlGenericError(xmlGenericErrorContext,
12124 "PP: entering CONTENT\n");
12125 #endif
12126 break;
12127 case XML_PARSER_IGNORE:
12128 xmlGenericError(xmlGenericErrorContext,
12129 "PP: internal error, state == IGNORE");
12130 ctxt->instate = XML_PARSER_DTD;
12131 #ifdef DEBUG_PUSH
12132 xmlGenericError(xmlGenericErrorContext,
12133 "PP: entering DTD\n");
12134 #endif
12135 break;
12136 case XML_PARSER_PI:
12137 xmlGenericError(xmlGenericErrorContext,
12138 "PP: internal error, state == PI\n");
12139 ctxt->instate = XML_PARSER_CONTENT;
12140 #ifdef DEBUG_PUSH
12141 xmlGenericError(xmlGenericErrorContext,
12142 "PP: entering CONTENT\n");
12143 #endif
12144 break;
12145 case XML_PARSER_ENTITY_DECL:
12146 xmlGenericError(xmlGenericErrorContext,
12147 "PP: internal error, state == ENTITY_DECL\n");
12148 ctxt->instate = XML_PARSER_DTD;
12149 #ifdef DEBUG_PUSH
12150 xmlGenericError(xmlGenericErrorContext,
12151 "PP: entering DTD\n");
12152 #endif
12153 break;
12154 case XML_PARSER_ENTITY_VALUE:
12155 xmlGenericError(xmlGenericErrorContext,
12156 "PP: internal error, state == ENTITY_VALUE\n");
12157 ctxt->instate = XML_PARSER_CONTENT;
12158 #ifdef DEBUG_PUSH
12159 xmlGenericError(xmlGenericErrorContext,
12160 "PP: entering DTD\n");
12161 #endif
12162 break;
12163 case XML_PARSER_ATTRIBUTE_VALUE:
12164 xmlGenericError(xmlGenericErrorContext,
12165 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12166 ctxt->instate = XML_PARSER_START_TAG;
12167 #ifdef DEBUG_PUSH
12168 xmlGenericError(xmlGenericErrorContext,
12169 "PP: entering START_TAG\n");
12170 #endif
12171 break;
12172 case XML_PARSER_SYSTEM_LITERAL:
12173 xmlGenericError(xmlGenericErrorContext,
12174 "PP: internal error, state == SYSTEM_LITERAL\n");
12175 ctxt->instate = XML_PARSER_START_TAG;
12176 #ifdef DEBUG_PUSH
12177 xmlGenericError(xmlGenericErrorContext,
12178 "PP: entering START_TAG\n");
12179 #endif
12180 break;
12181 case XML_PARSER_PUBLIC_LITERAL:
12182 xmlGenericError(xmlGenericErrorContext,
12183 "PP: internal error, state == PUBLIC_LITERAL\n");
12184 ctxt->instate = XML_PARSER_START_TAG;
12185 #ifdef DEBUG_PUSH
12186 xmlGenericError(xmlGenericErrorContext,
12187 "PP: entering START_TAG\n");
12188 #endif
12189 break;
12190 }
12191 }
12192 done:
12193 #ifdef DEBUG_PUSH
12194 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12195 #endif
12196 return(ret);
12197 encoding_error:
12198 {
12199 char buffer[150];
12200
12201 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12202 ctxt->input->cur[0], ctxt->input->cur[1],
12203 ctxt->input->cur[2], ctxt->input->cur[3]);
12204 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12205 "Input is not proper UTF-8, indicate encoding !\n%s",
12206 BAD_CAST buffer, NULL);
12207 }
12208 return(0);
12209 }
12210
12211 /**
12212 * xmlParseCheckTransition:
12213 * @ctxt: an XML parser context
12214 * @chunk: a char array
12215 * @size: the size in byte of the chunk
12216 *
12217 * Check depending on the current parser state if the chunk given must be
12218 * processed immediately or one need more data to advance on parsing.
12219 *
12220 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12221 */
12222 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12223 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12224 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12225 return(-1);
12226 if (ctxt->instate == XML_PARSER_START_TAG) {
12227 if (memchr(chunk, '>', size) != NULL)
12228 return(1);
12229 return(0);
12230 }
12231 if (ctxt->progressive == XML_PARSER_COMMENT) {
12232 if (memchr(chunk, '>', size) != NULL)
12233 return(1);
12234 return(0);
12235 }
12236 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12237 if (memchr(chunk, '>', size) != NULL)
12238 return(1);
12239 return(0);
12240 }
12241 if (ctxt->progressive == XML_PARSER_PI) {
12242 if (memchr(chunk, '>', size) != NULL)
12243 return(1);
12244 return(0);
12245 }
12246 if (ctxt->instate == XML_PARSER_END_TAG) {
12247 if (memchr(chunk, '>', size) != NULL)
12248 return(1);
12249 return(0);
12250 }
12251 if ((ctxt->progressive == XML_PARSER_DTD) ||
12252 (ctxt->instate == XML_PARSER_DTD)) {
12253 if (memchr(chunk, '>', size) != NULL)
12254 return(1);
12255 return(0);
12256 }
12257 return(1);
12258 }
12259
12260 /**
12261 * xmlParseChunk:
12262 * @ctxt: an XML parser context
12263 * @chunk: an char array
12264 * @size: the size in byte of the chunk
12265 * @terminate: last chunk indicator
12266 *
12267 * Parse a Chunk of memory
12268 *
12269 * Returns zero if no error, the xmlParserErrors otherwise.
12270 */
12271 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12272 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12273 int terminate) {
12274 int end_in_lf = 0;
12275 int remain = 0;
12276 size_t old_avail = 0;
12277 size_t avail = 0;
12278
12279 if (ctxt == NULL)
12280 return(XML_ERR_INTERNAL_ERROR);
12281 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12282 return(ctxt->errNo);
12283 if (ctxt->instate == XML_PARSER_EOF)
12284 return(-1);
12285 if (ctxt->instate == XML_PARSER_START)
12286 xmlDetectSAX2(ctxt);
12287 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12288 (chunk[size - 1] == '\r')) {
12289 end_in_lf = 1;
12290 size--;
12291 }
12292
12293 xmldecl_done:
12294
12295 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12296 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12297 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12298 size_t cur = ctxt->input->cur - ctxt->input->base;
12299 int res;
12300
12301 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12302 /*
12303 * Specific handling if we autodetected an encoding, we should not
12304 * push more than the first line ... which depend on the encoding
12305 * And only push the rest once the final encoding was detected
12306 */
12307 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12308 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12309 unsigned int len = 45;
12310
12311 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12312 BAD_CAST "UTF-16")) ||
12313 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12314 BAD_CAST "UTF16")))
12315 len = 90;
12316 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12317 BAD_CAST "UCS-4")) ||
12318 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12319 BAD_CAST "UCS4")))
12320 len = 180;
12321
12322 if (ctxt->input->buf->rawconsumed < len)
12323 len -= ctxt->input->buf->rawconsumed;
12324
12325 /*
12326 * Change size for reading the initial declaration only
12327 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12328 * will blindly copy extra bytes from memory.
12329 */
12330 if ((unsigned int) size > len) {
12331 remain = size - len;
12332 size = len;
12333 } else {
12334 remain = 0;
12335 }
12336 }
12337 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12338 if (res < 0) {
12339 ctxt->errNo = XML_PARSER_EOF;
12340 ctxt->disableSAX = 1;
12341 return (XML_PARSER_EOF);
12342 }
12343 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12344 #ifdef DEBUG_PUSH
12345 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12346 #endif
12347
12348 } else if (ctxt->instate != XML_PARSER_EOF) {
12349 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12350 xmlParserInputBufferPtr in = ctxt->input->buf;
12351 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12352 (in->raw != NULL)) {
12353 int nbchars;
12354 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12355 size_t current = ctxt->input->cur - ctxt->input->base;
12356
12357 nbchars = xmlCharEncInput(in, terminate);
12358 if (nbchars < 0) {
12359 /* TODO 2.6.0 */
12360 xmlGenericError(xmlGenericErrorContext,
12361 "xmlParseChunk: encoder error\n");
12362 return(XML_ERR_INVALID_ENCODING);
12363 }
12364 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12365 }
12366 }
12367 }
12368 if (remain != 0) {
12369 xmlParseTryOrFinish(ctxt, 0);
12370 } else {
12371 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12372 avail = xmlBufUse(ctxt->input->buf->buffer);
12373 /*
12374 * Depending on the current state it may not be such
12375 * a good idea to try parsing if there is nothing in the chunk
12376 * which would be worth doing a parser state transition and we
12377 * need to wait for more data
12378 */
12379 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12380 (old_avail == 0) || (avail == 0) ||
12381 (xmlParseCheckTransition(ctxt,
12382 (const char *)&ctxt->input->base[old_avail],
12383 avail - old_avail)))
12384 xmlParseTryOrFinish(ctxt, terminate);
12385 }
12386 if (ctxt->instate == XML_PARSER_EOF)
12387 return(ctxt->errNo);
12388
12389 if ((ctxt->input != NULL) &&
12390 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12391 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12392 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12393 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12394 ctxt->instate = XML_PARSER_EOF;
12395 }
12396 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12397 return(ctxt->errNo);
12398
12399 if (remain != 0) {
12400 chunk += size;
12401 size = remain;
12402 remain = 0;
12403 goto xmldecl_done;
12404 }
12405 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12406 (ctxt->input->buf != NULL)) {
12407 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12408 ctxt->input);
12409 size_t current = ctxt->input->cur - ctxt->input->base;
12410
12411 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12412
12413 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12414 base, current);
12415 }
12416 if (terminate) {
12417 /*
12418 * Check for termination
12419 */
12420 int cur_avail = 0;
12421
12422 if (ctxt->input != NULL) {
12423 if (ctxt->input->buf == NULL)
12424 cur_avail = ctxt->input->length -
12425 (ctxt->input->cur - ctxt->input->base);
12426 else
12427 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12428 (ctxt->input->cur - ctxt->input->base);
12429 }
12430
12431 if ((ctxt->instate != XML_PARSER_EOF) &&
12432 (ctxt->instate != XML_PARSER_EPILOG)) {
12433 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12434 }
12435 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12436 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12437 }
12438 if (ctxt->instate != XML_PARSER_EOF) {
12439 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12440 ctxt->sax->endDocument(ctxt->userData);
12441 }
12442 ctxt->instate = XML_PARSER_EOF;
12443 }
12444 if (ctxt->wellFormed == 0)
12445 return((xmlParserErrors) ctxt->errNo);
12446 else
12447 return(0);
12448 }
12449
12450 /************************************************************************
12451 * *
12452 * I/O front end functions to the parser *
12453 * *
12454 ************************************************************************/
12455
12456 /**
12457 * xmlCreatePushParserCtxt:
12458 * @sax: a SAX handler
12459 * @user_data: The user data returned on SAX callbacks
12460 * @chunk: a pointer to an array of chars
12461 * @size: number of chars in the array
12462 * @filename: an optional file name or URI
12463 *
12464 * Create a parser context for using the XML parser in push mode.
12465 * If @buffer and @size are non-NULL, the data is used to detect
12466 * the encoding. The remaining characters will be parsed so they
12467 * don't need to be fed in again through xmlParseChunk.
12468 * To allow content encoding detection, @size should be >= 4
12469 * The value of @filename is used for fetching external entities
12470 * and error/warning reports.
12471 *
12472 * Returns the new parser context or NULL
12473 */
12474
12475 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12476 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12477 const char *chunk, int size, const char *filename) {
12478 xmlParserCtxtPtr ctxt;
12479 xmlParserInputPtr inputStream;
12480 xmlParserInputBufferPtr buf;
12481 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12482
12483 /*
12484 * plug some encoding conversion routines
12485 */
12486 if ((chunk != NULL) && (size >= 4))
12487 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12488
12489 buf = xmlAllocParserInputBuffer(enc);
12490 if (buf == NULL) return(NULL);
12491
12492 ctxt = xmlNewParserCtxt();
12493 if (ctxt == NULL) {
12494 xmlErrMemory(NULL, "creating parser: out of memory\n");
12495 xmlFreeParserInputBuffer(buf);
12496 return(NULL);
12497 }
12498 ctxt->dictNames = 1;
12499 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12500 if (ctxt->pushTab == NULL) {
12501 xmlErrMemory(ctxt, NULL);
12502 xmlFreeParserInputBuffer(buf);
12503 xmlFreeParserCtxt(ctxt);
12504 return(NULL);
12505 }
12506 if (sax != NULL) {
12507 #ifdef LIBXML_SAX1_ENABLED
12508 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12509 #endif /* LIBXML_SAX1_ENABLED */
12510 xmlFree(ctxt->sax);
12511 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12512 if (ctxt->sax == NULL) {
12513 xmlErrMemory(ctxt, NULL);
12514 xmlFreeParserInputBuffer(buf);
12515 xmlFreeParserCtxt(ctxt);
12516 return(NULL);
12517 }
12518 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12519 if (sax->initialized == XML_SAX2_MAGIC)
12520 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12521 else
12522 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12523 if (user_data != NULL)
12524 ctxt->userData = user_data;
12525 }
12526 if (filename == NULL) {
12527 ctxt->directory = NULL;
12528 } else {
12529 ctxt->directory = xmlParserGetDirectory(filename);
12530 }
12531
12532 inputStream = xmlNewInputStream(ctxt);
12533 if (inputStream == NULL) {
12534 xmlFreeParserCtxt(ctxt);
12535 xmlFreeParserInputBuffer(buf);
12536 return(NULL);
12537 }
12538
12539 if (filename == NULL)
12540 inputStream->filename = NULL;
12541 else {
12542 inputStream->filename = (char *)
12543 xmlCanonicPath((const xmlChar *) filename);
12544 if (inputStream->filename == NULL) {
12545 xmlFreeParserCtxt(ctxt);
12546 xmlFreeParserInputBuffer(buf);
12547 return(NULL);
12548 }
12549 }
12550 inputStream->buf = buf;
12551 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12552 inputPush(ctxt, inputStream);
12553
12554 /*
12555 * If the caller didn't provide an initial 'chunk' for determining
12556 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12557 * that it can be automatically determined later
12558 */
12559 if ((size == 0) || (chunk == NULL)) {
12560 ctxt->charset = XML_CHAR_ENCODING_NONE;
12561 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12562 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12563 size_t cur = ctxt->input->cur - ctxt->input->base;
12564
12565 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12566
12567 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12568 #ifdef DEBUG_PUSH
12569 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12570 #endif
12571 }
12572
12573 if (enc != XML_CHAR_ENCODING_NONE) {
12574 xmlSwitchEncoding(ctxt, enc);
12575 }
12576
12577 return(ctxt);
12578 }
12579 #endif /* LIBXML_PUSH_ENABLED */
12580
12581 /**
12582 * xmlStopParser:
12583 * @ctxt: an XML parser context
12584 *
12585 * Blocks further parser processing
12586 */
12587 void
xmlStopParser(xmlParserCtxtPtr ctxt)12588 xmlStopParser(xmlParserCtxtPtr ctxt) {
12589 if (ctxt == NULL)
12590 return;
12591 ctxt->instate = XML_PARSER_EOF;
12592 ctxt->errNo = XML_ERR_USER_STOP;
12593 ctxt->disableSAX = 1;
12594 if (ctxt->input != NULL) {
12595 ctxt->input->cur = BAD_CAST"";
12596 ctxt->input->base = ctxt->input->cur;
12597 }
12598 }
12599
12600 /**
12601 * xmlCreateIOParserCtxt:
12602 * @sax: a SAX handler
12603 * @user_data: The user data returned on SAX callbacks
12604 * @ioread: an I/O read function
12605 * @ioclose: an I/O close function
12606 * @ioctx: an I/O handler
12607 * @enc: the charset encoding if known
12608 *
12609 * Create a parser context for using the XML parser with an existing
12610 * I/O stream
12611 *
12612 * Returns the new parser context or NULL
12613 */
12614 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12615 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12616 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12617 void *ioctx, xmlCharEncoding enc) {
12618 xmlParserCtxtPtr ctxt;
12619 xmlParserInputPtr inputStream;
12620 xmlParserInputBufferPtr buf;
12621
12622 if (ioread == NULL) return(NULL);
12623
12624 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12625 if (buf == NULL) {
12626 if (ioclose != NULL)
12627 ioclose(ioctx);
12628 return (NULL);
12629 }
12630
12631 ctxt = xmlNewParserCtxt();
12632 if (ctxt == NULL) {
12633 xmlFreeParserInputBuffer(buf);
12634 return(NULL);
12635 }
12636 if (sax != NULL) {
12637 #ifdef LIBXML_SAX1_ENABLED
12638 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12639 #endif /* LIBXML_SAX1_ENABLED */
12640 xmlFree(ctxt->sax);
12641 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12642 if (ctxt->sax == NULL) {
12643 xmlErrMemory(ctxt, NULL);
12644 xmlFreeParserCtxt(ctxt);
12645 return(NULL);
12646 }
12647 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12648 if (sax->initialized == XML_SAX2_MAGIC)
12649 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12650 else
12651 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12652 if (user_data != NULL)
12653 ctxt->userData = user_data;
12654 }
12655
12656 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12657 if (inputStream == NULL) {
12658 xmlFreeParserCtxt(ctxt);
12659 return(NULL);
12660 }
12661 inputPush(ctxt, inputStream);
12662
12663 return(ctxt);
12664 }
12665
12666 #ifdef LIBXML_VALID_ENABLED
12667 /************************************************************************
12668 * *
12669 * Front ends when parsing a DTD *
12670 * *
12671 ************************************************************************/
12672
12673 /**
12674 * xmlIOParseDTD:
12675 * @sax: the SAX handler block or NULL
12676 * @input: an Input Buffer
12677 * @enc: the charset encoding if known
12678 *
12679 * Load and parse a DTD
12680 *
12681 * Returns the resulting xmlDtdPtr or NULL in case of error.
12682 * @input will be freed by the function in any case.
12683 */
12684
12685 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12686 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12687 xmlCharEncoding enc) {
12688 xmlDtdPtr ret = NULL;
12689 xmlParserCtxtPtr ctxt;
12690 xmlParserInputPtr pinput = NULL;
12691 xmlChar start[4];
12692
12693 if (input == NULL)
12694 return(NULL);
12695
12696 ctxt = xmlNewParserCtxt();
12697 if (ctxt == NULL) {
12698 xmlFreeParserInputBuffer(input);
12699 return(NULL);
12700 }
12701
12702 /* We are loading a DTD */
12703 ctxt->options |= XML_PARSE_DTDLOAD;
12704
12705 /*
12706 * Set-up the SAX context
12707 */
12708 if (sax != NULL) {
12709 if (ctxt->sax != NULL)
12710 xmlFree(ctxt->sax);
12711 ctxt->sax = sax;
12712 ctxt->userData = ctxt;
12713 }
12714 xmlDetectSAX2(ctxt);
12715
12716 /*
12717 * generate a parser input from the I/O handler
12718 */
12719
12720 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12721 if (pinput == NULL) {
12722 if (sax != NULL) ctxt->sax = NULL;
12723 xmlFreeParserInputBuffer(input);
12724 xmlFreeParserCtxt(ctxt);
12725 return(NULL);
12726 }
12727
12728 /*
12729 * plug some encoding conversion routines here.
12730 */
12731 if (xmlPushInput(ctxt, pinput) < 0) {
12732 if (sax != NULL) ctxt->sax = NULL;
12733 xmlFreeParserCtxt(ctxt);
12734 return(NULL);
12735 }
12736 if (enc != XML_CHAR_ENCODING_NONE) {
12737 xmlSwitchEncoding(ctxt, enc);
12738 }
12739
12740 pinput->filename = NULL;
12741 pinput->line = 1;
12742 pinput->col = 1;
12743 pinput->base = ctxt->input->cur;
12744 pinput->cur = ctxt->input->cur;
12745 pinput->free = NULL;
12746
12747 /*
12748 * let's parse that entity knowing it's an external subset.
12749 */
12750 ctxt->inSubset = 2;
12751 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12752 if (ctxt->myDoc == NULL) {
12753 xmlErrMemory(ctxt, "New Doc failed");
12754 return(NULL);
12755 }
12756 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12757 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12758 BAD_CAST "none", BAD_CAST "none");
12759
12760 if ((enc == XML_CHAR_ENCODING_NONE) &&
12761 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12762 /*
12763 * Get the 4 first bytes and decode the charset
12764 * if enc != XML_CHAR_ENCODING_NONE
12765 * plug some encoding conversion routines.
12766 */
12767 start[0] = RAW;
12768 start[1] = NXT(1);
12769 start[2] = NXT(2);
12770 start[3] = NXT(3);
12771 enc = xmlDetectCharEncoding(start, 4);
12772 if (enc != XML_CHAR_ENCODING_NONE) {
12773 xmlSwitchEncoding(ctxt, enc);
12774 }
12775 }
12776
12777 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12778
12779 if (ctxt->myDoc != NULL) {
12780 if (ctxt->wellFormed) {
12781 ret = ctxt->myDoc->extSubset;
12782 ctxt->myDoc->extSubset = NULL;
12783 if (ret != NULL) {
12784 xmlNodePtr tmp;
12785
12786 ret->doc = NULL;
12787 tmp = ret->children;
12788 while (tmp != NULL) {
12789 tmp->doc = NULL;
12790 tmp = tmp->next;
12791 }
12792 }
12793 } else {
12794 ret = NULL;
12795 }
12796 xmlFreeDoc(ctxt->myDoc);
12797 ctxt->myDoc = NULL;
12798 }
12799 if (sax != NULL) ctxt->sax = NULL;
12800 xmlFreeParserCtxt(ctxt);
12801
12802 return(ret);
12803 }
12804
12805 /**
12806 * xmlSAXParseDTD:
12807 * @sax: the SAX handler block
12808 * @ExternalID: a NAME* containing the External ID of the DTD
12809 * @SystemID: a NAME* containing the URL to the DTD
12810 *
12811 * Load and parse an external subset.
12812 *
12813 * Returns the resulting xmlDtdPtr or NULL in case of error.
12814 */
12815
12816 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12817 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12818 const xmlChar *SystemID) {
12819 xmlDtdPtr ret = NULL;
12820 xmlParserCtxtPtr ctxt;
12821 xmlParserInputPtr input = NULL;
12822 xmlCharEncoding enc;
12823 xmlChar* systemIdCanonic;
12824
12825 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12826
12827 ctxt = xmlNewParserCtxt();
12828 if (ctxt == NULL) {
12829 return(NULL);
12830 }
12831
12832 /* We are loading a DTD */
12833 ctxt->options |= XML_PARSE_DTDLOAD;
12834
12835 /*
12836 * Set-up the SAX context
12837 */
12838 if (sax != NULL) {
12839 if (ctxt->sax != NULL)
12840 xmlFree(ctxt->sax);
12841 ctxt->sax = sax;
12842 ctxt->userData = ctxt;
12843 }
12844
12845 /*
12846 * Canonicalise the system ID
12847 */
12848 systemIdCanonic = xmlCanonicPath(SystemID);
12849 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12850 xmlFreeParserCtxt(ctxt);
12851 return(NULL);
12852 }
12853
12854 /*
12855 * Ask the Entity resolver to load the damn thing
12856 */
12857
12858 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12859 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12860 systemIdCanonic);
12861 if (input == NULL) {
12862 if (sax != NULL) ctxt->sax = NULL;
12863 xmlFreeParserCtxt(ctxt);
12864 if (systemIdCanonic != NULL)
12865 xmlFree(systemIdCanonic);
12866 return(NULL);
12867 }
12868
12869 /*
12870 * plug some encoding conversion routines here.
12871 */
12872 if (xmlPushInput(ctxt, input) < 0) {
12873 if (sax != NULL) ctxt->sax = NULL;
12874 xmlFreeParserCtxt(ctxt);
12875 if (systemIdCanonic != NULL)
12876 xmlFree(systemIdCanonic);
12877 return(NULL);
12878 }
12879 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12880 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12881 xmlSwitchEncoding(ctxt, enc);
12882 }
12883
12884 if (input->filename == NULL)
12885 input->filename = (char *) systemIdCanonic;
12886 else
12887 xmlFree(systemIdCanonic);
12888 input->line = 1;
12889 input->col = 1;
12890 input->base = ctxt->input->cur;
12891 input->cur = ctxt->input->cur;
12892 input->free = NULL;
12893
12894 /*
12895 * let's parse that entity knowing it's an external subset.
12896 */
12897 ctxt->inSubset = 2;
12898 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12899 if (ctxt->myDoc == NULL) {
12900 xmlErrMemory(ctxt, "New Doc failed");
12901 if (sax != NULL) ctxt->sax = NULL;
12902 xmlFreeParserCtxt(ctxt);
12903 return(NULL);
12904 }
12905 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12906 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12907 ExternalID, SystemID);
12908 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12909
12910 if (ctxt->myDoc != NULL) {
12911 if (ctxt->wellFormed) {
12912 ret = ctxt->myDoc->extSubset;
12913 ctxt->myDoc->extSubset = NULL;
12914 if (ret != NULL) {
12915 xmlNodePtr tmp;
12916
12917 ret->doc = NULL;
12918 tmp = ret->children;
12919 while (tmp != NULL) {
12920 tmp->doc = NULL;
12921 tmp = tmp->next;
12922 }
12923 }
12924 } else {
12925 ret = NULL;
12926 }
12927 xmlFreeDoc(ctxt->myDoc);
12928 ctxt->myDoc = NULL;
12929 }
12930 if (sax != NULL) ctxt->sax = NULL;
12931 xmlFreeParserCtxt(ctxt);
12932
12933 return(ret);
12934 }
12935
12936
12937 /**
12938 * xmlParseDTD:
12939 * @ExternalID: a NAME* containing the External ID of the DTD
12940 * @SystemID: a NAME* containing the URL to the DTD
12941 *
12942 * Load and parse an external subset.
12943 *
12944 * Returns the resulting xmlDtdPtr or NULL in case of error.
12945 */
12946
12947 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12948 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12949 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12950 }
12951 #endif /* LIBXML_VALID_ENABLED */
12952
12953 /************************************************************************
12954 * *
12955 * Front ends when parsing an Entity *
12956 * *
12957 ************************************************************************/
12958
12959 /**
12960 * xmlParseCtxtExternalEntity:
12961 * @ctx: the existing parsing context
12962 * @URL: the URL for the entity to load
12963 * @ID: the System ID for the entity to load
12964 * @lst: the return value for the set of parsed nodes
12965 *
12966 * Parse an external general entity within an existing parsing context
12967 * An external general parsed entity is well-formed if it matches the
12968 * production labeled extParsedEnt.
12969 *
12970 * [78] extParsedEnt ::= TextDecl? content
12971 *
12972 * Returns 0 if the entity is well formed, -1 in case of args problem and
12973 * the parser error code otherwise
12974 */
12975
12976 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12977 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12978 const xmlChar *ID, xmlNodePtr *lst) {
12979 xmlParserCtxtPtr ctxt;
12980 xmlDocPtr newDoc;
12981 xmlNodePtr newRoot;
12982 xmlSAXHandlerPtr oldsax = NULL;
12983 int ret = 0;
12984 xmlChar start[4];
12985 xmlCharEncoding enc;
12986
12987 if (ctx == NULL) return(-1);
12988
12989 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12990 (ctx->depth > 1024)) {
12991 return(XML_ERR_ENTITY_LOOP);
12992 }
12993
12994 if (lst != NULL)
12995 *lst = NULL;
12996 if ((URL == NULL) && (ID == NULL))
12997 return(-1);
12998 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12999 return(-1);
13000
13001 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
13002 if (ctxt == NULL) {
13003 return(-1);
13004 }
13005
13006 oldsax = ctxt->sax;
13007 ctxt->sax = ctx->sax;
13008 xmlDetectSAX2(ctxt);
13009 newDoc = xmlNewDoc(BAD_CAST "1.0");
13010 if (newDoc == NULL) {
13011 xmlFreeParserCtxt(ctxt);
13012 return(-1);
13013 }
13014 newDoc->properties = XML_DOC_INTERNAL;
13015 if (ctx->myDoc->dict) {
13016 newDoc->dict = ctx->myDoc->dict;
13017 xmlDictReference(newDoc->dict);
13018 }
13019 if (ctx->myDoc != NULL) {
13020 newDoc->intSubset = ctx->myDoc->intSubset;
13021 newDoc->extSubset = ctx->myDoc->extSubset;
13022 }
13023 if (ctx->myDoc->URL != NULL) {
13024 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13025 }
13026 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13027 if (newRoot == NULL) {
13028 ctxt->sax = oldsax;
13029 xmlFreeParserCtxt(ctxt);
13030 newDoc->intSubset = NULL;
13031 newDoc->extSubset = NULL;
13032 xmlFreeDoc(newDoc);
13033 return(-1);
13034 }
13035 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13036 nodePush(ctxt, newDoc->children);
13037 if (ctx->myDoc == NULL) {
13038 ctxt->myDoc = newDoc;
13039 } else {
13040 ctxt->myDoc = ctx->myDoc;
13041 newDoc->children->doc = ctx->myDoc;
13042 }
13043
13044 /*
13045 * Get the 4 first bytes and decode the charset
13046 * if enc != XML_CHAR_ENCODING_NONE
13047 * plug some encoding conversion routines.
13048 */
13049 GROW
13050 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13051 start[0] = RAW;
13052 start[1] = NXT(1);
13053 start[2] = NXT(2);
13054 start[3] = NXT(3);
13055 enc = xmlDetectCharEncoding(start, 4);
13056 if (enc != XML_CHAR_ENCODING_NONE) {
13057 xmlSwitchEncoding(ctxt, enc);
13058 }
13059 }
13060
13061 /*
13062 * Parse a possible text declaration first
13063 */
13064 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13065 xmlParseTextDecl(ctxt);
13066 /*
13067 * An XML-1.0 document can't reference an entity not XML-1.0
13068 */
13069 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13070 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13071 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13072 "Version mismatch between document and entity\n");
13073 }
13074 }
13075
13076 /*
13077 * If the user provided its own SAX callbacks then reuse the
13078 * useData callback field, otherwise the expected setup in a
13079 * DOM builder is to have userData == ctxt
13080 */
13081 if (ctx->userData == ctx)
13082 ctxt->userData = ctxt;
13083 else
13084 ctxt->userData = ctx->userData;
13085
13086 /*
13087 * Doing validity checking on chunk doesn't make sense
13088 */
13089 ctxt->instate = XML_PARSER_CONTENT;
13090 ctxt->validate = ctx->validate;
13091 ctxt->valid = ctx->valid;
13092 ctxt->loadsubset = ctx->loadsubset;
13093 ctxt->depth = ctx->depth + 1;
13094 ctxt->replaceEntities = ctx->replaceEntities;
13095 if (ctxt->validate) {
13096 ctxt->vctxt.error = ctx->vctxt.error;
13097 ctxt->vctxt.warning = ctx->vctxt.warning;
13098 } else {
13099 ctxt->vctxt.error = NULL;
13100 ctxt->vctxt.warning = NULL;
13101 }
13102 ctxt->vctxt.nodeTab = NULL;
13103 ctxt->vctxt.nodeNr = 0;
13104 ctxt->vctxt.nodeMax = 0;
13105 ctxt->vctxt.node = NULL;
13106 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13107 ctxt->dict = ctx->dict;
13108 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13109 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13110 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13111 ctxt->dictNames = ctx->dictNames;
13112 ctxt->attsDefault = ctx->attsDefault;
13113 ctxt->attsSpecial = ctx->attsSpecial;
13114 ctxt->linenumbers = ctx->linenumbers;
13115
13116 xmlParseContent(ctxt);
13117
13118 ctx->validate = ctxt->validate;
13119 ctx->valid = ctxt->valid;
13120 if ((RAW == '<') && (NXT(1) == '/')) {
13121 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13122 } else if (RAW != 0) {
13123 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13124 }
13125 if (ctxt->node != newDoc->children) {
13126 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13127 }
13128
13129 if (!ctxt->wellFormed) {
13130 if (ctxt->errNo == 0)
13131 ret = 1;
13132 else
13133 ret = ctxt->errNo;
13134 } else {
13135 if (lst != NULL) {
13136 xmlNodePtr cur;
13137
13138 /*
13139 * Return the newly created nodeset after unlinking it from
13140 * they pseudo parent.
13141 */
13142 cur = newDoc->children->children;
13143 *lst = cur;
13144 while (cur != NULL) {
13145 cur->parent = NULL;
13146 cur = cur->next;
13147 }
13148 newDoc->children->children = NULL;
13149 }
13150 ret = 0;
13151 }
13152 ctxt->sax = oldsax;
13153 ctxt->dict = NULL;
13154 ctxt->attsDefault = NULL;
13155 ctxt->attsSpecial = NULL;
13156 xmlFreeParserCtxt(ctxt);
13157 newDoc->intSubset = NULL;
13158 newDoc->extSubset = NULL;
13159 xmlFreeDoc(newDoc);
13160
13161 return(ret);
13162 }
13163
13164 /**
13165 * xmlParseExternalEntityPrivate:
13166 * @doc: the document the chunk pertains to
13167 * @oldctxt: the previous parser context if available
13168 * @sax: the SAX handler bloc (possibly NULL)
13169 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13170 * @depth: Used for loop detection, use 0
13171 * @URL: the URL for the entity to load
13172 * @ID: the System ID for the entity to load
13173 * @list: the return value for the set of parsed nodes
13174 *
13175 * Private version of xmlParseExternalEntity()
13176 *
13177 * Returns 0 if the entity is well formed, -1 in case of args problem and
13178 * the parser error code otherwise
13179 */
13180
13181 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13182 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13183 xmlSAXHandlerPtr sax,
13184 void *user_data, int depth, const xmlChar *URL,
13185 const xmlChar *ID, xmlNodePtr *list) {
13186 xmlParserCtxtPtr ctxt;
13187 xmlDocPtr newDoc;
13188 xmlNodePtr newRoot;
13189 xmlSAXHandlerPtr oldsax = NULL;
13190 xmlParserErrors ret = XML_ERR_OK;
13191 xmlChar start[4];
13192 xmlCharEncoding enc;
13193
13194 if (((depth > 40) &&
13195 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13196 (depth > 1024)) {
13197 return(XML_ERR_ENTITY_LOOP);
13198 }
13199
13200 if (list != NULL)
13201 *list = NULL;
13202 if ((URL == NULL) && (ID == NULL))
13203 return(XML_ERR_INTERNAL_ERROR);
13204 if (doc == NULL)
13205 return(XML_ERR_INTERNAL_ERROR);
13206
13207
13208 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13209 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13210 ctxt->userData = ctxt;
13211 if (oldctxt != NULL) {
13212 ctxt->_private = oldctxt->_private;
13213 ctxt->loadsubset = oldctxt->loadsubset;
13214 ctxt->validate = oldctxt->validate;
13215 ctxt->external = oldctxt->external;
13216 ctxt->record_info = oldctxt->record_info;
13217 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13218 ctxt->node_seq.length = oldctxt->node_seq.length;
13219 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13220 } else {
13221 /*
13222 * Doing validity checking on chunk without context
13223 * doesn't make sense
13224 */
13225 ctxt->_private = NULL;
13226 ctxt->validate = 0;
13227 ctxt->external = 2;
13228 ctxt->loadsubset = 0;
13229 }
13230 if (sax != NULL) {
13231 oldsax = ctxt->sax;
13232 ctxt->sax = sax;
13233 if (user_data != NULL)
13234 ctxt->userData = user_data;
13235 }
13236 xmlDetectSAX2(ctxt);
13237 newDoc = xmlNewDoc(BAD_CAST "1.0");
13238 if (newDoc == NULL) {
13239 ctxt->node_seq.maximum = 0;
13240 ctxt->node_seq.length = 0;
13241 ctxt->node_seq.buffer = NULL;
13242 xmlFreeParserCtxt(ctxt);
13243 return(XML_ERR_INTERNAL_ERROR);
13244 }
13245 newDoc->properties = XML_DOC_INTERNAL;
13246 newDoc->intSubset = doc->intSubset;
13247 newDoc->extSubset = doc->extSubset;
13248 newDoc->dict = doc->dict;
13249 xmlDictReference(newDoc->dict);
13250
13251 if (doc->URL != NULL) {
13252 newDoc->URL = xmlStrdup(doc->URL);
13253 }
13254 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13255 if (newRoot == NULL) {
13256 if (sax != NULL)
13257 ctxt->sax = oldsax;
13258 ctxt->node_seq.maximum = 0;
13259 ctxt->node_seq.length = 0;
13260 ctxt->node_seq.buffer = NULL;
13261 xmlFreeParserCtxt(ctxt);
13262 newDoc->intSubset = NULL;
13263 newDoc->extSubset = NULL;
13264 xmlFreeDoc(newDoc);
13265 return(XML_ERR_INTERNAL_ERROR);
13266 }
13267 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13268 nodePush(ctxt, newDoc->children);
13269 ctxt->myDoc = doc;
13270 newRoot->doc = doc;
13271
13272 /*
13273 * Get the 4 first bytes and decode the charset
13274 * if enc != XML_CHAR_ENCODING_NONE
13275 * plug some encoding conversion routines.
13276 */
13277 GROW;
13278 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13279 start[0] = RAW;
13280 start[1] = NXT(1);
13281 start[2] = NXT(2);
13282 start[3] = NXT(3);
13283 enc = xmlDetectCharEncoding(start, 4);
13284 if (enc != XML_CHAR_ENCODING_NONE) {
13285 xmlSwitchEncoding(ctxt, enc);
13286 }
13287 }
13288
13289 /*
13290 * Parse a possible text declaration first
13291 */
13292 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13293 xmlParseTextDecl(ctxt);
13294 }
13295
13296 ctxt->instate = XML_PARSER_CONTENT;
13297 ctxt->depth = depth;
13298
13299 xmlParseContent(ctxt);
13300
13301 if ((RAW == '<') && (NXT(1) == '/')) {
13302 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13303 } else if (RAW != 0) {
13304 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13305 }
13306 if (ctxt->node != newDoc->children) {
13307 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13308 }
13309
13310 if (!ctxt->wellFormed) {
13311 if (ctxt->errNo == 0)
13312 ret = XML_ERR_INTERNAL_ERROR;
13313 else
13314 ret = (xmlParserErrors)ctxt->errNo;
13315 } else {
13316 if (list != NULL) {
13317 xmlNodePtr cur;
13318
13319 /*
13320 * Return the newly created nodeset after unlinking it from
13321 * they pseudo parent.
13322 */
13323 cur = newDoc->children->children;
13324 *list = cur;
13325 while (cur != NULL) {
13326 cur->parent = NULL;
13327 cur = cur->next;
13328 }
13329 newDoc->children->children = NULL;
13330 }
13331 ret = XML_ERR_OK;
13332 }
13333
13334 /*
13335 * Record in the parent context the number of entities replacement
13336 * done when parsing that reference.
13337 */
13338 if (oldctxt != NULL)
13339 oldctxt->nbentities += ctxt->nbentities;
13340
13341 /*
13342 * Also record the size of the entity parsed
13343 */
13344 if (ctxt->input != NULL) {
13345 oldctxt->sizeentities += ctxt->input->consumed;
13346 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13347 }
13348 /*
13349 * And record the last error if any
13350 */
13351 if (ctxt->lastError.code != XML_ERR_OK)
13352 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13353
13354 if (sax != NULL)
13355 ctxt->sax = oldsax;
13356 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13357 oldctxt->node_seq.length = ctxt->node_seq.length;
13358 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13359 ctxt->node_seq.maximum = 0;
13360 ctxt->node_seq.length = 0;
13361 ctxt->node_seq.buffer = NULL;
13362 xmlFreeParserCtxt(ctxt);
13363 newDoc->intSubset = NULL;
13364 newDoc->extSubset = NULL;
13365 xmlFreeDoc(newDoc);
13366
13367 return(ret);
13368 }
13369
13370 #ifdef LIBXML_SAX1_ENABLED
13371 /**
13372 * xmlParseExternalEntity:
13373 * @doc: the document the chunk pertains to
13374 * @sax: the SAX handler bloc (possibly NULL)
13375 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13376 * @depth: Used for loop detection, use 0
13377 * @URL: the URL for the entity to load
13378 * @ID: the System ID for the entity to load
13379 * @lst: the return value for the set of parsed nodes
13380 *
13381 * Parse an external general entity
13382 * An external general parsed entity is well-formed if it matches the
13383 * production labeled extParsedEnt.
13384 *
13385 * [78] extParsedEnt ::= TextDecl? content
13386 *
13387 * Returns 0 if the entity is well formed, -1 in case of args problem and
13388 * the parser error code otherwise
13389 */
13390
13391 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13392 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13393 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13394 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13395 ID, lst));
13396 }
13397
13398 /**
13399 * xmlParseBalancedChunkMemory:
13400 * @doc: the document the chunk pertains to
13401 * @sax: the SAX handler bloc (possibly NULL)
13402 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13403 * @depth: Used for loop detection, use 0
13404 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13405 * @lst: the return value for the set of parsed nodes
13406 *
13407 * Parse a well-balanced chunk of an XML document
13408 * called by the parser
13409 * The allowed sequence for the Well Balanced Chunk is the one defined by
13410 * the content production in the XML grammar:
13411 *
13412 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13413 *
13414 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13415 * the parser error code otherwise
13416 */
13417
13418 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13419 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13420 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13421 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13422 depth, string, lst, 0 );
13423 }
13424 #endif /* LIBXML_SAX1_ENABLED */
13425
13426 /**
13427 * xmlParseBalancedChunkMemoryInternal:
13428 * @oldctxt: the existing parsing context
13429 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13430 * @user_data: the user data field for the parser context
13431 * @lst: the return value for the set of parsed nodes
13432 *
13433 *
13434 * Parse a well-balanced chunk of an XML document
13435 * called by the parser
13436 * The allowed sequence for the Well Balanced Chunk is the one defined by
13437 * the content production in the XML grammar:
13438 *
13439 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13440 *
13441 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13442 * error code otherwise
13443 *
13444 * In case recover is set to 1, the nodelist will not be empty even if
13445 * the parsed chunk is not well balanced.
13446 */
13447 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13448 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13449 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13450 xmlParserCtxtPtr ctxt;
13451 xmlDocPtr newDoc = NULL;
13452 xmlNodePtr newRoot;
13453 xmlSAXHandlerPtr oldsax = NULL;
13454 xmlNodePtr content = NULL;
13455 xmlNodePtr last = NULL;
13456 int size;
13457 xmlParserErrors ret = XML_ERR_OK;
13458 #ifdef SAX2
13459 int i;
13460 #endif
13461
13462 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13463 (oldctxt->depth > 1024)) {
13464 return(XML_ERR_ENTITY_LOOP);
13465 }
13466
13467
13468 if (lst != NULL)
13469 *lst = NULL;
13470 if (string == NULL)
13471 return(XML_ERR_INTERNAL_ERROR);
13472
13473 size = xmlStrlen(string);
13474
13475 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13476 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13477 if (user_data != NULL)
13478 ctxt->userData = user_data;
13479 else
13480 ctxt->userData = ctxt;
13481 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13482 ctxt->dict = oldctxt->dict;
13483 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13484 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13485 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13486
13487 #ifdef SAX2
13488 /* propagate namespaces down the entity */
13489 for (i = 0;i < oldctxt->nsNr;i += 2) {
13490 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13491 }
13492 #endif
13493
13494 oldsax = ctxt->sax;
13495 ctxt->sax = oldctxt->sax;
13496 xmlDetectSAX2(ctxt);
13497 ctxt->replaceEntities = oldctxt->replaceEntities;
13498 ctxt->options = oldctxt->options;
13499
13500 ctxt->_private = oldctxt->_private;
13501 if (oldctxt->myDoc == NULL) {
13502 newDoc = xmlNewDoc(BAD_CAST "1.0");
13503 if (newDoc == NULL) {
13504 ctxt->sax = oldsax;
13505 ctxt->dict = NULL;
13506 xmlFreeParserCtxt(ctxt);
13507 return(XML_ERR_INTERNAL_ERROR);
13508 }
13509 newDoc->properties = XML_DOC_INTERNAL;
13510 newDoc->dict = ctxt->dict;
13511 xmlDictReference(newDoc->dict);
13512 ctxt->myDoc = newDoc;
13513 } else {
13514 ctxt->myDoc = oldctxt->myDoc;
13515 content = ctxt->myDoc->children;
13516 last = ctxt->myDoc->last;
13517 }
13518 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13519 if (newRoot == NULL) {
13520 ctxt->sax = oldsax;
13521 ctxt->dict = NULL;
13522 xmlFreeParserCtxt(ctxt);
13523 if (newDoc != NULL) {
13524 xmlFreeDoc(newDoc);
13525 }
13526 return(XML_ERR_INTERNAL_ERROR);
13527 }
13528 ctxt->myDoc->children = NULL;
13529 ctxt->myDoc->last = NULL;
13530 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13531 nodePush(ctxt, ctxt->myDoc->children);
13532 ctxt->instate = XML_PARSER_CONTENT;
13533 ctxt->depth = oldctxt->depth + 1;
13534
13535 ctxt->validate = 0;
13536 ctxt->loadsubset = oldctxt->loadsubset;
13537 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13538 /*
13539 * ID/IDREF registration will be done in xmlValidateElement below
13540 */
13541 ctxt->loadsubset |= XML_SKIP_IDS;
13542 }
13543 ctxt->dictNames = oldctxt->dictNames;
13544 ctxt->attsDefault = oldctxt->attsDefault;
13545 ctxt->attsSpecial = oldctxt->attsSpecial;
13546
13547 xmlParseContent(ctxt);
13548 if ((RAW == '<') && (NXT(1) == '/')) {
13549 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13550 } else if (RAW != 0) {
13551 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13552 }
13553 if (ctxt->node != ctxt->myDoc->children) {
13554 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13555 }
13556
13557 if (!ctxt->wellFormed) {
13558 if (ctxt->errNo == 0)
13559 ret = XML_ERR_INTERNAL_ERROR;
13560 else
13561 ret = (xmlParserErrors)ctxt->errNo;
13562 } else {
13563 ret = XML_ERR_OK;
13564 }
13565
13566 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13567 xmlNodePtr cur;
13568
13569 /*
13570 * Return the newly created nodeset after unlinking it from
13571 * they pseudo parent.
13572 */
13573 cur = ctxt->myDoc->children->children;
13574 *lst = cur;
13575 while (cur != NULL) {
13576 #ifdef LIBXML_VALID_ENABLED
13577 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13578 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13579 (cur->type == XML_ELEMENT_NODE)) {
13580 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13581 oldctxt->myDoc, cur);
13582 }
13583 #endif /* LIBXML_VALID_ENABLED */
13584 cur->parent = NULL;
13585 cur = cur->next;
13586 }
13587 ctxt->myDoc->children->children = NULL;
13588 }
13589 if (ctxt->myDoc != NULL) {
13590 xmlFreeNode(ctxt->myDoc->children);
13591 ctxt->myDoc->children = content;
13592 ctxt->myDoc->last = last;
13593 }
13594
13595 /*
13596 * Record in the parent context the number of entities replacement
13597 * done when parsing that reference.
13598 */
13599 if (oldctxt != NULL)
13600 oldctxt->nbentities += ctxt->nbentities;
13601
13602 /*
13603 * Also record the last error if any
13604 */
13605 if (ctxt->lastError.code != XML_ERR_OK)
13606 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13607
13608 ctxt->sax = oldsax;
13609 ctxt->dict = NULL;
13610 ctxt->attsDefault = NULL;
13611 ctxt->attsSpecial = NULL;
13612 xmlFreeParserCtxt(ctxt);
13613 if (newDoc != NULL) {
13614 xmlFreeDoc(newDoc);
13615 }
13616
13617 return(ret);
13618 }
13619
13620 /**
13621 * xmlParseInNodeContext:
13622 * @node: the context node
13623 * @data: the input string
13624 * @datalen: the input string length in bytes
13625 * @options: a combination of xmlParserOption
13626 * @lst: the return value for the set of parsed nodes
13627 *
13628 * Parse a well-balanced chunk of an XML document
13629 * within the context (DTD, namespaces, etc ...) of the given node.
13630 *
13631 * The allowed sequence for the data is a Well Balanced Chunk defined by
13632 * the content production in the XML grammar:
13633 *
13634 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13635 *
13636 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13637 * error code otherwise
13638 */
13639 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13640 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13641 int options, xmlNodePtr *lst) {
13642 #ifdef SAX2
13643 xmlParserCtxtPtr ctxt;
13644 xmlDocPtr doc = NULL;
13645 xmlNodePtr fake, cur;
13646 int nsnr = 0;
13647
13648 xmlParserErrors ret = XML_ERR_OK;
13649
13650 /*
13651 * check all input parameters, grab the document
13652 */
13653 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13654 return(XML_ERR_INTERNAL_ERROR);
13655 switch (node->type) {
13656 case XML_ELEMENT_NODE:
13657 case XML_ATTRIBUTE_NODE:
13658 case XML_TEXT_NODE:
13659 case XML_CDATA_SECTION_NODE:
13660 case XML_ENTITY_REF_NODE:
13661 case XML_PI_NODE:
13662 case XML_COMMENT_NODE:
13663 case XML_DOCUMENT_NODE:
13664 case XML_HTML_DOCUMENT_NODE:
13665 break;
13666 default:
13667 return(XML_ERR_INTERNAL_ERROR);
13668
13669 }
13670 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13671 (node->type != XML_DOCUMENT_NODE) &&
13672 (node->type != XML_HTML_DOCUMENT_NODE))
13673 node = node->parent;
13674 if (node == NULL)
13675 return(XML_ERR_INTERNAL_ERROR);
13676 if (node->type == XML_ELEMENT_NODE)
13677 doc = node->doc;
13678 else
13679 doc = (xmlDocPtr) node;
13680 if (doc == NULL)
13681 return(XML_ERR_INTERNAL_ERROR);
13682
13683 /*
13684 * allocate a context and set-up everything not related to the
13685 * node position in the tree
13686 */
13687 if (doc->type == XML_DOCUMENT_NODE)
13688 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13689 #ifdef LIBXML_HTML_ENABLED
13690 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13691 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13692 /*
13693 * When parsing in context, it makes no sense to add implied
13694 * elements like html/body/etc...
13695 */
13696 options |= HTML_PARSE_NOIMPLIED;
13697 }
13698 #endif
13699 else
13700 return(XML_ERR_INTERNAL_ERROR);
13701
13702 if (ctxt == NULL)
13703 return(XML_ERR_NO_MEMORY);
13704
13705 /*
13706 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13707 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13708 * we must wait until the last moment to free the original one.
13709 */
13710 if (doc->dict != NULL) {
13711 if (ctxt->dict != NULL)
13712 xmlDictFree(ctxt->dict);
13713 ctxt->dict = doc->dict;
13714 } else
13715 options |= XML_PARSE_NODICT;
13716
13717 if (doc->encoding != NULL) {
13718 xmlCharEncodingHandlerPtr hdlr;
13719
13720 if (ctxt->encoding != NULL)
13721 xmlFree((xmlChar *) ctxt->encoding);
13722 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13723
13724 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13725 if (hdlr != NULL) {
13726 xmlSwitchToEncoding(ctxt, hdlr);
13727 } else {
13728 return(XML_ERR_UNSUPPORTED_ENCODING);
13729 }
13730 }
13731
13732 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13733 xmlDetectSAX2(ctxt);
13734 ctxt->myDoc = doc;
13735 /* parsing in context, i.e. as within existing content */
13736 ctxt->instate = XML_PARSER_CONTENT;
13737
13738 fake = xmlNewComment(NULL);
13739 if (fake == NULL) {
13740 xmlFreeParserCtxt(ctxt);
13741 return(XML_ERR_NO_MEMORY);
13742 }
13743 xmlAddChild(node, fake);
13744
13745 if (node->type == XML_ELEMENT_NODE) {
13746 nodePush(ctxt, node);
13747 /*
13748 * initialize the SAX2 namespaces stack
13749 */
13750 cur = node;
13751 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13752 xmlNsPtr ns = cur->nsDef;
13753 const xmlChar *iprefix, *ihref;
13754
13755 while (ns != NULL) {
13756 if (ctxt->dict) {
13757 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13758 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13759 } else {
13760 iprefix = ns->prefix;
13761 ihref = ns->href;
13762 }
13763
13764 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13765 nsPush(ctxt, iprefix, ihref);
13766 nsnr++;
13767 }
13768 ns = ns->next;
13769 }
13770 cur = cur->parent;
13771 }
13772 }
13773
13774 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13775 /*
13776 * ID/IDREF registration will be done in xmlValidateElement below
13777 */
13778 ctxt->loadsubset |= XML_SKIP_IDS;
13779 }
13780
13781 #ifdef LIBXML_HTML_ENABLED
13782 if (doc->type == XML_HTML_DOCUMENT_NODE)
13783 __htmlParseContent(ctxt);
13784 else
13785 #endif
13786 xmlParseContent(ctxt);
13787
13788 nsPop(ctxt, nsnr);
13789 if ((RAW == '<') && (NXT(1) == '/')) {
13790 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13791 } else if (RAW != 0) {
13792 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13793 }
13794 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13795 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13796 ctxt->wellFormed = 0;
13797 }
13798
13799 if (!ctxt->wellFormed) {
13800 if (ctxt->errNo == 0)
13801 ret = XML_ERR_INTERNAL_ERROR;
13802 else
13803 ret = (xmlParserErrors)ctxt->errNo;
13804 } else {
13805 ret = XML_ERR_OK;
13806 }
13807
13808 /*
13809 * Return the newly created nodeset after unlinking it from
13810 * the pseudo sibling.
13811 */
13812
13813 cur = fake->next;
13814 fake->next = NULL;
13815 node->last = fake;
13816
13817 if (cur != NULL) {
13818 cur->prev = NULL;
13819 }
13820
13821 *lst = cur;
13822
13823 while (cur != NULL) {
13824 cur->parent = NULL;
13825 cur = cur->next;
13826 }
13827
13828 xmlUnlinkNode(fake);
13829 xmlFreeNode(fake);
13830
13831
13832 if (ret != XML_ERR_OK) {
13833 xmlFreeNodeList(*lst);
13834 *lst = NULL;
13835 }
13836
13837 if (doc->dict != NULL)
13838 ctxt->dict = NULL;
13839 xmlFreeParserCtxt(ctxt);
13840
13841 return(ret);
13842 #else /* !SAX2 */
13843 return(XML_ERR_INTERNAL_ERROR);
13844 #endif
13845 }
13846
13847 #ifdef LIBXML_SAX1_ENABLED
13848 /**
13849 * xmlParseBalancedChunkMemoryRecover:
13850 * @doc: the document the chunk pertains to
13851 * @sax: the SAX handler bloc (possibly NULL)
13852 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13853 * @depth: Used for loop detection, use 0
13854 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13855 * @lst: the return value for the set of parsed nodes
13856 * @recover: return nodes even if the data is broken (use 0)
13857 *
13858 *
13859 * Parse a well-balanced chunk of an XML document
13860 * called by the parser
13861 * The allowed sequence for the Well Balanced Chunk is the one defined by
13862 * the content production in the XML grammar:
13863 *
13864 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13865 *
13866 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13867 * the parser error code otherwise
13868 *
13869 * In case recover is set to 1, the nodelist will not be empty even if
13870 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13871 * some extent.
13872 */
13873 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13874 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13875 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13876 int recover) {
13877 xmlParserCtxtPtr ctxt;
13878 xmlDocPtr newDoc;
13879 xmlSAXHandlerPtr oldsax = NULL;
13880 xmlNodePtr content, newRoot;
13881 int size;
13882 int ret = 0;
13883
13884 if (depth > 40) {
13885 return(XML_ERR_ENTITY_LOOP);
13886 }
13887
13888
13889 if (lst != NULL)
13890 *lst = NULL;
13891 if (string == NULL)
13892 return(-1);
13893
13894 size = xmlStrlen(string);
13895
13896 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13897 if (ctxt == NULL) return(-1);
13898 ctxt->userData = ctxt;
13899 if (sax != NULL) {
13900 oldsax = ctxt->sax;
13901 ctxt->sax = sax;
13902 if (user_data != NULL)
13903 ctxt->userData = user_data;
13904 }
13905 newDoc = xmlNewDoc(BAD_CAST "1.0");
13906 if (newDoc == NULL) {
13907 xmlFreeParserCtxt(ctxt);
13908 return(-1);
13909 }
13910 newDoc->properties = XML_DOC_INTERNAL;
13911 if ((doc != NULL) && (doc->dict != NULL)) {
13912 xmlDictFree(ctxt->dict);
13913 ctxt->dict = doc->dict;
13914 xmlDictReference(ctxt->dict);
13915 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13916 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13917 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13918 ctxt->dictNames = 1;
13919 } else {
13920 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13921 }
13922 if (doc != NULL) {
13923 newDoc->intSubset = doc->intSubset;
13924 newDoc->extSubset = doc->extSubset;
13925 }
13926 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13927 if (newRoot == NULL) {
13928 if (sax != NULL)
13929 ctxt->sax = oldsax;
13930 xmlFreeParserCtxt(ctxt);
13931 newDoc->intSubset = NULL;
13932 newDoc->extSubset = NULL;
13933 xmlFreeDoc(newDoc);
13934 return(-1);
13935 }
13936 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13937 nodePush(ctxt, newRoot);
13938 if (doc == NULL) {
13939 ctxt->myDoc = newDoc;
13940 } else {
13941 ctxt->myDoc = newDoc;
13942 newDoc->children->doc = doc;
13943 /* Ensure that doc has XML spec namespace */
13944 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13945 newDoc->oldNs = doc->oldNs;
13946 }
13947 ctxt->instate = XML_PARSER_CONTENT;
13948 ctxt->depth = depth;
13949
13950 /*
13951 * Doing validity checking on chunk doesn't make sense
13952 */
13953 ctxt->validate = 0;
13954 ctxt->loadsubset = 0;
13955 xmlDetectSAX2(ctxt);
13956
13957 if ( doc != NULL ){
13958 content = doc->children;
13959 doc->children = NULL;
13960 xmlParseContent(ctxt);
13961 doc->children = content;
13962 }
13963 else {
13964 xmlParseContent(ctxt);
13965 }
13966 if ((RAW == '<') && (NXT(1) == '/')) {
13967 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13968 } else if (RAW != 0) {
13969 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13970 }
13971 if (ctxt->node != newDoc->children) {
13972 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13973 }
13974
13975 if (!ctxt->wellFormed) {
13976 if (ctxt->errNo == 0)
13977 ret = 1;
13978 else
13979 ret = ctxt->errNo;
13980 } else {
13981 ret = 0;
13982 }
13983
13984 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13985 xmlNodePtr cur;
13986
13987 /*
13988 * Return the newly created nodeset after unlinking it from
13989 * they pseudo parent.
13990 */
13991 cur = newDoc->children->children;
13992 *lst = cur;
13993 while (cur != NULL) {
13994 xmlSetTreeDoc(cur, doc);
13995 cur->parent = NULL;
13996 cur = cur->next;
13997 }
13998 newDoc->children->children = NULL;
13999 }
14000
14001 if (sax != NULL)
14002 ctxt->sax = oldsax;
14003 xmlFreeParserCtxt(ctxt);
14004 newDoc->intSubset = NULL;
14005 newDoc->extSubset = NULL;
14006 newDoc->oldNs = NULL;
14007 xmlFreeDoc(newDoc);
14008
14009 return(ret);
14010 }
14011
14012 /**
14013 * xmlSAXParseEntity:
14014 * @sax: the SAX handler block
14015 * @filename: the filename
14016 *
14017 * parse an XML external entity out of context and build a tree.
14018 * It use the given SAX function block to handle the parsing callback.
14019 * If sax is NULL, fallback to the default DOM tree building routines.
14020 *
14021 * [78] extParsedEnt ::= TextDecl? content
14022 *
14023 * This correspond to a "Well Balanced" chunk
14024 *
14025 * Returns the resulting document tree
14026 */
14027
14028 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)14029 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14030 xmlDocPtr ret;
14031 xmlParserCtxtPtr ctxt;
14032
14033 ctxt = xmlCreateFileParserCtxt(filename);
14034 if (ctxt == NULL) {
14035 return(NULL);
14036 }
14037 if (sax != NULL) {
14038 if (ctxt->sax != NULL)
14039 xmlFree(ctxt->sax);
14040 ctxt->sax = sax;
14041 ctxt->userData = NULL;
14042 }
14043
14044 xmlParseExtParsedEnt(ctxt);
14045
14046 if (ctxt->wellFormed)
14047 ret = ctxt->myDoc;
14048 else {
14049 ret = NULL;
14050 xmlFreeDoc(ctxt->myDoc);
14051 ctxt->myDoc = NULL;
14052 }
14053 if (sax != NULL)
14054 ctxt->sax = NULL;
14055 xmlFreeParserCtxt(ctxt);
14056
14057 return(ret);
14058 }
14059
14060 /**
14061 * xmlParseEntity:
14062 * @filename: the filename
14063 *
14064 * parse an XML external entity out of context and build a tree.
14065 *
14066 * [78] extParsedEnt ::= TextDecl? content
14067 *
14068 * This correspond to a "Well Balanced" chunk
14069 *
14070 * Returns the resulting document tree
14071 */
14072
14073 xmlDocPtr
xmlParseEntity(const char * filename)14074 xmlParseEntity(const char *filename) {
14075 return(xmlSAXParseEntity(NULL, filename));
14076 }
14077 #endif /* LIBXML_SAX1_ENABLED */
14078
14079 /**
14080 * xmlCreateEntityParserCtxtInternal:
14081 * @URL: the entity URL
14082 * @ID: the entity PUBLIC ID
14083 * @base: a possible base for the target URI
14084 * @pctx: parser context used to set options on new context
14085 *
14086 * Create a parser context for an external entity
14087 * Automatic support for ZLIB/Compress compressed document is provided
14088 * by default if found at compile-time.
14089 *
14090 * Returns the new parser context or NULL
14091 */
14092 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)14093 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14094 const xmlChar *base, xmlParserCtxtPtr pctx) {
14095 xmlParserCtxtPtr ctxt;
14096 xmlParserInputPtr inputStream;
14097 char *directory = NULL;
14098 xmlChar *uri;
14099
14100 ctxt = xmlNewParserCtxt();
14101 if (ctxt == NULL) {
14102 return(NULL);
14103 }
14104
14105 if (pctx != NULL) {
14106 ctxt->options = pctx->options;
14107 ctxt->_private = pctx->_private;
14108 }
14109
14110 uri = xmlBuildURI(URL, base);
14111
14112 if (uri == NULL) {
14113 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14114 if (inputStream == NULL) {
14115 xmlFreeParserCtxt(ctxt);
14116 return(NULL);
14117 }
14118
14119 inputPush(ctxt, inputStream);
14120
14121 if ((ctxt->directory == NULL) && (directory == NULL))
14122 directory = xmlParserGetDirectory((char *)URL);
14123 if ((ctxt->directory == NULL) && (directory != NULL))
14124 ctxt->directory = directory;
14125 } else {
14126 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14127 if (inputStream == NULL) {
14128 xmlFree(uri);
14129 xmlFreeParserCtxt(ctxt);
14130 return(NULL);
14131 }
14132
14133 inputPush(ctxt, inputStream);
14134
14135 if ((ctxt->directory == NULL) && (directory == NULL))
14136 directory = xmlParserGetDirectory((char *)uri);
14137 if ((ctxt->directory == NULL) && (directory != NULL))
14138 ctxt->directory = directory;
14139 xmlFree(uri);
14140 }
14141 return(ctxt);
14142 }
14143
14144 /**
14145 * xmlCreateEntityParserCtxt:
14146 * @URL: the entity URL
14147 * @ID: the entity PUBLIC ID
14148 * @base: a possible base for the target URI
14149 *
14150 * Create a parser context for an external entity
14151 * Automatic support for ZLIB/Compress compressed document is provided
14152 * by default if found at compile-time.
14153 *
14154 * Returns the new parser context or NULL
14155 */
14156 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14157 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14158 const xmlChar *base) {
14159 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14160
14161 }
14162
14163 /************************************************************************
14164 * *
14165 * Front ends when parsing from a file *
14166 * *
14167 ************************************************************************/
14168
14169 /**
14170 * xmlCreateURLParserCtxt:
14171 * @filename: the filename or URL
14172 * @options: a combination of xmlParserOption
14173 *
14174 * Create a parser context for a file or URL content.
14175 * Automatic support for ZLIB/Compress compressed document is provided
14176 * by default if found at compile-time and for file accesses
14177 *
14178 * Returns the new parser context or NULL
14179 */
14180 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14181 xmlCreateURLParserCtxt(const char *filename, int options)
14182 {
14183 xmlParserCtxtPtr ctxt;
14184 xmlParserInputPtr inputStream;
14185 char *directory = NULL;
14186
14187 ctxt = xmlNewParserCtxt();
14188 if (ctxt == NULL) {
14189 xmlErrMemory(NULL, "cannot allocate parser context");
14190 return(NULL);
14191 }
14192
14193 if (options)
14194 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14195 ctxt->linenumbers = 1;
14196
14197 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14198 if (inputStream == NULL) {
14199 xmlFreeParserCtxt(ctxt);
14200 return(NULL);
14201 }
14202
14203 inputPush(ctxt, inputStream);
14204 if ((ctxt->directory == NULL) && (directory == NULL))
14205 directory = xmlParserGetDirectory(filename);
14206 if ((ctxt->directory == NULL) && (directory != NULL))
14207 ctxt->directory = directory;
14208
14209 return(ctxt);
14210 }
14211
14212 /**
14213 * xmlCreateFileParserCtxt:
14214 * @filename: the filename
14215 *
14216 * Create a parser context for a file content.
14217 * Automatic support for ZLIB/Compress compressed document is provided
14218 * by default if found at compile-time.
14219 *
14220 * Returns the new parser context or NULL
14221 */
14222 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14223 xmlCreateFileParserCtxt(const char *filename)
14224 {
14225 return(xmlCreateURLParserCtxt(filename, 0));
14226 }
14227
14228 #ifdef LIBXML_SAX1_ENABLED
14229 /**
14230 * xmlSAXParseFileWithData:
14231 * @sax: the SAX handler block
14232 * @filename: the filename
14233 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14234 * documents
14235 * @data: the userdata
14236 *
14237 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14238 * compressed document is provided by default if found at compile-time.
14239 * It use the given SAX function block to handle the parsing callback.
14240 * If sax is NULL, fallback to the default DOM tree building routines.
14241 *
14242 * User data (void *) is stored within the parser context in the
14243 * context's _private member, so it is available nearly everywhere in libxml
14244 *
14245 * Returns the resulting document tree
14246 */
14247
14248 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14249 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14250 int recovery, void *data) {
14251 xmlDocPtr ret;
14252 xmlParserCtxtPtr ctxt;
14253
14254 xmlInitParser();
14255
14256 ctxt = xmlCreateFileParserCtxt(filename);
14257 if (ctxt == NULL) {
14258 return(NULL);
14259 }
14260 if (sax != NULL) {
14261 if (ctxt->sax != NULL)
14262 xmlFree(ctxt->sax);
14263 ctxt->sax = sax;
14264 }
14265 xmlDetectSAX2(ctxt);
14266 if (data!=NULL) {
14267 ctxt->_private = data;
14268 }
14269
14270 if (ctxt->directory == NULL)
14271 ctxt->directory = xmlParserGetDirectory(filename);
14272
14273 ctxt->recovery = recovery;
14274
14275 xmlParseDocument(ctxt);
14276
14277 if ((ctxt->wellFormed) || recovery) {
14278 ret = ctxt->myDoc;
14279 if (ret != NULL) {
14280 if (ctxt->input->buf->compressed > 0)
14281 ret->compression = 9;
14282 else
14283 ret->compression = ctxt->input->buf->compressed;
14284 }
14285 }
14286 else {
14287 ret = NULL;
14288 xmlFreeDoc(ctxt->myDoc);
14289 ctxt->myDoc = NULL;
14290 }
14291 if (sax != NULL)
14292 ctxt->sax = NULL;
14293 xmlFreeParserCtxt(ctxt);
14294
14295 return(ret);
14296 }
14297
14298 /**
14299 * xmlSAXParseFile:
14300 * @sax: the SAX handler block
14301 * @filename: the filename
14302 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14303 * documents
14304 *
14305 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14306 * compressed document is provided by default if found at compile-time.
14307 * It use the given SAX function block to handle the parsing callback.
14308 * If sax is NULL, fallback to the default DOM tree building routines.
14309 *
14310 * Returns the resulting document tree
14311 */
14312
14313 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14314 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14315 int recovery) {
14316 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14317 }
14318
14319 /**
14320 * xmlRecoverDoc:
14321 * @cur: a pointer to an array of xmlChar
14322 *
14323 * parse an XML in-memory document and build a tree.
14324 * In the case the document is not Well Formed, a attempt to build a
14325 * tree is tried anyway
14326 *
14327 * Returns the resulting document tree or NULL in case of failure
14328 */
14329
14330 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14331 xmlRecoverDoc(const xmlChar *cur) {
14332 return(xmlSAXParseDoc(NULL, cur, 1));
14333 }
14334
14335 /**
14336 * xmlParseFile:
14337 * @filename: the filename
14338 *
14339 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14340 * compressed document is provided by default if found at compile-time.
14341 *
14342 * Returns the resulting document tree if the file was wellformed,
14343 * NULL otherwise.
14344 */
14345
14346 xmlDocPtr
xmlParseFile(const char * filename)14347 xmlParseFile(const char *filename) {
14348 return(xmlSAXParseFile(NULL, filename, 0));
14349 }
14350
14351 /**
14352 * xmlRecoverFile:
14353 * @filename: the filename
14354 *
14355 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14356 * compressed document is provided by default if found at compile-time.
14357 * In the case the document is not Well Formed, it attempts to build
14358 * a tree anyway
14359 *
14360 * Returns the resulting document tree or NULL in case of failure
14361 */
14362
14363 xmlDocPtr
xmlRecoverFile(const char * filename)14364 xmlRecoverFile(const char *filename) {
14365 return(xmlSAXParseFile(NULL, filename, 1));
14366 }
14367
14368
14369 /**
14370 * xmlSetupParserForBuffer:
14371 * @ctxt: an XML parser context
14372 * @buffer: a xmlChar * buffer
14373 * @filename: a file name
14374 *
14375 * Setup the parser context to parse a new buffer; Clears any prior
14376 * contents from the parser context. The buffer parameter must not be
14377 * NULL, but the filename parameter can be
14378 */
14379 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14380 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14381 const char* filename)
14382 {
14383 xmlParserInputPtr input;
14384
14385 if ((ctxt == NULL) || (buffer == NULL))
14386 return;
14387
14388 input = xmlNewInputStream(ctxt);
14389 if (input == NULL) {
14390 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14391 xmlClearParserCtxt(ctxt);
14392 return;
14393 }
14394
14395 xmlClearParserCtxt(ctxt);
14396 if (filename != NULL)
14397 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14398 input->base = buffer;
14399 input->cur = buffer;
14400 input->end = &buffer[xmlStrlen(buffer)];
14401 inputPush(ctxt, input);
14402 }
14403
14404 /**
14405 * xmlSAXUserParseFile:
14406 * @sax: a SAX handler
14407 * @user_data: The user data returned on SAX callbacks
14408 * @filename: a file name
14409 *
14410 * parse an XML file and call the given SAX handler routines.
14411 * Automatic support for ZLIB/Compress compressed document is provided
14412 *
14413 * Returns 0 in case of success or a error number otherwise
14414 */
14415 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14416 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14417 const char *filename) {
14418 int ret = 0;
14419 xmlParserCtxtPtr ctxt;
14420
14421 ctxt = xmlCreateFileParserCtxt(filename);
14422 if (ctxt == NULL) return -1;
14423 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14424 xmlFree(ctxt->sax);
14425 ctxt->sax = sax;
14426 xmlDetectSAX2(ctxt);
14427
14428 if (user_data != NULL)
14429 ctxt->userData = user_data;
14430
14431 xmlParseDocument(ctxt);
14432
14433 if (ctxt->wellFormed)
14434 ret = 0;
14435 else {
14436 if (ctxt->errNo != 0)
14437 ret = ctxt->errNo;
14438 else
14439 ret = -1;
14440 }
14441 if (sax != NULL)
14442 ctxt->sax = NULL;
14443 if (ctxt->myDoc != NULL) {
14444 xmlFreeDoc(ctxt->myDoc);
14445 ctxt->myDoc = NULL;
14446 }
14447 xmlFreeParserCtxt(ctxt);
14448
14449 return ret;
14450 }
14451 #endif /* LIBXML_SAX1_ENABLED */
14452
14453 /************************************************************************
14454 * *
14455 * Front ends when parsing from memory *
14456 * *
14457 ************************************************************************/
14458
14459 /**
14460 * xmlCreateMemoryParserCtxt:
14461 * @buffer: a pointer to a char array
14462 * @size: the size of the array
14463 *
14464 * Create a parser context for an XML in-memory document.
14465 *
14466 * Returns the new parser context or NULL
14467 */
14468 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14469 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14470 xmlParserCtxtPtr ctxt;
14471 xmlParserInputPtr input;
14472 xmlParserInputBufferPtr buf;
14473
14474 if (buffer == NULL)
14475 return(NULL);
14476 if (size <= 0)
14477 return(NULL);
14478
14479 ctxt = xmlNewParserCtxt();
14480 if (ctxt == NULL)
14481 return(NULL);
14482
14483 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14484 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14485 if (buf == NULL) {
14486 xmlFreeParserCtxt(ctxt);
14487 return(NULL);
14488 }
14489
14490 input = xmlNewInputStream(ctxt);
14491 if (input == NULL) {
14492 xmlFreeParserInputBuffer(buf);
14493 xmlFreeParserCtxt(ctxt);
14494 return(NULL);
14495 }
14496
14497 input->filename = NULL;
14498 input->buf = buf;
14499 xmlBufResetInput(input->buf->buffer, input);
14500
14501 inputPush(ctxt, input);
14502 return(ctxt);
14503 }
14504
14505 #ifdef LIBXML_SAX1_ENABLED
14506 /**
14507 * xmlSAXParseMemoryWithData:
14508 * @sax: the SAX handler block
14509 * @buffer: an pointer to a char array
14510 * @size: the size of the array
14511 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14512 * documents
14513 * @data: the userdata
14514 *
14515 * parse an XML in-memory block and use the given SAX function block
14516 * to handle the parsing callback. If sax is NULL, fallback to the default
14517 * DOM tree building routines.
14518 *
14519 * User data (void *) is stored within the parser context in the
14520 * context's _private member, so it is available nearly everywhere in libxml
14521 *
14522 * Returns the resulting document tree
14523 */
14524
14525 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14526 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14527 int size, int recovery, void *data) {
14528 xmlDocPtr ret;
14529 xmlParserCtxtPtr ctxt;
14530
14531 xmlInitParser();
14532
14533 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14534 if (ctxt == NULL) return(NULL);
14535 if (sax != NULL) {
14536 if (ctxt->sax != NULL)
14537 xmlFree(ctxt->sax);
14538 ctxt->sax = sax;
14539 }
14540 xmlDetectSAX2(ctxt);
14541 if (data!=NULL) {
14542 ctxt->_private=data;
14543 }
14544
14545 ctxt->recovery = recovery;
14546
14547 xmlParseDocument(ctxt);
14548
14549 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14550 else {
14551 ret = NULL;
14552 xmlFreeDoc(ctxt->myDoc);
14553 ctxt->myDoc = NULL;
14554 }
14555 if (sax != NULL)
14556 ctxt->sax = NULL;
14557 xmlFreeParserCtxt(ctxt);
14558
14559 return(ret);
14560 }
14561
14562 /**
14563 * xmlSAXParseMemory:
14564 * @sax: the SAX handler block
14565 * @buffer: an pointer to a char array
14566 * @size: the size of the array
14567 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14568 * documents
14569 *
14570 * parse an XML in-memory block and use the given SAX function block
14571 * to handle the parsing callback. If sax is NULL, fallback to the default
14572 * DOM tree building routines.
14573 *
14574 * Returns the resulting document tree
14575 */
14576 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14577 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14578 int size, int recovery) {
14579 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14580 }
14581
14582 /**
14583 * xmlParseMemory:
14584 * @buffer: an pointer to a char array
14585 * @size: the size of the array
14586 *
14587 * parse an XML in-memory block and build a tree.
14588 *
14589 * Returns the resulting document tree
14590 */
14591
xmlParseMemory(const char * buffer,int size)14592 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14593 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14594 }
14595
14596 /**
14597 * xmlRecoverMemory:
14598 * @buffer: an pointer to a char array
14599 * @size: the size of the array
14600 *
14601 * parse an XML in-memory block and build a tree.
14602 * In the case the document is not Well Formed, an attempt to
14603 * build a tree is tried anyway
14604 *
14605 * Returns the resulting document tree or NULL in case of error
14606 */
14607
xmlRecoverMemory(const char * buffer,int size)14608 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14609 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14610 }
14611
14612 /**
14613 * xmlSAXUserParseMemory:
14614 * @sax: a SAX handler
14615 * @user_data: The user data returned on SAX callbacks
14616 * @buffer: an in-memory XML document input
14617 * @size: the length of the XML document in bytes
14618 *
14619 * A better SAX parsing routine.
14620 * parse an XML in-memory buffer and call the given SAX handler routines.
14621 *
14622 * Returns 0 in case of success or a error number otherwise
14623 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14624 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14625 const char *buffer, int size) {
14626 int ret = 0;
14627 xmlParserCtxtPtr ctxt;
14628
14629 xmlInitParser();
14630
14631 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14632 if (ctxt == NULL) return -1;
14633 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14634 xmlFree(ctxt->sax);
14635 ctxt->sax = sax;
14636 xmlDetectSAX2(ctxt);
14637
14638 if (user_data != NULL)
14639 ctxt->userData = user_data;
14640
14641 xmlParseDocument(ctxt);
14642
14643 if (ctxt->wellFormed)
14644 ret = 0;
14645 else {
14646 if (ctxt->errNo != 0)
14647 ret = ctxt->errNo;
14648 else
14649 ret = -1;
14650 }
14651 if (sax != NULL)
14652 ctxt->sax = NULL;
14653 if (ctxt->myDoc != NULL) {
14654 xmlFreeDoc(ctxt->myDoc);
14655 ctxt->myDoc = NULL;
14656 }
14657 xmlFreeParserCtxt(ctxt);
14658
14659 return ret;
14660 }
14661 #endif /* LIBXML_SAX1_ENABLED */
14662
14663 /**
14664 * xmlCreateDocParserCtxt:
14665 * @cur: a pointer to an array of xmlChar
14666 *
14667 * Creates a parser context for an XML in-memory document.
14668 *
14669 * Returns the new parser context or NULL
14670 */
14671 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14672 xmlCreateDocParserCtxt(const xmlChar *cur) {
14673 int len;
14674
14675 if (cur == NULL)
14676 return(NULL);
14677 len = xmlStrlen(cur);
14678 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14679 }
14680
14681 #ifdef LIBXML_SAX1_ENABLED
14682 /**
14683 * xmlSAXParseDoc:
14684 * @sax: the SAX handler block
14685 * @cur: a pointer to an array of xmlChar
14686 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14687 * documents
14688 *
14689 * parse an XML in-memory document and build a tree.
14690 * It use the given SAX function block to handle the parsing callback.
14691 * If sax is NULL, fallback to the default DOM tree building routines.
14692 *
14693 * Returns the resulting document tree
14694 */
14695
14696 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14697 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14698 xmlDocPtr ret;
14699 xmlParserCtxtPtr ctxt;
14700 xmlSAXHandlerPtr oldsax = NULL;
14701
14702 if (cur == NULL) return(NULL);
14703
14704
14705 ctxt = xmlCreateDocParserCtxt(cur);
14706 if (ctxt == NULL) return(NULL);
14707 if (sax != NULL) {
14708 oldsax = ctxt->sax;
14709 ctxt->sax = sax;
14710 ctxt->userData = NULL;
14711 }
14712 xmlDetectSAX2(ctxt);
14713
14714 xmlParseDocument(ctxt);
14715 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14716 else {
14717 ret = NULL;
14718 xmlFreeDoc(ctxt->myDoc);
14719 ctxt->myDoc = NULL;
14720 }
14721 if (sax != NULL)
14722 ctxt->sax = oldsax;
14723 xmlFreeParserCtxt(ctxt);
14724
14725 return(ret);
14726 }
14727
14728 /**
14729 * xmlParseDoc:
14730 * @cur: a pointer to an array of xmlChar
14731 *
14732 * parse an XML in-memory document and build a tree.
14733 *
14734 * Returns the resulting document tree
14735 */
14736
14737 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14738 xmlParseDoc(const xmlChar *cur) {
14739 return(xmlSAXParseDoc(NULL, cur, 0));
14740 }
14741 #endif /* LIBXML_SAX1_ENABLED */
14742
14743 #ifdef LIBXML_LEGACY_ENABLED
14744 /************************************************************************
14745 * *
14746 * Specific function to keep track of entities references *
14747 * and used by the XSLT debugger *
14748 * *
14749 ************************************************************************/
14750
14751 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14752
14753 /**
14754 * xmlAddEntityReference:
14755 * @ent : A valid entity
14756 * @firstNode : A valid first node for children of entity
14757 * @lastNode : A valid last node of children entity
14758 *
14759 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14760 */
14761 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14762 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14763 xmlNodePtr lastNode)
14764 {
14765 if (xmlEntityRefFunc != NULL) {
14766 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14767 }
14768 }
14769
14770
14771 /**
14772 * xmlSetEntityReferenceFunc:
14773 * @func: A valid function
14774 *
14775 * Set the function to call call back when a xml reference has been made
14776 */
14777 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14778 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14779 {
14780 xmlEntityRefFunc = func;
14781 }
14782 #endif /* LIBXML_LEGACY_ENABLED */
14783
14784 /************************************************************************
14785 * *
14786 * Miscellaneous *
14787 * *
14788 ************************************************************************/
14789
14790 #ifdef LIBXML_XPATH_ENABLED
14791 #include <libxml/xpath.h>
14792 #endif
14793
/* Default generic error function; compared against in xmlInitParser(). */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Non-zero once xmlInitParser() has run; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calling it again once the library is initialized is a no-op.
 */

void
xmlInitParser(void) {
    /* Fast path: nothing to do when initialization already happened. */
    if (xmlParserInitialized != 0)
        return;

#ifdef LIBXML_THREAD_ENABLED
    /*
     * With thread support the flag is tested a second time while the
     * global init mutex is held, before any initialization is done.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
        xmlInitThreads();
        xmlInitGlobals();
        /*
         * Only (re)initialize the generic error function when it is
         * unset or still the default one, leaving a custom one alone.
         */
        if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
            (xmlGenericError == NULL))
            initGenericErrorDefaultFunc(NULL);
        xmlInitMemory();
        xmlInitializeDict();
        xmlInitCharEncodingHandlers();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Mark the library initialized only after every step has run. */
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14840
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to release when the library was never initialized. */
    if (!xmlParserInitialized)
        return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlResetLastError();
    xmlCleanupGlobals();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Clear the flag so that xmlInitParser() runs in full again. */
    xmlParserInitialized = 0;
}
14887
14888 /************************************************************************
14889 * *
14890 * New set (2.6.0) of simpler and more flexible APIs *
14891 * *
14892 ************************************************************************/
14893
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible at the point of
 * use; when it is NULL the string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement: "DICT_FREE(x);" can then be used safely as the
 * body of an if that has an else branch.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14905
14906 /**
14907 * xmlCtxtReset:
14908 * @ctxt: an XML parser context
14909 *
14910 * Reset a parser context
14911 */
14912 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14913 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14914 {
14915 xmlParserInputPtr input;
14916 xmlDictPtr dict;
14917
14918 if (ctxt == NULL)
14919 return;
14920
14921 dict = ctxt->dict;
14922
14923 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14924 xmlFreeInputStream(input);
14925 }
14926 ctxt->inputNr = 0;
14927 ctxt->input = NULL;
14928
14929 ctxt->spaceNr = 0;
14930 if (ctxt->spaceTab != NULL) {
14931 ctxt->spaceTab[0] = -1;
14932 ctxt->space = &ctxt->spaceTab[0];
14933 } else {
14934 ctxt->space = NULL;
14935 }
14936
14937
14938 ctxt->nodeNr = 0;
14939 ctxt->node = NULL;
14940
14941 ctxt->nameNr = 0;
14942 ctxt->name = NULL;
14943
14944 DICT_FREE(ctxt->version);
14945 ctxt->version = NULL;
14946 DICT_FREE(ctxt->encoding);
14947 ctxt->encoding = NULL;
14948 DICT_FREE(ctxt->directory);
14949 ctxt->directory = NULL;
14950 DICT_FREE(ctxt->extSubURI);
14951 ctxt->extSubURI = NULL;
14952 DICT_FREE(ctxt->extSubSystem);
14953 ctxt->extSubSystem = NULL;
14954 if (ctxt->myDoc != NULL)
14955 xmlFreeDoc(ctxt->myDoc);
14956 ctxt->myDoc = NULL;
14957
14958 ctxt->standalone = -1;
14959 ctxt->hasExternalSubset = 0;
14960 ctxt->hasPErefs = 0;
14961 ctxt->html = 0;
14962 ctxt->external = 0;
14963 ctxt->instate = XML_PARSER_START;
14964 ctxt->token = 0;
14965
14966 ctxt->wellFormed = 1;
14967 ctxt->nsWellFormed = 1;
14968 ctxt->disableSAX = 0;
14969 ctxt->valid = 1;
14970 #if 0
14971 ctxt->vctxt.userData = ctxt;
14972 ctxt->vctxt.error = xmlParserValidityError;
14973 ctxt->vctxt.warning = xmlParserValidityWarning;
14974 #endif
14975 ctxt->record_info = 0;
14976 ctxt->nbChars = 0;
14977 ctxt->checkIndex = 0;
14978 ctxt->inSubset = 0;
14979 ctxt->errNo = XML_ERR_OK;
14980 ctxt->depth = 0;
14981 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14982 ctxt->catalogs = NULL;
14983 ctxt->nbentities = 0;
14984 ctxt->sizeentities = 0;
14985 ctxt->sizeentcopy = 0;
14986 xmlInitNodeInfoSeq(&ctxt->node_seq);
14987
14988 if (ctxt->attsDefault != NULL) {
14989 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14990 ctxt->attsDefault = NULL;
14991 }
14992 if (ctxt->attsSpecial != NULL) {
14993 xmlHashFree(ctxt->attsSpecial, NULL);
14994 ctxt->attsSpecial = NULL;
14995 }
14996
14997 #ifdef LIBXML_CATALOG_ENABLED
14998 if (ctxt->catalogs != NULL)
14999 xmlCatalogFreeLocal(ctxt->catalogs);
15000 #endif
15001 if (ctxt->lastError.code != XML_ERR_OK)
15002 xmlResetError(&ctxt->lastError);
15003 }
15004
15005 /**
15006 * xmlCtxtResetPush:
15007 * @ctxt: an XML parser context
15008 * @chunk: a pointer to an array of chars
15009 * @size: number of chars in the array
15010 * @filename: an optional file name or URI
15011 * @encoding: the document encoding, or NULL
15012 *
15013 * Reset a push parser context
15014 *
15015 * Returns 0 in case of success and 1 in case of error
15016 */
15017 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)15018 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15019 int size, const char *filename, const char *encoding)
15020 {
15021 xmlParserInputPtr inputStream;
15022 xmlParserInputBufferPtr buf;
15023 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15024
15025 if (ctxt == NULL)
15026 return(1);
15027
15028 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15029 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15030
15031 buf = xmlAllocParserInputBuffer(enc);
15032 if (buf == NULL)
15033 return(1);
15034
15035 if (ctxt == NULL) {
15036 xmlFreeParserInputBuffer(buf);
15037 return(1);
15038 }
15039
15040 xmlCtxtReset(ctxt);
15041
15042 if (ctxt->pushTab == NULL) {
15043 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15044 sizeof(xmlChar *));
15045 if (ctxt->pushTab == NULL) {
15046 xmlErrMemory(ctxt, NULL);
15047 xmlFreeParserInputBuffer(buf);
15048 return(1);
15049 }
15050 }
15051
15052 if (filename == NULL) {
15053 ctxt->directory = NULL;
15054 } else {
15055 ctxt->directory = xmlParserGetDirectory(filename);
15056 }
15057
15058 inputStream = xmlNewInputStream(ctxt);
15059 if (inputStream == NULL) {
15060 xmlFreeParserInputBuffer(buf);
15061 return(1);
15062 }
15063
15064 if (filename == NULL)
15065 inputStream->filename = NULL;
15066 else
15067 inputStream->filename = (char *)
15068 xmlCanonicPath((const xmlChar *) filename);
15069 inputStream->buf = buf;
15070 xmlBufResetInput(buf->buffer, inputStream);
15071
15072 inputPush(ctxt, inputStream);
15073
15074 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15075 (ctxt->input->buf != NULL)) {
15076 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15077 size_t cur = ctxt->input->cur - ctxt->input->base;
15078
15079 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15080
15081 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15082 #ifdef DEBUG_PUSH
15083 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15084 #endif
15085 }
15086
15087 if (encoding != NULL) {
15088 xmlCharEncodingHandlerPtr hdlr;
15089
15090 if (ctxt->encoding != NULL)
15091 xmlFree((xmlChar *) ctxt->encoding);
15092 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15093
15094 hdlr = xmlFindCharEncodingHandler(encoding);
15095 if (hdlr != NULL) {
15096 xmlSwitchToEncoding(ctxt, hdlr);
15097 } else {
15098 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15099 "Unsupported encoding %s\n", BAD_CAST encoding);
15100 }
15101 } else if (enc != XML_CHAR_ENCODING_NONE) {
15102 xmlSwitchEncoding(ctxt, enc);
15103 }
15104
15105 return(0);
15106 }
15107
15108
15109 /**
15110 * xmlCtxtUseOptionsInternal:
15111 * @ctxt: an XML parser context
15112 * @options: a combination of xmlParserOption
15113 * @encoding: the user provided encoding to use
15114 *
15115 * Applies the options to the parser context
15116 *
15117 * Returns 0 in case of success, the set of unknown or unimplemented options
15118 * in case of error.
15119 */
15120 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15121 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15122 {
15123 if (ctxt == NULL)
15124 return(-1);
15125 if (encoding != NULL) {
15126 if (ctxt->encoding != NULL)
15127 xmlFree((xmlChar *) ctxt->encoding);
15128 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15129 }
15130 if (options & XML_PARSE_RECOVER) {
15131 ctxt->recovery = 1;
15132 options -= XML_PARSE_RECOVER;
15133 ctxt->options |= XML_PARSE_RECOVER;
15134 } else
15135 ctxt->recovery = 0;
15136 if (options & XML_PARSE_DTDLOAD) {
15137 ctxt->loadsubset = XML_DETECT_IDS;
15138 options -= XML_PARSE_DTDLOAD;
15139 ctxt->options |= XML_PARSE_DTDLOAD;
15140 } else
15141 ctxt->loadsubset = 0;
15142 if (options & XML_PARSE_DTDATTR) {
15143 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15144 options -= XML_PARSE_DTDATTR;
15145 ctxt->options |= XML_PARSE_DTDATTR;
15146 }
15147 if (options & XML_PARSE_NOENT) {
15148 ctxt->replaceEntities = 1;
15149 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15150 options -= XML_PARSE_NOENT;
15151 ctxt->options |= XML_PARSE_NOENT;
15152 } else
15153 ctxt->replaceEntities = 0;
15154 if (options & XML_PARSE_PEDANTIC) {
15155 ctxt->pedantic = 1;
15156 options -= XML_PARSE_PEDANTIC;
15157 ctxt->options |= XML_PARSE_PEDANTIC;
15158 } else
15159 ctxt->pedantic = 0;
15160 if (options & XML_PARSE_NOBLANKS) {
15161 ctxt->keepBlanks = 0;
15162 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15163 options -= XML_PARSE_NOBLANKS;
15164 ctxt->options |= XML_PARSE_NOBLANKS;
15165 } else
15166 ctxt->keepBlanks = 1;
15167 if (options & XML_PARSE_DTDVALID) {
15168 ctxt->validate = 1;
15169 if (options & XML_PARSE_NOWARNING)
15170 ctxt->vctxt.warning = NULL;
15171 if (options & XML_PARSE_NOERROR)
15172 ctxt->vctxt.error = NULL;
15173 options -= XML_PARSE_DTDVALID;
15174 ctxt->options |= XML_PARSE_DTDVALID;
15175 } else
15176 ctxt->validate = 0;
15177 if (options & XML_PARSE_NOWARNING) {
15178 ctxt->sax->warning = NULL;
15179 options -= XML_PARSE_NOWARNING;
15180 }
15181 if (options & XML_PARSE_NOERROR) {
15182 ctxt->sax->error = NULL;
15183 ctxt->sax->fatalError = NULL;
15184 options -= XML_PARSE_NOERROR;
15185 }
15186 #ifdef LIBXML_SAX1_ENABLED
15187 if (options & XML_PARSE_SAX1) {
15188 ctxt->sax->startElement = xmlSAX2StartElement;
15189 ctxt->sax->endElement = xmlSAX2EndElement;
15190 ctxt->sax->startElementNs = NULL;
15191 ctxt->sax->endElementNs = NULL;
15192 ctxt->sax->initialized = 1;
15193 options -= XML_PARSE_SAX1;
15194 ctxt->options |= XML_PARSE_SAX1;
15195 }
15196 #endif /* LIBXML_SAX1_ENABLED */
15197 if (options & XML_PARSE_NODICT) {
15198 ctxt->dictNames = 0;
15199 options -= XML_PARSE_NODICT;
15200 ctxt->options |= XML_PARSE_NODICT;
15201 } else {
15202 ctxt->dictNames = 1;
15203 }
15204 if (options & XML_PARSE_NOCDATA) {
15205 ctxt->sax->cdataBlock = NULL;
15206 options -= XML_PARSE_NOCDATA;
15207 ctxt->options |= XML_PARSE_NOCDATA;
15208 }
15209 if (options & XML_PARSE_NSCLEAN) {
15210 ctxt->options |= XML_PARSE_NSCLEAN;
15211 options -= XML_PARSE_NSCLEAN;
15212 }
15213 if (options & XML_PARSE_NONET) {
15214 ctxt->options |= XML_PARSE_NONET;
15215 options -= XML_PARSE_NONET;
15216 }
15217 if (options & XML_PARSE_COMPACT) {
15218 ctxt->options |= XML_PARSE_COMPACT;
15219 options -= XML_PARSE_COMPACT;
15220 }
15221 if (options & XML_PARSE_OLD10) {
15222 ctxt->options |= XML_PARSE_OLD10;
15223 options -= XML_PARSE_OLD10;
15224 }
15225 if (options & XML_PARSE_NOBASEFIX) {
15226 ctxt->options |= XML_PARSE_NOBASEFIX;
15227 options -= XML_PARSE_NOBASEFIX;
15228 }
15229 if (options & XML_PARSE_HUGE) {
15230 ctxt->options |= XML_PARSE_HUGE;
15231 options -= XML_PARSE_HUGE;
15232 if (ctxt->dict != NULL)
15233 xmlDictSetLimit(ctxt->dict, 0);
15234 }
15235 if (options & XML_PARSE_OLDSAX) {
15236 ctxt->options |= XML_PARSE_OLDSAX;
15237 options -= XML_PARSE_OLDSAX;
15238 }
15239 if (options & XML_PARSE_IGNORE_ENC) {
15240 ctxt->options |= XML_PARSE_IGNORE_ENC;
15241 options -= XML_PARSE_IGNORE_ENC;
15242 }
15243 if (options & XML_PARSE_BIG_LINES) {
15244 ctxt->options |= XML_PARSE_BIG_LINES;
15245 options -= XML_PARSE_BIG_LINES;
15246 }
15247 ctxt->linenumbers = 1;
15248 return (options);
15249 }
15250
15251 /**
15252 * xmlCtxtUseOptions:
15253 * @ctxt: an XML parser context
15254 * @options: a combination of xmlParserOption
15255 *
15256 * Applies the options to the parser context
15257 *
15258 * Returns 0 in case of success, the set of unknown or unimplemented options
15259 * in case of error.
15260 */
15261 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15262 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15263 {
15264 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15265 }
15266
15267 /**
15268 * xmlDoRead:
15269 * @ctxt: an XML parser context
15270 * @URL: the base URL to use for the document
15271 * @encoding: the document encoding, or NULL
15272 * @options: a combination of xmlParserOption
15273 * @reuse: keep the context for reuse
15274 *
15275 * Common front-end for the xmlRead functions
15276 *
15277 * Returns the resulting document tree or NULL
15278 */
15279 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15280 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15281 int options, int reuse)
15282 {
15283 xmlDocPtr ret;
15284
15285 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15286 if (encoding != NULL) {
15287 xmlCharEncodingHandlerPtr hdlr;
15288
15289 hdlr = xmlFindCharEncodingHandler(encoding);
15290 if (hdlr != NULL)
15291 xmlSwitchToEncoding(ctxt, hdlr);
15292 }
15293 if ((URL != NULL) && (ctxt->input != NULL) &&
15294 (ctxt->input->filename == NULL))
15295 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15296 xmlParseDocument(ctxt);
15297 if ((ctxt->wellFormed) || ctxt->recovery)
15298 ret = ctxt->myDoc;
15299 else {
15300 ret = NULL;
15301 if (ctxt->myDoc != NULL) {
15302 xmlFreeDoc(ctxt->myDoc);
15303 }
15304 }
15305 ctxt->myDoc = NULL;
15306 if (!reuse) {
15307 xmlFreeParserCtxt(ctxt);
15308 }
15309
15310 return (ret);
15311 }
15312
15313 /**
15314 * xmlReadDoc:
15315 * @cur: a pointer to a zero terminated string
15316 * @URL: the base URL to use for the document
15317 * @encoding: the document encoding, or NULL
15318 * @options: a combination of xmlParserOption
15319 *
15320 * parse an XML in-memory document and build a tree.
15321 *
15322 * Returns the resulting document tree
15323 */
15324 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15325 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15326 {
15327 xmlParserCtxtPtr ctxt;
15328
15329 if (cur == NULL)
15330 return (NULL);
15331 xmlInitParser();
15332
15333 ctxt = xmlCreateDocParserCtxt(cur);
15334 if (ctxt == NULL)
15335 return (NULL);
15336 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15337 }
15338
15339 /**
15340 * xmlReadFile:
15341 * @filename: a file or URL
15342 * @encoding: the document encoding, or NULL
15343 * @options: a combination of xmlParserOption
15344 *
15345 * parse an XML file from the filesystem or the network.
15346 *
15347 * Returns the resulting document tree
15348 */
15349 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15350 xmlReadFile(const char *filename, const char *encoding, int options)
15351 {
15352 xmlParserCtxtPtr ctxt;
15353
15354 xmlInitParser();
15355 ctxt = xmlCreateURLParserCtxt(filename, options);
15356 if (ctxt == NULL)
15357 return (NULL);
15358 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15359 }
15360
15361 /**
15362 * xmlReadMemory:
15363 * @buffer: a pointer to a char array
15364 * @size: the size of the array
15365 * @URL: the base URL to use for the document
15366 * @encoding: the document encoding, or NULL
15367 * @options: a combination of xmlParserOption
15368 *
15369 * parse an XML in-memory document and build a tree.
15370 *
15371 * Returns the resulting document tree
15372 */
15373 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15374 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15375 {
15376 xmlParserCtxtPtr ctxt;
15377
15378 xmlInitParser();
15379 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15380 if (ctxt == NULL)
15381 return (NULL);
15382 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15383 }
15384
15385 /**
15386 * xmlReadFd:
15387 * @fd: an open file descriptor
15388 * @URL: the base URL to use for the document
15389 * @encoding: the document encoding, or NULL
15390 * @options: a combination of xmlParserOption
15391 *
15392 * parse an XML from a file descriptor and build a tree.
15393 * NOTE that the file descriptor will not be closed when the
15394 * reader is closed or reset.
15395 *
15396 * Returns the resulting document tree
15397 */
15398 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15399 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15400 {
15401 xmlParserCtxtPtr ctxt;
15402 xmlParserInputBufferPtr input;
15403 xmlParserInputPtr stream;
15404
15405 if (fd < 0)
15406 return (NULL);
15407 xmlInitParser();
15408
15409 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15410 if (input == NULL)
15411 return (NULL);
15412 input->closecallback = NULL;
15413 ctxt = xmlNewParserCtxt();
15414 if (ctxt == NULL) {
15415 xmlFreeParserInputBuffer(input);
15416 return (NULL);
15417 }
15418 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15419 if (stream == NULL) {
15420 xmlFreeParserInputBuffer(input);
15421 xmlFreeParserCtxt(ctxt);
15422 return (NULL);
15423 }
15424 inputPush(ctxt, stream);
15425 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15426 }
15427
15428 /**
15429 * xmlReadIO:
15430 * @ioread: an I/O read function
15431 * @ioclose: an I/O close function
15432 * @ioctx: an I/O handler
15433 * @URL: the base URL to use for the document
15434 * @encoding: the document encoding, or NULL
15435 * @options: a combination of xmlParserOption
15436 *
15437 * parse an XML document from I/O functions and source and build a tree.
15438 *
15439 * Returns the resulting document tree
15440 */
15441 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15442 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15443 void *ioctx, const char *URL, const char *encoding, int options)
15444 {
15445 xmlParserCtxtPtr ctxt;
15446 xmlParserInputBufferPtr input;
15447 xmlParserInputPtr stream;
15448
15449 if (ioread == NULL)
15450 return (NULL);
15451 xmlInitParser();
15452
15453 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15454 XML_CHAR_ENCODING_NONE);
15455 if (input == NULL) {
15456 if (ioclose != NULL)
15457 ioclose(ioctx);
15458 return (NULL);
15459 }
15460 ctxt = xmlNewParserCtxt();
15461 if (ctxt == NULL) {
15462 xmlFreeParserInputBuffer(input);
15463 return (NULL);
15464 }
15465 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15466 if (stream == NULL) {
15467 xmlFreeParserInputBuffer(input);
15468 xmlFreeParserCtxt(ctxt);
15469 return (NULL);
15470 }
15471 inputPush(ctxt, stream);
15472 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15473 }
15474
15475 /**
15476 * xmlCtxtReadDoc:
15477 * @ctxt: an XML parser context
15478 * @cur: a pointer to a zero terminated string
15479 * @URL: the base URL to use for the document
15480 * @encoding: the document encoding, or NULL
15481 * @options: a combination of xmlParserOption
15482 *
15483 * parse an XML in-memory document and build a tree.
15484 * This reuses the existing @ctxt parser context
15485 *
15486 * Returns the resulting document tree
15487 */
15488 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15489 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15490 const char *URL, const char *encoding, int options)
15491 {
15492 xmlParserInputPtr stream;
15493
15494 if (cur == NULL)
15495 return (NULL);
15496 if (ctxt == NULL)
15497 return (NULL);
15498 xmlInitParser();
15499
15500 xmlCtxtReset(ctxt);
15501
15502 stream = xmlNewStringInputStream(ctxt, cur);
15503 if (stream == NULL) {
15504 return (NULL);
15505 }
15506 inputPush(ctxt, stream);
15507 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15508 }
15509
15510 /**
15511 * xmlCtxtReadFile:
15512 * @ctxt: an XML parser context
15513 * @filename: a file or URL
15514 * @encoding: the document encoding, or NULL
15515 * @options: a combination of xmlParserOption
15516 *
15517 * parse an XML file from the filesystem or the network.
15518 * This reuses the existing @ctxt parser context
15519 *
15520 * Returns the resulting document tree
15521 */
15522 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15523 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15524 const char *encoding, int options)
15525 {
15526 xmlParserInputPtr stream;
15527
15528 if (filename == NULL)
15529 return (NULL);
15530 if (ctxt == NULL)
15531 return (NULL);
15532 xmlInitParser();
15533
15534 xmlCtxtReset(ctxt);
15535
15536 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15537 if (stream == NULL) {
15538 return (NULL);
15539 }
15540 inputPush(ctxt, stream);
15541 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15542 }
15543
15544 /**
15545 * xmlCtxtReadMemory:
15546 * @ctxt: an XML parser context
15547 * @buffer: a pointer to a char array
15548 * @size: the size of the array
15549 * @URL: the base URL to use for the document
15550 * @encoding: the document encoding, or NULL
15551 * @options: a combination of xmlParserOption
15552 *
15553 * parse an XML in-memory document and build a tree.
15554 * This reuses the existing @ctxt parser context
15555 *
15556 * Returns the resulting document tree
15557 */
15558 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15559 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15560 const char *URL, const char *encoding, int options)
15561 {
15562 xmlParserInputBufferPtr input;
15563 xmlParserInputPtr stream;
15564
15565 if (ctxt == NULL)
15566 return (NULL);
15567 if (buffer == NULL)
15568 return (NULL);
15569 xmlInitParser();
15570
15571 xmlCtxtReset(ctxt);
15572
15573 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15574 if (input == NULL) {
15575 return(NULL);
15576 }
15577
15578 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15579 if (stream == NULL) {
15580 xmlFreeParserInputBuffer(input);
15581 return(NULL);
15582 }
15583
15584 inputPush(ctxt, stream);
15585 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15586 }
15587
15588 /**
15589 * xmlCtxtReadFd:
15590 * @ctxt: an XML parser context
15591 * @fd: an open file descriptor
15592 * @URL: the base URL to use for the document
15593 * @encoding: the document encoding, or NULL
15594 * @options: a combination of xmlParserOption
15595 *
15596 * parse an XML from a file descriptor and build a tree.
15597 * This reuses the existing @ctxt parser context
15598 * NOTE that the file descriptor will not be closed when the
15599 * reader is closed or reset.
15600 *
15601 * Returns the resulting document tree
15602 */
15603 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15604 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15605 const char *URL, const char *encoding, int options)
15606 {
15607 xmlParserInputBufferPtr input;
15608 xmlParserInputPtr stream;
15609
15610 if (fd < 0)
15611 return (NULL);
15612 if (ctxt == NULL)
15613 return (NULL);
15614 xmlInitParser();
15615
15616 xmlCtxtReset(ctxt);
15617
15618
15619 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15620 if (input == NULL)
15621 return (NULL);
15622 input->closecallback = NULL;
15623 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15624 if (stream == NULL) {
15625 xmlFreeParserInputBuffer(input);
15626 return (NULL);
15627 }
15628 inputPush(ctxt, stream);
15629 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15630 }
15631
15632 /**
15633 * xmlCtxtReadIO:
15634 * @ctxt: an XML parser context
15635 * @ioread: an I/O read function
15636 * @ioclose: an I/O close function
15637 * @ioctx: an I/O handler
15638 * @URL: the base URL to use for the document
15639 * @encoding: the document encoding, or NULL
15640 * @options: a combination of xmlParserOption
15641 *
15642 * parse an XML document from I/O functions and source and build a tree.
15643 * This reuses the existing @ctxt parser context
15644 *
15645 * Returns the resulting document tree
15646 */
15647 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15648 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15649 xmlInputCloseCallback ioclose, void *ioctx,
15650 const char *URL,
15651 const char *encoding, int options)
15652 {
15653 xmlParserInputBufferPtr input;
15654 xmlParserInputPtr stream;
15655
15656 if (ioread == NULL)
15657 return (NULL);
15658 if (ctxt == NULL)
15659 return (NULL);
15660 xmlInitParser();
15661
15662 xmlCtxtReset(ctxt);
15663
15664 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15665 XML_CHAR_ENCODING_NONE);
15666 if (input == NULL) {
15667 if (ioclose != NULL)
15668 ioclose(ioctx);
15669 return (NULL);
15670 }
15671 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15672 if (stream == NULL) {
15673 xmlFreeParserInputBuffer(input);
15674 return (NULL);
15675 }
15676 inputPush(ctxt, stream);
15677 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15678 }
15679
15680 #define bottom_parser
15681 #include "elfgcchack.h"
15682