1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 #define IN_LIBXML
34 #include "libxml.h"
35 
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41 
42 #include <stdlib.h>
43 #include <limits.h>
44 #include <string.h>
45 #include <stdarg.h>
46 #include <libxml/xmlmemory.h>
47 #include <libxml/threads.h>
48 #include <libxml/globals.h>
49 #include <libxml/tree.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/valid.h>
53 #include <libxml/entities.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/encoding.h>
56 #include <libxml/xmlIO.h>
57 #include <libxml/uri.h>
58 #ifdef LIBXML_CATALOG_ENABLED
59 #include <libxml/catalog.h>
60 #endif
61 #ifdef LIBXML_SCHEMAS_ENABLED
62 #include <libxml/xmlschemastypes.h>
63 #include <libxml/relaxng.h>
64 #endif
65 #ifdef HAVE_CTYPE_H
66 #include <ctype.h>
67 #endif
68 #ifdef HAVE_STDLIB_H
69 #include <stdlib.h>
70 #endif
71 #ifdef HAVE_SYS_STAT_H
72 #include <sys/stat.h>
73 #endif
74 #ifdef HAVE_FCNTL_H
75 #include <fcntl.h>
76 #endif
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 #ifdef HAVE_ZLIB_H
81 #include <zlib.h>
82 #endif
83 #ifdef HAVE_LZMA_H
84 #include <lzma.h>
85 #endif
86 
87 #include "buf.h"
88 #include "enc.h"
89 
90 static void
91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92 
93 static xmlParserCtxtPtr
94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 	                  const xmlChar *base, xmlParserCtxtPtr pctx);
96 
97 /************************************************************************
98  *									*
99  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
100  *									*
101  ************************************************************************/
102 
/*
 * Replacement sizes below XML_PARSER_BIG_ENTITY are always accepted by
 * xmlParserEntityCheck() without looking at the amount of input consumed.
 */
#define XML_PARSER_BIG_ENTITY   1000
/*
 * NOTE(review): not referenced in this part of the file; presumably a
 * threshold on a number of entities -- confirm against its users.
 */
#define XML_PARSER_LOT_ENTITY   5000

/*
 * XML_PARSER_NON_LINEAR is the threshold at which the ratio of parsed
 *    entity replacement to the size in bytes of the input indicates an
 *    exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR   10
113 
114 /*
115  * xmlParserEntityCheck
116  *
117  * Function to check non-linear entity expansion behaviour
118  * This is here to detect and stop exponential linear entity expansion
119  * This is not a limitation of the parser but a safety
120  * boundary feature. It can be disabled with the XML_PARSE_HUGE
121  * parser option.
122  */
123 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)124 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
125                      xmlEntityPtr ent, size_t replacement)
126 {
127     size_t consumed = 0;
128 
129     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130         return (0);
131     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132         return (1);
133 
134     /*
135      * This may look absurd but is needed to detect
136      * entities problems
137      */
138     if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
139 	(ent->content != NULL) && (ent->checked == 0)) {
140 	unsigned long oldnbent = ctxt->nbentities;
141 	xmlChar *rep;
142 
143 	ent->checked = 1;
144 
145 	rep = xmlStringDecodeEntities(ctxt, ent->content,
146 				  XML_SUBSTITUTE_REF, 0, 0, 0);
147 
148 	ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
149 	if (rep != NULL) {
150 	    if (xmlStrchr(rep, '<'))
151 		ent->checked |= 1;
152 	    xmlFree(rep);
153 	    rep = NULL;
154 	}
155     }
156     if (replacement != 0) {
157 	if (replacement < XML_MAX_TEXT_LENGTH)
158 	    return(0);
159 
160         /*
161 	 * If the volume of entity copy reaches 10 times the
162 	 * amount of parsed data and over the large text threshold
163 	 * then that's very likely to be an abuse.
164 	 */
165         if (ctxt->input != NULL) {
166 	    consumed = ctxt->input->consumed +
167 	               (ctxt->input->cur - ctxt->input->base);
168 	}
169         consumed += ctxt->sizeentities;
170 
171         if (replacement < XML_PARSER_NON_LINEAR * consumed)
172 	    return(0);
173     } else if (size != 0) {
174         /*
175          * Do the check based on the replacement size of the entity
176          */
177         if (size < XML_PARSER_BIG_ENTITY)
178 	    return(0);
179 
180         /*
181          * A limit on the amount of text data reasonably used
182          */
183         if (ctxt->input != NULL) {
184             consumed = ctxt->input->consumed +
185                 (ctxt->input->cur - ctxt->input->base);
186         }
187         consumed += ctxt->sizeentities;
188 
189         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
190 	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
191             return (0);
192     } else if (ent != NULL) {
193         /*
194          * use the number of parsed entities in the replacement
195          */
196         size = ent->checked / 2;
197 
198         /*
199          * The amount of data parsed counting entities size only once
200          */
201         if (ctxt->input != NULL) {
202             consumed = ctxt->input->consumed +
203                 (ctxt->input->cur - ctxt->input->base);
204         }
205         consumed += ctxt->sizeentities;
206 
207         /*
208          * Check the density of entities for the amount of data
209 	 * knowing an entity reference will take at least 3 bytes
210          */
211         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
212             return (0);
213     } else {
214         /*
215          * strange we got no data for checking
216          */
217 	if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
218 	     (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
219 	    (ctxt->nbentities <= 10000))
220 	    return (0);
221     }
222     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
223     return (1);
224 }
225 
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents the parser accepts
 * to process. It is a safety boundary feature rather than a limitation
 * of the parser, and it can be disabled with the XML_PARSE_HUGE parser
 * option.
 */
unsigned int xmlParserMaxDepth = 256U;
235 
236 
237 
/* Miscellaneous parser constants. */
#define SAX2                        1
#define XML_PARSER_BUFFER_SIZE      100
#define XML_PARSER_BIG_BUFFER_SIZE  300
#define SAX_COMPAT_MODE             BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the input's available characters are followed by 0, which should be
 * provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE       100
253 
/*
 * NULL-terminated list of the XML prefixed PI targets allowed by the
 * W3C specs.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", "xml-model", NULL };
263 
264 
265 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
266 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
267                                               const xmlChar **str);
268 
269 static xmlParserErrors
270 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
271 	              xmlSAXHandlerPtr sax,
272 		      void *user_data, int depth, const xmlChar *URL,
273 		      const xmlChar *ID, xmlNodePtr *list);
274 
275 static int
276 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
277                           const char *encoding);
278 #ifdef LIBXML_LEGACY_ENABLED
279 static void
280 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
281                       xmlNodePtr lastNode);
282 #endif /* LIBXML_LEGACY_ENABLED */
283 
284 static xmlParserErrors
285 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
286 		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
287 
288 static int
289 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
290 
291 /************************************************************************
292  *									*
293  *		Some factorized error routines				*
294  *									*
295  ************************************************************************/
296 
297 /**
298  * xmlErrAttributeDup:
299  * @ctxt:  an XML parser context
300  * @prefix:  the attribute prefix
301  * @localname:  the attribute localname
302  *
303  * Handle a redefinition of attribute error
304  */
305 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)306 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
307                    const xmlChar * localname)
308 {
309     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
310         (ctxt->instate == XML_PARSER_EOF))
311 	return;
312     if (ctxt != NULL)
313 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
314 
315     if (prefix == NULL)
316         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
317                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
318                         (const char *) localname, NULL, NULL, 0, 0,
319                         "Attribute %s redefined\n", localname);
320     else
321         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
322                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
323                         (const char *) prefix, (const char *) localname,
324                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
325                         localname);
326     if (ctxt != NULL) {
327 	ctxt->wellFormed = 0;
328 	if (ctxt->recovery == 0)
329 	    ctxt->disableSAX = 1;
330     }
331 }
332 
333 /**
334  * xmlFatalErr:
335  * @ctxt:  an XML parser context
336  * @error:  the error number
337  * @extra:  extra information string
338  *
339  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
340  */
341 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)342 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
343 {
344     const char *errmsg;
345     char errstr[129] = "";
346 
347     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
348         (ctxt->instate == XML_PARSER_EOF))
349 	return;
350     switch (error) {
351         case XML_ERR_INVALID_HEX_CHARREF:
352             errmsg = "CharRef: invalid hexadecimal value";
353             break;
354         case XML_ERR_INVALID_DEC_CHARREF:
355             errmsg = "CharRef: invalid decimal value";
356             break;
357         case XML_ERR_INVALID_CHARREF:
358             errmsg = "CharRef: invalid value";
359             break;
360         case XML_ERR_INTERNAL_ERROR:
361             errmsg = "internal error";
362             break;
363         case XML_ERR_PEREF_AT_EOF:
364             errmsg = "PEReference at end of document";
365             break;
366         case XML_ERR_PEREF_IN_PROLOG:
367             errmsg = "PEReference in prolog";
368             break;
369         case XML_ERR_PEREF_IN_EPILOG:
370             errmsg = "PEReference in epilog";
371             break;
372         case XML_ERR_PEREF_NO_NAME:
373             errmsg = "PEReference: no name";
374             break;
375         case XML_ERR_PEREF_SEMICOL_MISSING:
376             errmsg = "PEReference: expecting ';'";
377             break;
378         case XML_ERR_ENTITY_LOOP:
379             errmsg = "Detected an entity reference loop";
380             break;
381         case XML_ERR_ENTITY_NOT_STARTED:
382             errmsg = "EntityValue: \" or ' expected";
383             break;
384         case XML_ERR_ENTITY_PE_INTERNAL:
385             errmsg = "PEReferences forbidden in internal subset";
386             break;
387         case XML_ERR_ENTITY_NOT_FINISHED:
388             errmsg = "EntityValue: \" or ' expected";
389             break;
390         case XML_ERR_ATTRIBUTE_NOT_STARTED:
391             errmsg = "AttValue: \" or ' expected";
392             break;
393         case XML_ERR_LT_IN_ATTRIBUTE:
394             errmsg = "Unescaped '<' not allowed in attributes values";
395             break;
396         case XML_ERR_LITERAL_NOT_STARTED:
397             errmsg = "SystemLiteral \" or ' expected";
398             break;
399         case XML_ERR_LITERAL_NOT_FINISHED:
400             errmsg = "Unfinished System or Public ID \" or ' expected";
401             break;
402         case XML_ERR_MISPLACED_CDATA_END:
403             errmsg = "Sequence ']]>' not allowed in content";
404             break;
405         case XML_ERR_URI_REQUIRED:
406             errmsg = "SYSTEM or PUBLIC, the URI is missing";
407             break;
408         case XML_ERR_PUBID_REQUIRED:
409             errmsg = "PUBLIC, the Public Identifier is missing";
410             break;
411         case XML_ERR_HYPHEN_IN_COMMENT:
412             errmsg = "Comment must not contain '--' (double-hyphen)";
413             break;
414         case XML_ERR_PI_NOT_STARTED:
415             errmsg = "xmlParsePI : no target name";
416             break;
417         case XML_ERR_RESERVED_XML_NAME:
418             errmsg = "Invalid PI name";
419             break;
420         case XML_ERR_NOTATION_NOT_STARTED:
421             errmsg = "NOTATION: Name expected here";
422             break;
423         case XML_ERR_NOTATION_NOT_FINISHED:
424             errmsg = "'>' required to close NOTATION declaration";
425             break;
426         case XML_ERR_VALUE_REQUIRED:
427             errmsg = "Entity value required";
428             break;
429         case XML_ERR_URI_FRAGMENT:
430             errmsg = "Fragment not allowed";
431             break;
432         case XML_ERR_ATTLIST_NOT_STARTED:
433             errmsg = "'(' required to start ATTLIST enumeration";
434             break;
435         case XML_ERR_NMTOKEN_REQUIRED:
436             errmsg = "NmToken expected in ATTLIST enumeration";
437             break;
438         case XML_ERR_ATTLIST_NOT_FINISHED:
439             errmsg = "')' required to finish ATTLIST enumeration";
440             break;
441         case XML_ERR_MIXED_NOT_STARTED:
442             errmsg = "MixedContentDecl : '|' or ')*' expected";
443             break;
444         case XML_ERR_PCDATA_REQUIRED:
445             errmsg = "MixedContentDecl : '#PCDATA' expected";
446             break;
447         case XML_ERR_ELEMCONTENT_NOT_STARTED:
448             errmsg = "ContentDecl : Name or '(' expected";
449             break;
450         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
451             errmsg = "ContentDecl : ',' '|' or ')' expected";
452             break;
453         case XML_ERR_PEREF_IN_INT_SUBSET:
454             errmsg =
455                 "PEReference: forbidden within markup decl in internal subset";
456             break;
457         case XML_ERR_GT_REQUIRED:
458             errmsg = "expected '>'";
459             break;
460         case XML_ERR_CONDSEC_INVALID:
461             errmsg = "XML conditional section '[' expected";
462             break;
463         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
464             errmsg = "Content error in the external subset";
465             break;
466         case XML_ERR_CONDSEC_INVALID_KEYWORD:
467             errmsg =
468                 "conditional section INCLUDE or IGNORE keyword expected";
469             break;
470         case XML_ERR_CONDSEC_NOT_FINISHED:
471             errmsg = "XML conditional section not closed";
472             break;
473         case XML_ERR_XMLDECL_NOT_STARTED:
474             errmsg = "Text declaration '<?xml' required";
475             break;
476         case XML_ERR_XMLDECL_NOT_FINISHED:
477             errmsg = "parsing XML declaration: '?>' expected";
478             break;
479         case XML_ERR_EXT_ENTITY_STANDALONE:
480             errmsg = "external parsed entities cannot be standalone";
481             break;
482         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
483             errmsg = "EntityRef: expecting ';'";
484             break;
485         case XML_ERR_DOCTYPE_NOT_FINISHED:
486             errmsg = "DOCTYPE improperly terminated";
487             break;
488         case XML_ERR_LTSLASH_REQUIRED:
489             errmsg = "EndTag: '</' not found";
490             break;
491         case XML_ERR_EQUAL_REQUIRED:
492             errmsg = "expected '='";
493             break;
494         case XML_ERR_STRING_NOT_CLOSED:
495             errmsg = "String not closed expecting \" or '";
496             break;
497         case XML_ERR_STRING_NOT_STARTED:
498             errmsg = "String not started expecting ' or \"";
499             break;
500         case XML_ERR_ENCODING_NAME:
501             errmsg = "Invalid XML encoding name";
502             break;
503         case XML_ERR_STANDALONE_VALUE:
504             errmsg = "standalone accepts only 'yes' or 'no'";
505             break;
506         case XML_ERR_DOCUMENT_EMPTY:
507             errmsg = "Document is empty";
508             break;
509         case XML_ERR_DOCUMENT_END:
510             errmsg = "Extra content at the end of the document";
511             break;
512         case XML_ERR_NOT_WELL_BALANCED:
513             errmsg = "chunk is not well balanced";
514             break;
515         case XML_ERR_EXTRA_CONTENT:
516             errmsg = "extra content at the end of well balanced chunk";
517             break;
518         case XML_ERR_VERSION_MISSING:
519             errmsg = "Malformed declaration expecting version";
520             break;
521         case XML_ERR_NAME_TOO_LONG:
522             errmsg = "Name too long use XML_PARSE_HUGE option";
523             break;
524 #if 0
525         case:
526             errmsg = "";
527             break;
528 #endif
529         default:
530             errmsg = "Unregistered error message";
531     }
532     if (info == NULL)
533         snprintf(errstr, 128, "%s\n", errmsg);
534     else
535         snprintf(errstr, 128, "%s: %%s\n", errmsg);
536     if (ctxt != NULL)
537 	ctxt->errNo = error;
538     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
539                     XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
540                     info);
541     if (ctxt != NULL) {
542 	ctxt->wellFormed = 0;
543 	if (ctxt->recovery == 0)
544 	    ctxt->disableSAX = 1;
545     }
546 }
547 
548 /**
549  * xmlFatalErrMsg:
550  * @ctxt:  an XML parser context
551  * @error:  the error number
552  * @msg:  the error message
553  *
554  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
555  */
556 static void
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)557 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
558                const char *msg)
559 {
560     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
561         (ctxt->instate == XML_PARSER_EOF))
562 	return;
563     if (ctxt != NULL)
564 	ctxt->errNo = error;
565     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
566                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
567     if (ctxt != NULL) {
568 	ctxt->wellFormed = 0;
569 	if (ctxt->recovery == 0)
570 	    ctxt->disableSAX = 1;
571     }
572 }
573 
574 /**
575  * xmlWarningMsg:
576  * @ctxt:  an XML parser context
577  * @error:  the error number
578  * @msg:  the error message
579  * @str1:  extra data
580  * @str2:  extra data
581  *
582  * Handle a warning.
583  */
584 static void
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)585 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
586               const char *msg, const xmlChar *str1, const xmlChar *str2)
587 {
588     xmlStructuredErrorFunc schannel = NULL;
589 
590     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
591         (ctxt->instate == XML_PARSER_EOF))
592 	return;
593     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
594         (ctxt->sax->initialized == XML_SAX2_MAGIC))
595         schannel = ctxt->sax->serror;
596     if (ctxt != NULL) {
597         __xmlRaiseError(schannel,
598                     (ctxt->sax) ? ctxt->sax->warning : NULL,
599                     ctxt->userData,
600                     ctxt, NULL, XML_FROM_PARSER, error,
601                     XML_ERR_WARNING, NULL, 0,
602 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
603 		    msg, (const char *) str1, (const char *) str2);
604     } else {
605         __xmlRaiseError(schannel, NULL, NULL,
606                     ctxt, NULL, XML_FROM_PARSER, error,
607                     XML_ERR_WARNING, NULL, 0,
608 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
609 		    msg, (const char *) str1, (const char *) str2);
610     }
611 }
612 
613 /**
614  * xmlValidityError:
615  * @ctxt:  an XML parser context
616  * @error:  the error number
617  * @msg:  the error message
618  * @str1:  extra data
619  *
620  * Handle a validity error.
621  */
622 static void
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)623 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
624               const char *msg, const xmlChar *str1, const xmlChar *str2)
625 {
626     xmlStructuredErrorFunc schannel = NULL;
627 
628     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629         (ctxt->instate == XML_PARSER_EOF))
630 	return;
631     if (ctxt != NULL) {
632 	ctxt->errNo = error;
633 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
634 	    schannel = ctxt->sax->serror;
635     }
636     if (ctxt != NULL) {
637         __xmlRaiseError(schannel,
638                     ctxt->vctxt.error, ctxt->vctxt.userData,
639                     ctxt, NULL, XML_FROM_DTD, error,
640                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
641 		    (const char *) str2, NULL, 0, 0,
642 		    msg, (const char *) str1, (const char *) str2);
643 	ctxt->valid = 0;
644     } else {
645         __xmlRaiseError(schannel, NULL, NULL,
646                     ctxt, NULL, XML_FROM_DTD, error,
647                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
648 		    (const char *) str2, NULL, 0, 0,
649 		    msg, (const char *) str1, (const char *) str2);
650     }
651 }
652 
653 /**
654  * xmlFatalErrMsgInt:
655  * @ctxt:  an XML parser context
656  * @error:  the error number
657  * @msg:  the error message
658  * @val:  an integer value
659  *
660  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
661  */
662 static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)663 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
664                   const char *msg, int val)
665 {
666     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
667         (ctxt->instate == XML_PARSER_EOF))
668 	return;
669     if (ctxt != NULL)
670 	ctxt->errNo = error;
671     __xmlRaiseError(NULL, NULL, NULL,
672                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
673                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
674     if (ctxt != NULL) {
675 	ctxt->wellFormed = 0;
676 	if (ctxt->recovery == 0)
677 	    ctxt->disableSAX = 1;
678     }
679 }
680 
681 /**
682  * xmlFatalErrMsgStrIntStr:
683  * @ctxt:  an XML parser context
684  * @error:  the error number
685  * @msg:  the error message
686  * @str1:  an string info
687  * @val:  an integer value
688  * @str2:  an string info
689  *
690  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
691  */
692 static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)693 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694                   const char *msg, const xmlChar *str1, int val,
695 		  const xmlChar *str2)
696 {
697     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
698         (ctxt->instate == XML_PARSER_EOF))
699 	return;
700     if (ctxt != NULL)
701 	ctxt->errNo = error;
702     __xmlRaiseError(NULL, NULL, NULL,
703                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
704                     NULL, 0, (const char *) str1, (const char *) str2,
705 		    NULL, val, 0, msg, str1, val, str2);
706     if (ctxt != NULL) {
707 	ctxt->wellFormed = 0;
708 	if (ctxt->recovery == 0)
709 	    ctxt->disableSAX = 1;
710     }
711 }
712 
713 /**
714  * xmlFatalErrMsgStr:
715  * @ctxt:  an XML parser context
716  * @error:  the error number
717  * @msg:  the error message
718  * @val:  a string value
719  *
720  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
721  */
722 static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)723 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
724                   const char *msg, const xmlChar * val)
725 {
726     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
727         (ctxt->instate == XML_PARSER_EOF))
728 	return;
729     if (ctxt != NULL)
730 	ctxt->errNo = error;
731     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
732                     XML_FROM_PARSER, error, XML_ERR_FATAL,
733                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
734                     val);
735     if (ctxt != NULL) {
736 	ctxt->wellFormed = 0;
737 	if (ctxt->recovery == 0)
738 	    ctxt->disableSAX = 1;
739     }
740 }
741 
742 /**
743  * xmlErrMsgStr:
744  * @ctxt:  an XML parser context
745  * @error:  the error number
746  * @msg:  the error message
747  * @val:  a string value
748  *
749  * Handle a non fatal parser error
750  */
751 static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)752 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
753                   const char *msg, const xmlChar * val)
754 {
755     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
756         (ctxt->instate == XML_PARSER_EOF))
757 	return;
758     if (ctxt != NULL)
759 	ctxt->errNo = error;
760     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
761                     XML_FROM_PARSER, error, XML_ERR_ERROR,
762                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
763                     val);
764 }
765 
766 /**
767  * xmlNsErr:
768  * @ctxt:  an XML parser context
769  * @error:  the error number
770  * @msg:  the message
771  * @info1:  extra information string
772  * @info2:  extra information string
773  *
774  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
775  */
776 static void
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)777 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
778          const char *msg,
779          const xmlChar * info1, const xmlChar * info2,
780          const xmlChar * info3)
781 {
782     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
783         (ctxt->instate == XML_PARSER_EOF))
784 	return;
785     if (ctxt != NULL)
786 	ctxt->errNo = error;
787     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
788                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
789                     (const char *) info2, (const char *) info3, 0, 0, msg,
790                     info1, info2, info3);
791     if (ctxt != NULL)
792 	ctxt->nsWellFormed = 0;
793 }
794 
795 /**
796  * xmlNsWarn
797  * @ctxt:  an XML parser context
798  * @error:  the error number
799  * @msg:  the message
800  * @info1:  extra information string
801  * @info2:  extra information string
802  *
803  * Handle a namespace warning error
804  */
805 static void
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)806 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
807          const char *msg,
808          const xmlChar * info1, const xmlChar * info2,
809          const xmlChar * info3)
810 {
811     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
812         (ctxt->instate == XML_PARSER_EOF))
813 	return;
814     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
815                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
816                     (const char *) info2, (const char *) info3, 0, 0, msg,
817                     info1, info2, info3);
818 }
819 
820 /************************************************************************
821  *									*
822  *		Library wide options					*
823  *									*
824  ************************************************************************/
825 
/**
  * xmlHasFeature:
  * @feature: the feature to be examined
  *
  * Examines if the library has been compiled with a given feature.
  *
  * Returns a non-zero value if the feature exists; returns zero (0) if
  * the feature does not exist or an unknown feature is requested.
  */
836 int
xmlHasFeature(xmlFeature feature)837 xmlHasFeature(xmlFeature feature)
838 {
839     switch (feature) {
840 	case XML_WITH_THREAD:
841 #ifdef LIBXML_THREAD_ENABLED
842 	    return(1);
843 #else
844 	    return(0);
845 #endif
846         case XML_WITH_TREE:
847 #ifdef LIBXML_TREE_ENABLED
848             return(1);
849 #else
850             return(0);
851 #endif
852         case XML_WITH_OUTPUT:
853 #ifdef LIBXML_OUTPUT_ENABLED
854             return(1);
855 #else
856             return(0);
857 #endif
858         case XML_WITH_PUSH:
859 #ifdef LIBXML_PUSH_ENABLED
860             return(1);
861 #else
862             return(0);
863 #endif
864         case XML_WITH_READER:
865 #ifdef LIBXML_READER_ENABLED
866             return(1);
867 #else
868             return(0);
869 #endif
870         case XML_WITH_PATTERN:
871 #ifdef LIBXML_PATTERN_ENABLED
872             return(1);
873 #else
874             return(0);
875 #endif
876         case XML_WITH_WRITER:
877 #ifdef LIBXML_WRITER_ENABLED
878             return(1);
879 #else
880             return(0);
881 #endif
882         case XML_WITH_SAX1:
883 #ifdef LIBXML_SAX1_ENABLED
884             return(1);
885 #else
886             return(0);
887 #endif
888         case XML_WITH_FTP:
889 #ifdef LIBXML_FTP_ENABLED
890             return(1);
891 #else
892             return(0);
893 #endif
894         case XML_WITH_HTTP:
895 #ifdef LIBXML_HTTP_ENABLED
896             return(1);
897 #else
898             return(0);
899 #endif
900         case XML_WITH_VALID:
901 #ifdef LIBXML_VALID_ENABLED
902             return(1);
903 #else
904             return(0);
905 #endif
906         case XML_WITH_HTML:
907 #ifdef LIBXML_HTML_ENABLED
908             return(1);
909 #else
910             return(0);
911 #endif
912         case XML_WITH_LEGACY:
913 #ifdef LIBXML_LEGACY_ENABLED
914             return(1);
915 #else
916             return(0);
917 #endif
918         case XML_WITH_C14N:
919 #ifdef LIBXML_C14N_ENABLED
920             return(1);
921 #else
922             return(0);
923 #endif
924         case XML_WITH_CATALOG:
925 #ifdef LIBXML_CATALOG_ENABLED
926             return(1);
927 #else
928             return(0);
929 #endif
930         case XML_WITH_XPATH:
931 #ifdef LIBXML_XPATH_ENABLED
932             return(1);
933 #else
934             return(0);
935 #endif
936         case XML_WITH_XPTR:
937 #ifdef LIBXML_XPTR_ENABLED
938             return(1);
939 #else
940             return(0);
941 #endif
942         case XML_WITH_XINCLUDE:
943 #ifdef LIBXML_XINCLUDE_ENABLED
944             return(1);
945 #else
946             return(0);
947 #endif
948         case XML_WITH_ICONV:
949 #ifdef LIBXML_ICONV_ENABLED
950             return(1);
951 #else
952             return(0);
953 #endif
954         case XML_WITH_ISO8859X:
955 #ifdef LIBXML_ISO8859X_ENABLED
956             return(1);
957 #else
958             return(0);
959 #endif
960         case XML_WITH_UNICODE:
961 #ifdef LIBXML_UNICODE_ENABLED
962             return(1);
963 #else
964             return(0);
965 #endif
966         case XML_WITH_REGEXP:
967 #ifdef LIBXML_REGEXP_ENABLED
968             return(1);
969 #else
970             return(0);
971 #endif
972         case XML_WITH_AUTOMATA:
973 #ifdef LIBXML_AUTOMATA_ENABLED
974             return(1);
975 #else
976             return(0);
977 #endif
978         case XML_WITH_EXPR:
979 #ifdef LIBXML_EXPR_ENABLED
980             return(1);
981 #else
982             return(0);
983 #endif
984         case XML_WITH_SCHEMAS:
985 #ifdef LIBXML_SCHEMAS_ENABLED
986             return(1);
987 #else
988             return(0);
989 #endif
990         case XML_WITH_SCHEMATRON:
991 #ifdef LIBXML_SCHEMATRON_ENABLED
992             return(1);
993 #else
994             return(0);
995 #endif
996         case XML_WITH_MODULES:
997 #ifdef LIBXML_MODULES_ENABLED
998             return(1);
999 #else
1000             return(0);
1001 #endif
1002         case XML_WITH_DEBUG:
1003 #ifdef LIBXML_DEBUG_ENABLED
1004             return(1);
1005 #else
1006             return(0);
1007 #endif
1008         case XML_WITH_DEBUG_MEM:
1009 #ifdef DEBUG_MEMORY_LOCATION
1010             return(1);
1011 #else
1012             return(0);
1013 #endif
1014         case XML_WITH_DEBUG_RUN:
1015 #ifdef LIBXML_DEBUG_RUNTIME
1016             return(1);
1017 #else
1018             return(0);
1019 #endif
1020         case XML_WITH_ZLIB:
1021 #ifdef LIBXML_ZLIB_ENABLED
1022             return(1);
1023 #else
1024             return(0);
1025 #endif
1026         case XML_WITH_LZMA:
1027 #ifdef LIBXML_LZMA_ENABLED
1028             return(1);
1029 #else
1030             return(0);
1031 #endif
1032         case XML_WITH_ICU:
1033 #ifdef LIBXML_ICU_ENABLED
1034             return(1);
1035 #else
1036             return(0);
1037 #endif
1038         default:
1039 	    break;
1040      }
1041      return(0);
1042 }
1043 
1044 /************************************************************************
1045  *									*
1046  *		SAX2 defaulted attributes handling			*
1047  *									*
1048  ************************************************************************/
1049 
1050 /**
1051  * xmlDetectSAX2:
1052  * @ctxt:  an XML parser context
1053  *
1054  * Do the SAX2 detection and specific intialization
1055  */
1056 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1057 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1058     if (ctxt == NULL) return;
1059 #ifdef LIBXML_SAX1_ENABLED
1060     if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1061         ((ctxt->sax->startElementNs != NULL) ||
1062          (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1063 #else
1064     ctxt->sax2 = 1;
1065 #endif /* LIBXML_SAX1_ENABLED */
1066 
1067     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1068     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1069     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1070     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1071 		(ctxt->str_xml_ns == NULL)) {
1072         xmlErrMemory(ctxt, NULL);
1073     }
1074 }
1075 
/*
 * Table of the defaulted attributes of one element, as filled in by
 * xmlAddDefAttrs(). Each attribute takes 5 consecutive entries of the
 * values array: local name, prefix, value, pointer to the end of the
 * value, and an "external" marker (NULL when not external).
 * The structure is allocated with extra room behind it, so values[]
 * actually holds at least maxAttrs * 5 entries.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes stored for the element */
    int maxAttrs;       /* capacity of the array, counted in attributes */
    const xmlChar *values[5]; /* 5 entries per attribute:
                                 localname/prefix/value/value end/external */
};
1083 
1084 /**
1085  * xmlAttrNormalizeSpace:
1086  * @src: the source string
1087  * @dst: the target string
1088  *
1089  * Normalize the space in non CDATA attribute values:
1090  * If the attribute type is not CDATA, then the XML processor MUST further
1091  * process the normalized attribute value by discarding any leading and
1092  * trailing space (#x20) characters, and by replacing sequences of space
1093  * (#x20) characters by a single space (#x20) character.
1094  * Note that the size of dst need to be at least src, and if one doesn't need
1095  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1096  * passing src as dst is just fine.
1097  *
1098  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1099  *         is needed.
1100  */
1101 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1102 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1103 {
1104     if ((src == NULL) || (dst == NULL))
1105         return(NULL);
1106 
1107     while (*src == 0x20) src++;
1108     while (*src != 0) {
1109 	if (*src == 0x20) {
1110 	    while (*src == 0x20) src++;
1111 	    if (*src != 0)
1112 		*dst++ = 0x20;
1113 	} else {
1114 	    *dst++ = *src++;
1115 	}
1116     }
1117     *dst = 0;
1118     if (dst == src)
1119        return(NULL);
1120     return(dst);
1121 }
1122 
1123 /**
1124  * xmlAttrNormalizeSpace2:
1125  * @src: the source string
1126  *
1127  * Normalize the space in non CDATA attribute values, a slightly more complex
1128  * front end to avoid allocation problems when running on attribute values
1129  * coming from the input.
1130  *
1131  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1132  *         is needed.
1133  */
1134 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1135 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1136 {
1137     int i;
1138     int remove_head = 0;
1139     int need_realloc = 0;
1140     const xmlChar *cur;
1141 
1142     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1143         return(NULL);
1144     i = *len;
1145     if (i <= 0)
1146         return(NULL);
1147 
1148     cur = src;
1149     while (*cur == 0x20) {
1150         cur++;
1151 	remove_head++;
1152     }
1153     while (*cur != 0) {
1154 	if (*cur == 0x20) {
1155 	    cur++;
1156 	    if ((*cur == 0x20) || (*cur == 0)) {
1157 	        need_realloc = 1;
1158 		break;
1159 	    }
1160 	} else
1161 	    cur++;
1162     }
1163     if (need_realloc) {
1164         xmlChar *ret;
1165 
1166 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1167 	if (ret == NULL) {
1168 	    xmlErrMemory(ctxt, NULL);
1169 	    return(NULL);
1170 	}
1171 	xmlAttrNormalizeSpace(ret, ret);
1172 	*len = (int) strlen((const char *)ret);
1173         return(ret);
1174     } else if (remove_head) {
1175         *len -= remove_head;
1176         memmove(src, src + remove_head, 1 + *len);
1177 	return(src);
1178     }
1179     return(NULL);
1180 }
1181 
1182 /**
1183  * xmlAddDefAttrs:
1184  * @ctxt:  an XML parser context
1185  * @fullname:  the element fullname
1186  * @fullattr:  the attribute fullname
1187  * @value:  the attribute value
1188  *
1189  * Add a defaulted attribute for an element
1190  */
1191 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1192 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1193                const xmlChar *fullname,
1194                const xmlChar *fullattr,
1195                const xmlChar *value) {
1196     xmlDefAttrsPtr defaults;
1197     int len;
1198     const xmlChar *name;
1199     const xmlChar *prefix;
1200 
1201     /*
1202      * Allows to detect attribute redefinitions
1203      */
1204     if (ctxt->attsSpecial != NULL) {
1205         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1206 	    return;
1207     }
1208 
1209     if (ctxt->attsDefault == NULL) {
1210         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1211 	if (ctxt->attsDefault == NULL)
1212 	    goto mem_error;
1213     }
1214 
1215     /*
1216      * split the element name into prefix:localname , the string found
1217      * are within the DTD and then not associated to namespace names.
1218      */
1219     name = xmlSplitQName3(fullname, &len);
1220     if (name == NULL) {
1221         name = xmlDictLookup(ctxt->dict, fullname, -1);
1222 	prefix = NULL;
1223     } else {
1224         name = xmlDictLookup(ctxt->dict, name, -1);
1225 	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1226     }
1227 
1228     /*
1229      * make sure there is some storage
1230      */
1231     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1232     if (defaults == NULL) {
1233         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1234 	                   (4 * 5) * sizeof(const xmlChar *));
1235 	if (defaults == NULL)
1236 	    goto mem_error;
1237 	defaults->nbAttrs = 0;
1238 	defaults->maxAttrs = 4;
1239 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1240 	                        defaults, NULL) < 0) {
1241 	    xmlFree(defaults);
1242 	    goto mem_error;
1243 	}
1244     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1245         xmlDefAttrsPtr temp;
1246 
1247         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1248 		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1249 	if (temp == NULL)
1250 	    goto mem_error;
1251 	defaults = temp;
1252 	defaults->maxAttrs *= 2;
1253 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1254 	                        defaults, NULL) < 0) {
1255 	    xmlFree(defaults);
1256 	    goto mem_error;
1257 	}
1258     }
1259 
1260     /*
1261      * Split the element name into prefix:localname , the string found
1262      * are within the DTD and hen not associated to namespace names.
1263      */
1264     name = xmlSplitQName3(fullattr, &len);
1265     if (name == NULL) {
1266         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1267 	prefix = NULL;
1268     } else {
1269         name = xmlDictLookup(ctxt->dict, name, -1);
1270 	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1271     }
1272 
1273     defaults->values[5 * defaults->nbAttrs] = name;
1274     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1275     /* intern the string and precompute the end */
1276     len = xmlStrlen(value);
1277     value = xmlDictLookup(ctxt->dict, value, len);
1278     defaults->values[5 * defaults->nbAttrs + 2] = value;
1279     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1280     if (ctxt->external)
1281         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1282     else
1283         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1284     defaults->nbAttrs++;
1285 
1286     return;
1287 
1288 mem_error:
1289     xmlErrMemory(ctxt, NULL);
1290     return;
1291 }
1292 
1293 /**
1294  * xmlAddSpecialAttr:
1295  * @ctxt:  an XML parser context
1296  * @fullname:  the element fullname
1297  * @fullattr:  the attribute fullname
1298  * @type:  the attribute type
1299  *
1300  * Register this attribute type
1301  */
1302 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1303 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1304 		  const xmlChar *fullname,
1305 		  const xmlChar *fullattr,
1306 		  int type)
1307 {
1308     if (ctxt->attsSpecial == NULL) {
1309         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1310 	if (ctxt->attsSpecial == NULL)
1311 	    goto mem_error;
1312     }
1313 
1314     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1315         return;
1316 
1317     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1318                      (void *) (long) type);
1319     return;
1320 
1321 mem_error:
1322     xmlErrMemory(ctxt, NULL);
1323     return;
1324 }
1325 
1326 /**
1327  * xmlCleanSpecialAttrCallback:
1328  *
1329  * Removes CDATA attributes from the special attribute table
1330  */
1331 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1332 xmlCleanSpecialAttrCallback(void *payload, void *data,
1333                             const xmlChar *fullname, const xmlChar *fullattr,
1334                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1335     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1336 
1337     if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1338         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1339     }
1340 }
1341 
1342 /**
1343  * xmlCleanSpecialAttr:
1344  * @ctxt:  an XML parser context
1345  *
1346  * Trim the list of attributes defined to remove all those of type
1347  * CDATA as they are not special. This call should be done when finishing
1348  * to parse the DTD and before starting to parse the document root.
1349  */
1350 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1351 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1352 {
1353     if (ctxt->attsSpecial == NULL)
1354         return;
1355 
1356     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1357 
1358     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1359         xmlHashFree(ctxt->attsSpecial, NULL);
1360         ctxt->attsSpecial = NULL;
1361     }
1362     return;
1363 }
1364 
1365 /**
1366  * xmlCheckLanguageID:
1367  * @lang:  pointer to the string value
1368  *
1369  * Checks that the value conforms to the LanguageID production:
1370  *
1371  * NOTE: this is somewhat deprecated, those productions were removed from
1372  *       the XML Second edition.
1373  *
1374  * [33] LanguageID ::= Langcode ('-' Subcode)*
1375  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1376  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1377  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1378  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1379  * [38] Subcode ::= ([a-z] | [A-Z])+
1380  *
1381  * The current REC reference the sucessors of RFC 1766, currently 5646
1382  *
1383  * http://www.rfc-editor.org/rfc/rfc5646.txt
1384  * langtag       = language
1385  *                 ["-" script]
1386  *                 ["-" region]
1387  *                 *("-" variant)
1388  *                 *("-" extension)
1389  *                 ["-" privateuse]
1390  * language      = 2*3ALPHA            ; shortest ISO 639 code
1391  *                 ["-" extlang]       ; sometimes followed by
1392  *                                     ; extended language subtags
1393  *               / 4ALPHA              ; or reserved for future use
1394  *               / 5*8ALPHA            ; or registered language subtag
1395  *
1396  * extlang       = 3ALPHA              ; selected ISO 639 codes
1397  *                 *2("-" 3ALPHA)      ; permanently reserved
1398  *
1399  * script        = 4ALPHA              ; ISO 15924 code
1400  *
1401  * region        = 2ALPHA              ; ISO 3166-1 code
1402  *               / 3DIGIT              ; UN M.49 code
1403  *
1404  * variant       = 5*8alphanum         ; registered variants
1405  *               / (DIGIT 3alphanum)
1406  *
1407  * extension     = singleton 1*("-" (2*8alphanum))
1408  *
1409  *                                     ; Single alphanumerics
1410  *                                     ; "x" reserved for private use
1411  * singleton     = DIGIT               ; 0 - 9
1412  *               / %x41-57             ; A - W
1413  *               / %x59-5A             ; Y - Z
1414  *               / %x61-77             ; a - w
1415  *               / %x79-7A             ; y - z
1416  *
1417  * it sounds right to still allow Irregular i-xxx IANA and user codes too
1418  * The parser below doesn't try to cope with extension or privateuse
1419  * that could be added but that's not interoperable anyway
1420  *
1421  * Returns 1 if correct 0 otherwise
1422  **/
1423 int
xmlCheckLanguageID(const xmlChar * lang)1424 xmlCheckLanguageID(const xmlChar * lang)
1425 {
1426     const xmlChar *cur = lang, *nxt;
1427 
1428     if (cur == NULL)
1429         return (0);
1430     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1431         ((cur[0] == 'I') && (cur[1] == '-')) ||
1432         ((cur[0] == 'x') && (cur[1] == '-')) ||
1433         ((cur[0] == 'X') && (cur[1] == '-'))) {
1434         /*
1435          * Still allow IANA code and user code which were coming
1436          * from the previous version of the XML-1.0 specification
1437          * it's deprecated but we should not fail
1438          */
1439         cur += 2;
1440         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1441                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1442             cur++;
1443         return(cur[0] == 0);
1444     }
1445     nxt = cur;
1446     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1447            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1448            nxt++;
1449     if (nxt - cur >= 4) {
1450         /*
1451          * Reserved
1452          */
1453         if ((nxt - cur > 8) || (nxt[0] != 0))
1454             return(0);
1455         return(1);
1456     }
1457     if (nxt - cur < 2)
1458         return(0);
1459     /* we got an ISO 639 code */
1460     if (nxt[0] == 0)
1461         return(1);
1462     if (nxt[0] != '-')
1463         return(0);
1464 
1465     nxt++;
1466     cur = nxt;
1467     /* now we can have extlang or script or region or variant */
1468     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1469         goto region_m49;
1470 
1471     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1472            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1473            nxt++;
1474     if (nxt - cur == 4)
1475         goto script;
1476     if (nxt - cur == 2)
1477         goto region;
1478     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1479         goto variant;
1480     if (nxt - cur != 3)
1481         return(0);
1482     /* we parsed an extlang */
1483     if (nxt[0] == 0)
1484         return(1);
1485     if (nxt[0] != '-')
1486         return(0);
1487 
1488     nxt++;
1489     cur = nxt;
1490     /* now we can have script or region or variant */
1491     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1492         goto region_m49;
1493 
1494     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1495            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1496            nxt++;
1497     if (nxt - cur == 2)
1498         goto region;
1499     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1500         goto variant;
1501     if (nxt - cur != 4)
1502         return(0);
1503     /* we parsed a script */
1504 script:
1505     if (nxt[0] == 0)
1506         return(1);
1507     if (nxt[0] != '-')
1508         return(0);
1509 
1510     nxt++;
1511     cur = nxt;
1512     /* now we can have region or variant */
1513     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1514         goto region_m49;
1515 
1516     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1517            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1518            nxt++;
1519 
1520     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1521         goto variant;
1522     if (nxt - cur != 2)
1523         return(0);
1524     /* we parsed a region */
1525 region:
1526     if (nxt[0] == 0)
1527         return(1);
1528     if (nxt[0] != '-')
1529         return(0);
1530 
1531     nxt++;
1532     cur = nxt;
1533     /* now we can just have a variant */
1534     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1535            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1536            nxt++;
1537 
1538     if ((nxt - cur < 5) || (nxt - cur > 8))
1539         return(0);
1540 
1541     /* we parsed a variant */
1542 variant:
1543     if (nxt[0] == 0)
1544         return(1);
1545     if (nxt[0] != '-')
1546         return(0);
1547     /* extensions and private use subtags not checked */
1548     return (1);
1549 
1550 region_m49:
1551     if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1552         ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1553         nxt += 3;
1554         goto region;
1555     }
1556     return(0);
1557 }
1558 
1559 /************************************************************************
1560  *									*
1561  *		Parser stacks related functions and macros		*
1562  *									*
1563  ************************************************************************/
1564 
/*
 * Forward declaration: the function is static, so it is defined elsewhere
 * in this translation unit.
 */
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                            const xmlChar ** str);
1567 
1568 #ifdef SAX2
1569 /**
1570  * nsPush:
1571  * @ctxt:  an XML parser context
1572  * @prefix:  the namespace prefix or NULL
1573  * @URL:  the namespace name
1574  *
1575  * Pushes a new parser namespace on top of the ns stack
1576  *
1577  * Returns -1 in case of error, -2 if the namespace should be discarded
1578  *	   and the index in the stack otherwise.
1579  */
1580 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1581 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1582 {
1583     if (ctxt->options & XML_PARSE_NSCLEAN) {
1584         int i;
1585 	for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1586 	    if (ctxt->nsTab[i] == prefix) {
1587 		/* in scope */
1588 	        if (ctxt->nsTab[i + 1] == URL)
1589 		    return(-2);
1590 		/* out of scope keep it */
1591 		break;
1592 	    }
1593 	}
1594     }
1595     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1596 	ctxt->nsMax = 10;
1597 	ctxt->nsNr = 0;
1598 	ctxt->nsTab = (const xmlChar **)
1599 	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1600 	if (ctxt->nsTab == NULL) {
1601 	    xmlErrMemory(ctxt, NULL);
1602 	    ctxt->nsMax = 0;
1603             return (-1);
1604 	}
1605     } else if (ctxt->nsNr >= ctxt->nsMax) {
1606         const xmlChar ** tmp;
1607         ctxt->nsMax *= 2;
1608         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1609 				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1610         if (tmp == NULL) {
1611             xmlErrMemory(ctxt, NULL);
1612 	    ctxt->nsMax /= 2;
1613             return (-1);
1614         }
1615 	ctxt->nsTab = tmp;
1616     }
1617     ctxt->nsTab[ctxt->nsNr++] = prefix;
1618     ctxt->nsTab[ctxt->nsNr++] = URL;
1619     return (ctxt->nsNr);
1620 }
1621 /**
1622  * nsPop:
1623  * @ctxt: an XML parser context
1624  * @nr:  the number to pop
1625  *
1626  * Pops the top @nr parser prefix/namespace from the ns stack
1627  *
1628  * Returns the number of namespaces removed
1629  */
1630 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1631 nsPop(xmlParserCtxtPtr ctxt, int nr)
1632 {
1633     int i;
1634 
1635     if (ctxt->nsTab == NULL) return(0);
1636     if (ctxt->nsNr < nr) {
1637         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1638         nr = ctxt->nsNr;
1639     }
1640     if (ctxt->nsNr <= 0)
1641         return (0);
1642 
1643     for (i = 0;i < nr;i++) {
1644          ctxt->nsNr--;
1645 	 ctxt->nsTab[ctxt->nsNr] = NULL;
1646     }
1647     return(nr);
1648 }
1649 #endif
1650 
1651 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1652 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1653     const xmlChar **atts;
1654     int *attallocs;
1655     int maxatts;
1656 
1657     if (ctxt->atts == NULL) {
1658 	maxatts = 55; /* allow for 10 attrs by default */
1659 	atts = (const xmlChar **)
1660 	       xmlMalloc(maxatts * sizeof(xmlChar *));
1661 	if (atts == NULL) goto mem_error;
1662 	ctxt->atts = atts;
1663 	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1664 	if (attallocs == NULL) goto mem_error;
1665 	ctxt->attallocs = attallocs;
1666 	ctxt->maxatts = maxatts;
1667     } else if (nr + 5 > ctxt->maxatts) {
1668 	maxatts = (nr + 5) * 2;
1669 	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1670 				     maxatts * sizeof(const xmlChar *));
1671 	if (atts == NULL) goto mem_error;
1672 	ctxt->atts = atts;
1673 	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1674 	                             (maxatts / 5) * sizeof(int));
1675 	if (attallocs == NULL) goto mem_error;
1676 	ctxt->attallocs = attallocs;
1677 	ctxt->maxatts = maxatts;
1678     }
1679     return(ctxt->maxatts);
1680 mem_error:
1681     xmlErrMemory(ctxt, NULL);
1682     return(-1);
1683 }
1684 
1685 /**
1686  * inputPush:
1687  * @ctxt:  an XML parser context
1688  * @value:  the parser input
1689  *
1690  * Pushes a new parser input on top of the input stack
1691  *
1692  * Returns -1 in case of error, the index in the stack otherwise
1693  */
1694 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1695 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1696 {
1697     if ((ctxt == NULL) || (value == NULL))
1698         return(-1);
1699     if (ctxt->inputNr >= ctxt->inputMax) {
1700         ctxt->inputMax *= 2;
1701         ctxt->inputTab =
1702             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1703                                              ctxt->inputMax *
1704                                              sizeof(ctxt->inputTab[0]));
1705         if (ctxt->inputTab == NULL) {
1706             xmlErrMemory(ctxt, NULL);
1707 	    xmlFreeInputStream(value);
1708 	    ctxt->inputMax /= 2;
1709 	    value = NULL;
1710             return (-1);
1711         }
1712     }
1713     ctxt->inputTab[ctxt->inputNr] = value;
1714     ctxt->input = value;
1715     return (ctxt->inputNr++);
1716 }
1717 /**
1718  * inputPop:
1719  * @ctxt: an XML parser context
1720  *
1721  * Pops the top parser input from the input stack
1722  *
1723  * Returns the input just removed
1724  */
1725 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1726 inputPop(xmlParserCtxtPtr ctxt)
1727 {
1728     xmlParserInputPtr ret;
1729 
1730     if (ctxt == NULL)
1731         return(NULL);
1732     if (ctxt->inputNr <= 0)
1733         return (NULL);
1734     ctxt->inputNr--;
1735     if (ctxt->inputNr > 0)
1736         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1737     else
1738         ctxt->input = NULL;
1739     ret = ctxt->inputTab[ctxt->inputNr];
1740     ctxt->inputTab[ctxt->inputNr] = NULL;
1741     return (ret);
1742 }
1743 /**
1744  * nodePush:
1745  * @ctxt:  an XML parser context
1746  * @value:  the element node
1747  *
1748  * Pushes a new element node on top of the node stack
1749  *
1750  * Returns -1 in case of error, the index in the stack otherwise
1751  */
1752 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1753 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1754 {
1755     if (ctxt == NULL) return(0);
1756     if (ctxt->nodeNr >= ctxt->nodeMax) {
1757         xmlNodePtr *tmp;
1758 
1759 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1760                                       ctxt->nodeMax * 2 *
1761                                       sizeof(ctxt->nodeTab[0]));
1762         if (tmp == NULL) {
1763             xmlErrMemory(ctxt, NULL);
1764             return (-1);
1765         }
1766         ctxt->nodeTab = tmp;
1767 	ctxt->nodeMax *= 2;
1768     }
1769     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1770         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1771 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1772 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1773 			  xmlParserMaxDepth);
1774 	ctxt->instate = XML_PARSER_EOF;
1775 	return(-1);
1776     }
1777     ctxt->nodeTab[ctxt->nodeNr] = value;
1778     ctxt->node = value;
1779     return (ctxt->nodeNr++);
1780 }
1781 
1782 /**
1783  * nodePop:
1784  * @ctxt: an XML parser context
1785  *
1786  * Pops the top element node from the node stack
1787  *
1788  * Returns the node just removed
1789  */
1790 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1791 nodePop(xmlParserCtxtPtr ctxt)
1792 {
1793     xmlNodePtr ret;
1794 
1795     if (ctxt == NULL) return(NULL);
1796     if (ctxt->nodeNr <= 0)
1797         return (NULL);
1798     ctxt->nodeNr--;
1799     if (ctxt->nodeNr > 0)
1800         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1801     else
1802         ctxt->node = NULL;
1803     ret = ctxt->nodeTab[ctxt->nodeNr];
1804     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1805     return (ret);
1806 }
1807 
1808 #ifdef LIBXML_PUSH_ENABLED
1809 /**
1810  * nameNsPush:
1811  * @ctxt:  an XML parser context
1812  * @value:  the element name
1813  * @prefix:  the element prefix
1814  * @URI:  the element namespace name
1815  *
1816  * Pushes a new element name/prefix/URL on top of the name stack
1817  *
1818  * Returns -1 in case of error, the index in the stack otherwise
1819  */
1820 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1821 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1822            const xmlChar *prefix, const xmlChar *URI, int nsNr)
1823 {
1824     if (ctxt->nameNr >= ctxt->nameMax) {
1825         const xmlChar * *tmp;
1826         void **tmp2;
1827         ctxt->nameMax *= 2;
1828         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1829                                     ctxt->nameMax *
1830                                     sizeof(ctxt->nameTab[0]));
1831         if (tmp == NULL) {
1832 	    ctxt->nameMax /= 2;
1833 	    goto mem_error;
1834         }
1835 	ctxt->nameTab = tmp;
1836         tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1837                                     ctxt->nameMax * 3 *
1838                                     sizeof(ctxt->pushTab[0]));
1839         if (tmp2 == NULL) {
1840 	    ctxt->nameMax /= 2;
1841 	    goto mem_error;
1842         }
1843 	ctxt->pushTab = tmp2;
1844     }
1845     ctxt->nameTab[ctxt->nameNr] = value;
1846     ctxt->name = value;
1847     ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1848     ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1849     ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1850     return (ctxt->nameNr++);
1851 mem_error:
1852     xmlErrMemory(ctxt, NULL);
1853     return (-1);
1854 }
1855 /**
1856  * nameNsPop:
1857  * @ctxt: an XML parser context
1858  *
1859  * Pops the top element/prefix/URI name from the name stack
1860  *
1861  * Returns the name just removed
1862  */
1863 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1864 nameNsPop(xmlParserCtxtPtr ctxt)
1865 {
1866     const xmlChar *ret;
1867 
1868     if (ctxt->nameNr <= 0)
1869         return (NULL);
1870     ctxt->nameNr--;
1871     if (ctxt->nameNr > 0)
1872         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1873     else
1874         ctxt->name = NULL;
1875     ret = ctxt->nameTab[ctxt->nameNr];
1876     ctxt->nameTab[ctxt->nameNr] = NULL;
1877     return (ret);
1878 }
1879 #endif /* LIBXML_PUSH_ENABLED */
1880 
1881 /**
1882  * namePush:
1883  * @ctxt:  an XML parser context
1884  * @value:  the element name
1885  *
1886  * Pushes a new element name on top of the name stack
1887  *
1888  * Returns -1 in case of error, the index in the stack otherwise
1889  */
1890 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1891 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1892 {
1893     if (ctxt == NULL) return (-1);
1894 
1895     if (ctxt->nameNr >= ctxt->nameMax) {
1896         const xmlChar * *tmp;
1897         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1898                                     ctxt->nameMax * 2 *
1899                                     sizeof(ctxt->nameTab[0]));
1900         if (tmp == NULL) {
1901 	    goto mem_error;
1902         }
1903 	ctxt->nameTab = tmp;
1904         ctxt->nameMax *= 2;
1905     }
1906     ctxt->nameTab[ctxt->nameNr] = value;
1907     ctxt->name = value;
1908     return (ctxt->nameNr++);
1909 mem_error:
1910     xmlErrMemory(ctxt, NULL);
1911     return (-1);
1912 }
1913 /**
1914  * namePop:
1915  * @ctxt: an XML parser context
1916  *
1917  * Pops the top element name from the name stack
1918  *
1919  * Returns the name just removed
1920  */
1921 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1922 namePop(xmlParserCtxtPtr ctxt)
1923 {
1924     const xmlChar *ret;
1925 
1926     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1927         return (NULL);
1928     ctxt->nameNr--;
1929     if (ctxt->nameNr > 0)
1930         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1931     else
1932         ctxt->name = NULL;
1933     ret = ctxt->nameTab[ctxt->nameNr];
1934     ctxt->nameTab[ctxt->nameNr] = NULL;
1935     return (ret);
1936 }
1937 
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to store
 *
 * Stores @val on top of the ctxt->spaceTab stack, doubling the table first
 * when it is full, and points ctxt->space at the new top entry.
 *
 * Returns -1 if the table could not be enlarged, the index of the new
 *         entry otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* undo the capacity change, the old table is still in place */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
1956 
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the top entry of the ctxt->spaceTab stack, overwrites the vacated
 * slot with -1 and points ctxt->space at the new top entry (or at slot 0
 * once the stack is empty).
 *
 * Returns the value just removed, or 0 if the stack was already empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1969 
1970 /*
1971  * Macros for accessing the content. Those should be used only by the parser,
1972  * and not exported.
1973  *
1974  * Dirty macros, i.e. one often need to make assumption on the context to
1975  * use them
1976  *
1977  *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1978  *           To be used with extreme caution since operations consuming
1979  *           characters may move the input buffer to a different location !
1980  *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1981  *           This should be used internally by the parser
1982  *           only to compare to ASCII values otherwise it would break when
1983  *           running with UTF-8 encoding.
1984  *   RAW     same as CUR but in the input buffer, bypass any token
1985  *           extraction that may have been done
1986  *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1987  *           to compare on ASCII based substring.
1988  *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1989  *           strings without newlines within the parser.
1990  *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1991  *           defined char within the parser.
1992  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1993  *
1994  *   NEXT    Skip to the next character, this does the proper decoding
1995  *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
1996  *   NEXTL(l) Skip the current unicode character of l xmlChars long.
1997  *   CUR_CHAR(l) returns the current unicode character (int), set l
1998  *           to the number of xmlChars used for the encoding [0-5].
1999  *   CUR_SCHAR  same but operate on a string instead of the context
2000  *   COPY_BUF  copy the current unicode char to the target buffer, increment
2001  *            the index
2002  *   GROW, SHRINK  handling of input buffers
2003  */
2004 
/*
 * All four accessors expand to expressions on a variable named ctxt that
 * must be in scope at the point of use.
 */
/* RAW: the byte at the current read position of the current input. */
2005 #define RAW (*ctxt->input->cur)
/* CUR: expands to exactly the same expression as RAW. */
2006 #define CUR (*ctxt->input->cur)
/* NXT(val): the byte located val positions after the current one. */
2007 #define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the current read pointer itself (not the byte it points to). */
2008 #define CUR_PTR ctxt->input->cur
2009 
/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes found at address s
 * are exactly c1 ... cn, in that order. Comparison stops at the first
 * mismatch, so bytes past it are not read.
 *
 * Every parameter is parenthesized so that an argument such as "ptr + 1"
 * is converted to a byte pointer as a whole, and the cast keeps the
 * pointed-to data const. s is expanded once per compared byte, so it must
 * not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2027 
/*
 * SKIP(val): advance the current input by val bytes, adding val to
 * ctxt->nbChars and to the column counter as well (the line counter is
 * not touched, hence the "no newlines" restriction).
 * After the move, a '%' at the new position is handed to
 * xmlParserHandlePEReference(); then, if the new position holds a 0 byte
 * and xmlParserInputGrow() returns <= 0, xmlPopInput() is called.
 * val is expanded three times, so it must not have side effects.
 */
2028 #define SKIP(val) do {							\
2029     ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
2030     if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
2031     if ((*ctxt->input->cur == 0) &&					\
2032         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
2033 	    xmlPopInput(ctxt);						\
2034   } while (0)
2035 
/*
 * SKIPL(val): advance the current input by val bytes one byte at a time,
 * so that each '\n' stepped over increments the line counter and resets
 * the column to 1, while any other byte increments the column.
 * ctxt->nbChars is incremented once per byte.
 * The trailing '%' and end-of-buffer handling is the same as in SKIP.
 * NOTE(review): val is used unparenthesized in the loop condition, so
 * callers should pass a simple expression.
 */
2036 #define SKIPL(val) do {							\
2037     int skipl;								\
2038     for(skipl=0; skipl<val; skipl++) {					\
2039 	if (*(ctxt->input->cur) == '\n') {				\
2040 	ctxt->input->line++; ctxt->input->col = 1;			\
2041 	} else ctxt->input->col++;					\
2042 	ctxt->nbChars++;						\
2043 	ctxt->input->cur++;						\
2044     }									\
2045     if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
2046     if ((*ctxt->input->cur == 0) &&					\
2047         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
2048 	    xmlPopInput(ctxt);						\
2049   } while (0)
2050 
/*
 * SHRINK: call xmlSHRINK(ctxt) when ctxt->progressive is 0, more than
 * 2 * INPUT_CHUNK bytes lie between the input base and the current
 * position, and fewer than 2 * INPUT_CHUNK bytes remain before the end.
 * The expansion is a bare "if" statement that already ends with a
 * semicolon; do not follow it with an "else".
 */
2051 #define SHRINK if ((ctxt->progressive == 0) &&				\
2052 		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2053 		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2054 	xmlSHRINK (ctxt);
2055 
/**
 * xmlSHRINK:
 * @ctxt:  an XML parser context
 *
 * Out-of-line part of the SHRINK macro: shrinks the current input with
 * xmlParserInputShrink(). If the current position then holds a 0 byte and
 * xmlParserInputGrow() returns <= 0, xmlPopInput() is called.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);

    /* still something to read at the current position: nothing more to do */
    if (*ctxt->input->cur != 0)
        return;

    if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)
        xmlPopInput(ctxt);
}
2062 
/*
 * GROW: call xmlGROW(ctxt) when ctxt->progressive is 0 and fewer than
 * INPUT_CHUNK bytes remain between the current position and the end of
 * the current input.
 * The expansion is a bare "if" statement that already ends with a
 * semicolon; do not follow it with an "else".
 */
2063 #define GROW if ((ctxt->progressive == 0) &&				\
2064 		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
2065 	xmlGROW (ctxt);
2066 
/**
 * xmlGROW:
 * @ctxt:  an XML parser context
 *
 * Out-of-line part of the GROW macro: reads more data into the current
 * input with xmlParserInputGrow(). If the current position then holds a
 * 0 byte and a second grow attempt returns <= 0, xmlPopInput() is called.
 *
 * Before reading anything, the distance from the current position to
 * either end of the buffer is compared with XML_MAX_LOOKUP_LIMIT. When
 * the limit is exceeded, the input has a buffer whose read callback is
 * not xmlNop, and XML_PARSE_HUGE is not set, the fatal error
 * "Huge input lookup" is raised, the parser state is set to
 * XML_PARSER_EOF and the function returns without reading more data or
 * popping any input.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
    unsigned long curBase = ctxt->input->cur - ctxt->input->base;

    if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
         (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
         ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        ctxt->instate = XML_PARSER_EOF;
        /*
         * The limit has just been enforced: growing the buffer further
         * (or popping inputs the caller may still point into) would
         * defeat it, so stop here.
         */
        return;
    }
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
        xmlPopInput(ctxt);
}
2083 
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars(ctxt); yields its return value. */
2084 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2085 
/* NEXT: shorthand for xmlNextChar(ctxt). */
2086 #define NEXT xmlNextChar(ctxt)
2087 
/*
 * NEXT1: step over exactly one byte, incrementing the column counter and
 * ctxt->nbChars (the line counter is not touched). If the new position
 * holds a 0 byte, xmlParserInputGrow() is called; unlike SKIP, no input is
 * popped and no '%' handling is done.
 * The expansion is a brace block, not a do/while(0) statement.
 */
2088 #define NEXT1 {								\
2089 	ctxt->input->col++;						\
2090 	ctxt->input->cur++;						\
2091 	ctxt->nbChars++;						\
2092 	if (*ctxt->input->cur == 0)					\
2093 	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
2094     }
2095 
/*
 * NEXTL(l): step over the current character, which is l bytes long.
 * A '\n' at the current position increments the line counter and resets
 * the column to 1; anything else increments the column by one.
 * A '%' at the new position is handed to xmlParserHandlePEReference().
 * This macro does not update ctxt->nbChars and does not grow the input.
 */
2096 #define NEXTL(l) do {							\
2097     if (*(ctxt->input->cur) == '\n') {					\
2098 	ctxt->input->line++; ctxt->input->col = 1;			\
2099     } else ctxt->input->col++;						\
2100     ctxt->input->cur += l;				\
2101     if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
2102   } while (0)
2103 
/* CUR_CHAR(l): xmlCurrentChar() on the context; l must be an lvalue, its address is passed. */
2104 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* CUR_SCHAR(s, l): xmlStringCurrentChar() on the string s; l must be an lvalue. */
2105 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2106 
/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i. When l == 1 the value is stored as a single xmlChar,
 * otherwise xmlCopyCharMultiByte() writes it and i grows by its result.
 * The expansion is an unbraced if/else, so the macro must be used as a
 * full statement and not directly in front of another "else".
 */
2107 #define COPY_BUF(l,b,i,v)						\
2108     if (l == 1) b[i++] = (xmlChar) v;					\
2109     else i += xmlCopyCharMultiByte(&b[i],v)
2110 
2111 /**
2112  * xmlSkipBlankChars:
2113  * @ctxt:  the XML parser context
2114  *
2115  * skip all blanks character found at that point in the input streams.
2116  * It pops up finished entities in the process if allowable at that point.
2117  *
2118  * Returns the number of space chars skipped
2119  */
2120 
2121 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2122 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2123     int res = 0;
2124 
2125     /*
2126      * It's Okay to use CUR/NEXT here since all the blanks are on
2127      * the ASCII range.
2128      */
2129     if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2130 	const xmlChar *cur;
2131 	/*
2132 	 * if we are in the document content, go really fast
2133 	 */
2134 	cur = ctxt->input->cur;
2135 	while (IS_BLANK_CH(*cur)) {
2136 	    if (*cur == '\n') {
2137 		ctxt->input->line++; ctxt->input->col = 1;
2138 	    } else {
2139 		ctxt->input->col++;
2140 	    }
2141 	    cur++;
2142 	    res++;
2143 	    if (*cur == 0) {
2144 		ctxt->input->cur = cur;
2145 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2146 		cur = ctxt->input->cur;
2147 	    }
2148 	}
2149 	ctxt->input->cur = cur;
2150     } else {
2151 	int cur;
2152 	do {
2153 	    cur = CUR;
2154 	    while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2155 		NEXT;
2156 		cur = CUR;
2157 		res++;
2158 	    }
2159 	    while ((cur == 0) && (ctxt->inputNr > 1) &&
2160 		   (ctxt->instate != XML_PARSER_COMMENT)) {
2161 		xmlPopInput(ctxt);
2162 		cur = CUR;
2163 	    }
2164 	    /*
2165 	     * Need to handle support of entities branching here
2166 	     */
2167 	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2168 	} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2169     }
2170     return(res);
2171 }
2172 
2173 /************************************************************************
2174  *									*
2175  *		Commodity functions to handle entities			*
2176  *									*
2177  ************************************************************************/
2178 
2179 /**
2180  * xmlPopInput:
2181  * @ctxt:  an XML parser context
2182  *
2183  * xmlPopInput: the current input pointed by ctxt->input came to an end
2184  *          pop it and return the next char.
2185  *
2186  * Returns the current xmlChar in the parser context
2187  */
2188 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2189 xmlPopInput(xmlParserCtxtPtr ctxt) {
2190     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2191     if (xmlParserDebugEntities)
2192 	xmlGenericError(xmlGenericErrorContext,
2193 		"Popping input %d\n", ctxt->inputNr);
2194     xmlFreeInputStream(inputPop(ctxt));
2195     if ((*ctxt->input->cur == 0) &&
2196         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2197 	    return(xmlPopInput(ctxt));
2198     return(CUR);
2199 }
2200 
2201 /**
2202  * xmlPushInput:
2203  * @ctxt:  an XML parser context
2204  * @input:  an XML parser input fragment (entity, XML fragment ...).
2205  *
2206  * xmlPushInput: switch to a new input stream which is stacked on top
2207  *               of the previous one(s).
2208  * Returns -1 in case of error or the index in the input stack
2209  */
2210 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2211 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2212     int ret;
2213     if (input == NULL) return(-1);
2214 
2215     if (xmlParserDebugEntities) {
2216 	if ((ctxt->input != NULL) && (ctxt->input->filename))
2217 	    xmlGenericError(xmlGenericErrorContext,
2218 		    "%s(%d): ", ctxt->input->filename,
2219 		    ctxt->input->line);
2220 	xmlGenericError(xmlGenericErrorContext,
2221 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2222     }
2223     ret = inputPush(ctxt, input);
2224     if (ctxt->instate == XML_PARSER_EOF)
2225         return(-1);
2226     GROW;
2227     return(ret);
2228 }
2229 
2230 /**
 * xmlParseCharRef:
 * @ctxt:  an XML parser context
 *
 * Parse a character reference located at the current input position.
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * [ WFC: Legal Character ]
 * Characters referred to using character references must match the
 * production for Char.
 *
 * The terminating ';' is consumed when it is found.
 * XML_ERR_INVALID_HEX_CHARREF or XML_ERR_INVALID_DEC_CHARREF is raised
 * when an unexpected character shows up among the digits,
 * XML_ERR_INVALID_CHARREF when the input does not start with "&#", and
 * XML_ERR_INVALID_CHAR when the final value is rejected by IS_CHAR() or
 * went above 0x10FFFF while being accumulated.
 *
 * Returns the value parsed (as an int), 0 in case of error
 */
2245 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2246 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2247     unsigned int val = 0;
2248     int count = 0;
    /* receives the offending value once val has gone above 0x10FFFF */
2249     unsigned int outofrange = 0;
2250 
2251     /*
2252      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2253      */
2254     if ((RAW == '&') && (NXT(1) == '#') &&
2255         (NXT(2) == 'x')) {
	/* hexadecimal form: &#x...; */
2256 	SKIP(3);
2257 	GROW;
2258 	while (RAW != ';') { /* loop blocked by count */
	    /*
	     * count is incremented both by this test and at the bottom of
	     * the loop; once it has passed 20 it is reset and more input
	     * is requested.
	     */
2259 	    if (count++ > 20) {
2260 		count = 0;
2261 		GROW;
2262                 if (ctxt->instate == XML_PARSER_EOF)
2263                     return(0);
2264 	    }
	    /*
	     * NOTE(review): the letters a-f / A-F are only accepted while
	     * count < 20, the digits 0-9 carry no such restriction --
	     * confirm this asymmetry is intended.
	     */
2265 	    if ((RAW >= '0') && (RAW <= '9'))
2266 	        val = val * 16 + (CUR - '0');
2267 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2268 	        val = val * 16 + (CUR - 'a') + 10;
2269 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2270 	        val = val * 16 + (CUR - 'A') + 10;
2271 	    else {
2272 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2273 		val = 0;
2274 		break;
2275 	    }
2276 	    if (val > 0x10FFFF)
2277 	        outofrange = val;
2278 
2279 	    NEXT;
2280 	    count++;
2281 	}
2282 	if (RAW == ';') {
2283 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2284 	    ctxt->input->col++;
2285 	    ctxt->nbChars ++;
2286 	    ctxt->input->cur++;
2287 	}
2288     } else if  ((RAW == '&') && (NXT(1) == '#')) {
	/* decimal form: &#...; */
2289 	SKIP(2);
2290 	GROW;
2291 	while (RAW != ';') { /* loop blocked by count */
2292 	    if (count++ > 20) {
2293 		count = 0;
2294 		GROW;
2295                 if (ctxt->instate == XML_PARSER_EOF)
2296                     return(0);
2297 	    }
2298 	    if ((RAW >= '0') && (RAW <= '9'))
2299 	        val = val * 10 + (CUR - '0');
2300 	    else {
2301 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2302 		val = 0;
2303 		break;
2304 	    }
2305 	    if (val > 0x10FFFF)
2306 	        outofrange = val;
2307 
2308 	    NEXT;
2309 	    count++;
2310 	}
2311 	if (RAW == ';') {
2312 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2313 	    ctxt->input->col++;
2314 	    ctxt->nbChars ++;
2315 	    ctxt->input->cur++;
2316 	}
2317     } else {
	/* not a character reference at all; val stays 0 and is checked below */
2318         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2319     }
2320 
2321     /*
2322      * [ WFC: Legal Character ]
2323      * Characters referred to using character references must match the
2324      * production for Char.
2325      */
2326     if ((IS_CHAR(val) && (outofrange == 0))) {
2327         return(val);
2328     } else {
2329         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2330                           "xmlParseCharRef: invalid xmlChar value %d\n",
2331 	                  val);
2332     }
2333     return(0);
2334 }
2335 
2336 /**
2337  * xmlParseStringCharRef:
2338  * @ctxt:  an XML parser context
2339  * @str:  a pointer to an index in the string
2340  *
2341  * parse Reference declarations, variant parsing from a string rather
2342  * than an an input flow.
2343  *
2344  * [66] CharRef ::= '&#' [0-9]+ ';' |
2345  *                  '&#x' [0-9a-fA-F]+ ';'
2346  *
2347  * [ WFC: Legal Character ]
2348  * Characters referred to using character references must match the
2349  * production for Char.
2350  *
2351  * Returns the value parsed (as an int), 0 in case of error, str will be
2352  *         updated to the current value of the index
2353  */
2354 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2355 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2356     const xmlChar *ptr;
2357     xmlChar cur;
2358     unsigned int val = 0;
2359     unsigned int outofrange = 0;
2360 
2361     if ((str == NULL) || (*str == NULL)) return(0);
2362     ptr = *str;
2363     cur = *ptr;
2364     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2365 	ptr += 3;
2366 	cur = *ptr;
2367 	while (cur != ';') { /* Non input consuming loop */
2368 	    if ((cur >= '0') && (cur <= '9'))
2369 	        val = val * 16 + (cur - '0');
2370 	    else if ((cur >= 'a') && (cur <= 'f'))
2371 	        val = val * 16 + (cur - 'a') + 10;
2372 	    else if ((cur >= 'A') && (cur <= 'F'))
2373 	        val = val * 16 + (cur - 'A') + 10;
2374 	    else {
2375 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2376 		val = 0;
2377 		break;
2378 	    }
2379 	    if (val > 0x10FFFF)
2380 	        outofrange = val;
2381 
2382 	    ptr++;
2383 	    cur = *ptr;
2384 	}
2385 	if (cur == ';')
2386 	    ptr++;
2387     } else if  ((cur == '&') && (ptr[1] == '#')){
2388 	ptr += 2;
2389 	cur = *ptr;
2390 	while (cur != ';') { /* Non input consuming loops */
2391 	    if ((cur >= '0') && (cur <= '9'))
2392 	        val = val * 10 + (cur - '0');
2393 	    else {
2394 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2395 		val = 0;
2396 		break;
2397 	    }
2398 	    if (val > 0x10FFFF)
2399 	        outofrange = val;
2400 
2401 	    ptr++;
2402 	    cur = *ptr;
2403 	}
2404 	if (cur == ';')
2405 	    ptr++;
2406     } else {
2407 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2408 	return(0);
2409     }
2410     *str = ptr;
2411 
2412     /*
2413      * [ WFC: Legal Character ]
2414      * Characters referred to using character references must match the
2415      * production for Char.
2416      */
2417     if ((IS_CHAR(val) && (outofrange == 0))) {
2418         return(val);
2419     } else {
2420         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2421 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2422 			  val);
2423     }
2424     return(0);
2425 }
2426 
2427 /**
2428  * xmlNewBlanksWrapperInputStream:
2429  * @ctxt:  an XML parser context
2430  * @entity:  an Entity pointer
2431  *
2432  * Create a new input stream for wrapping
2433  * blanks around a PEReference
2434  *
2435  * Returns the new input stream or NULL
2436  */
2437 
deallocblankswrapper(xmlChar * str)2438 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2439 
2440 static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)2441 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2442     xmlParserInputPtr input;
2443     xmlChar *buffer;
2444     size_t length;
2445     if (entity == NULL) {
2446 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2447 	            "xmlNewBlanksWrapperInputStream entity\n");
2448 	return(NULL);
2449     }
2450     if (xmlParserDebugEntities)
2451 	xmlGenericError(xmlGenericErrorContext,
2452 		"new blanks wrapper for entity: %s\n", entity->name);
2453     input = xmlNewInputStream(ctxt);
2454     if (input == NULL) {
2455 	return(NULL);
2456     }
2457     length = xmlStrlen(entity->name) + 5;
2458     buffer = xmlMallocAtomic(length);
2459     if (buffer == NULL) {
2460 	xmlErrMemory(ctxt, NULL);
2461         xmlFree(input);
2462 	return(NULL);
2463     }
2464     buffer [0] = ' ';
2465     buffer [1] = '%';
2466     buffer [length-3] = ';';
2467     buffer [length-2] = ' ';
2468     buffer [length-1] = 0;
2469     memcpy(buffer + 2, entity->name, length - 5);
2470     input->free = deallocblankswrapper;
2471     input->base = buffer;
2472     input->cur = buffer;
2473     input->length = length;
2474     input->end = &buffer[length];
2475     return(input);
2476 }
2477 
2478 /**
2479  * xmlParserHandlePEReference:
2480  * @ctxt:  the parser context
2481  *
2482  * [69] PEReference ::= '%' Name ';'
2483  *
2484  * [ WFC: No Recursion ]
2485  * A parsed entity must not contain a recursive
2486  * reference to itself, either directly or indirectly.
2487  *
2488  * [ WFC: Entity Declared ]
2489  * In a document without any DTD, a document with only an internal DTD
2490  * subset which contains no parameter entity references, or a document
2491  * with "standalone='yes'", ...  ... The declaration of a parameter
2492  * entity must precede any reference to it...
2493  *
2494  * [ VC: Entity Declared ]
2495  * In a document with an external subset or external parameter entities
2496  * with "standalone='no'", ...  ... The declaration of a parameter entity
2497  * must precede any reference to it...
2498  *
2499  * [ WFC: In DTD ]
2500  * Parameter-entity references may only appear in the DTD.
2501  * NOTE: misleading but this is handled.
2502  *
2503  * A PEReference may have been detected in the current input stream
2504  * the handling is done accordingly to
2505  *      http://www.w3.org/TR/REC-xml#entproc
2506  * i.e.
2507  *   - Included in literal in entity values
2508  *   - Included as Parameter Entity reference within DTDs
2509  */
2510 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2511 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2512     const xmlChar *name;
2513     xmlEntityPtr entity = NULL;
2514     xmlParserInputPtr input;
2515 
2516     if (RAW != '%') return;
2517     switch(ctxt->instate) {
2518 	case XML_PARSER_CDATA_SECTION:
2519 	    return;
2520         case XML_PARSER_COMMENT:
2521 	    return;
2522 	case XML_PARSER_START_TAG:
2523 	    return;
2524 	case XML_PARSER_END_TAG:
2525 	    return;
2526         case XML_PARSER_EOF:
2527 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2528 	    return;
2529         case XML_PARSER_PROLOG:
2530 	case XML_PARSER_START:
2531 	case XML_PARSER_MISC:
2532 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2533 	    return;
2534 	case XML_PARSER_ENTITY_DECL:
2535         case XML_PARSER_CONTENT:
2536         case XML_PARSER_ATTRIBUTE_VALUE:
2537         case XML_PARSER_PI:
2538 	case XML_PARSER_SYSTEM_LITERAL:
2539 	case XML_PARSER_PUBLIC_LITERAL:
2540 	    /* we just ignore it there */
2541 	    return;
2542         case XML_PARSER_EPILOG:
2543 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2544 	    return;
2545 	case XML_PARSER_ENTITY_VALUE:
2546 	    /*
2547 	     * NOTE: in the case of entity values, we don't do the
2548 	     *       substitution here since we need the literal
2549 	     *       entity value to be able to save the internal
2550 	     *       subset of the document.
2551 	     *       This will be handled by xmlStringDecodeEntities
2552 	     */
2553 	    return;
2554         case XML_PARSER_DTD:
2555 	    /*
2556 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2557 	     * In the internal DTD subset, parameter-entity references
2558 	     * can occur only where markup declarations can occur, not
2559 	     * within markup declarations.
2560 	     * In that case this is handled in xmlParseMarkupDecl
2561 	     */
2562 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2563 		return;
2564 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2565 		return;
2566             break;
2567         case XML_PARSER_IGNORE:
2568             return;
2569     }
2570 
2571     NEXT;
2572     name = xmlParseName(ctxt);
2573     if (xmlParserDebugEntities)
2574 	xmlGenericError(xmlGenericErrorContext,
2575 		"PEReference: %s\n", name);
2576     if (name == NULL) {
2577 	xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2578     } else {
2579 	if (RAW == ';') {
2580 	    NEXT;
2581 	    if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2582 		entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2583 	    if (ctxt->instate == XML_PARSER_EOF)
2584 	        return;
2585 	    if (entity == NULL) {
2586 
2587 		/*
2588 		 * [ WFC: Entity Declared ]
2589 		 * In a document without any DTD, a document with only an
2590 		 * internal DTD subset which contains no parameter entity
2591 		 * references, or a document with "standalone='yes'", ...
2592 		 * ... The declaration of a parameter entity must precede
2593 		 * any reference to it...
2594 		 */
2595 		if ((ctxt->standalone == 1) ||
2596 		    ((ctxt->hasExternalSubset == 0) &&
2597 		     (ctxt->hasPErefs == 0))) {
2598 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2599 			 "PEReference: %%%s; not found\n", name);
2600 	        } else {
2601 		    /*
2602 		     * [ VC: Entity Declared ]
2603 		     * In a document with an external subset or external
2604 		     * parameter entities with "standalone='no'", ...
2605 		     * ... The declaration of a parameter entity must precede
2606 		     * any reference to it...
2607 		     */
2608 		    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2609 		        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2610 			                 "PEReference: %%%s; not found\n",
2611 				         name, NULL);
2612 		    } else
2613 		        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2614 			              "PEReference: %%%s; not found\n",
2615 				      name, NULL);
2616 		    ctxt->valid = 0;
2617 		}
2618 		xmlParserEntityCheck(ctxt, 0, NULL, 0);
2619 	    } else if (ctxt->input->free != deallocblankswrapper) {
2620 		    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2621 		    if (xmlPushInput(ctxt, input) < 0)
2622 		        return;
2623 	    } else {
2624 	        if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2625 		    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2626 		    xmlChar start[4];
2627 		    xmlCharEncoding enc;
2628 
2629 		    /*
2630 		     * Note: external parameter entities will not be loaded, it
2631 		     * is not required for a non-validating parser, unless the
2632 		     * option of validating, or substituting entities were
2633 		     * given. Doing so is far more secure as the parser will
2634 		     * only process data coming from the document entity by
2635 		     * default.
2636 		     */
2637                     if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2638 		        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2639 			((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2640 			((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2641 			((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2642 			(ctxt->replaceEntities == 0) &&
2643 			(ctxt->validate == 0))
2644 			return;
2645 
2646 		    /*
2647 		     * handle the extra spaces added before and after
2648 		     * c.f. http://www.w3.org/TR/REC-xml#as-PE
2649 		     * this is done independently.
2650 		     */
2651 		    input = xmlNewEntityInputStream(ctxt, entity);
2652 		    if (xmlPushInput(ctxt, input) < 0)
2653 		        return;
2654 
2655 		    /*
2656 		     * Get the 4 first bytes and decode the charset
2657 		     * if enc != XML_CHAR_ENCODING_NONE
2658 		     * plug some encoding conversion routines.
2659 		     * Note that, since we may have some non-UTF8
2660 		     * encoding (like UTF16, bug 135229), the 'length'
2661 		     * is not known, but we can calculate based upon
2662 		     * the amount of data in the buffer.
2663 		     */
2664 		    GROW
2665                     if (ctxt->instate == XML_PARSER_EOF)
2666                         return;
2667 		    if ((ctxt->input->end - ctxt->input->cur)>=4) {
2668 			start[0] = RAW;
2669 			start[1] = NXT(1);
2670 			start[2] = NXT(2);
2671 			start[3] = NXT(3);
2672 			enc = xmlDetectCharEncoding(start, 4);
2673 			if (enc != XML_CHAR_ENCODING_NONE) {
2674 			    xmlSwitchEncoding(ctxt, enc);
2675 			}
2676 		    }
2677 
2678 		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2679 			(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2680 			(IS_BLANK_CH(NXT(5)))) {
2681 			xmlParseTextDecl(ctxt);
2682 		    }
2683 		} else {
2684 		    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2685 			     "PEReference: %s is not a parameter entity\n",
2686 				      name);
2687 		}
2688 	    }
2689 	} else {
2690 	    xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2691 	}
2692     }
2693 }
2694 
2695 /*
 * growBuffer(buffer, n): macro used to grow the current buffer to
 * twice its size plus n bytes.
 *
 * buffer##_size (e.g. buffer_size when the argument is named buffer) is
 * expected to be a size_t variable holding the current size; it is
 * updated on success.
 * mem_error: is a label expected to exist in the calling function; it is
 * jumped to when the new size wraps around or when xmlRealloc() fails.
 * In both cases buffer and buffer##_size are left unchanged.
 *
 * The expansion is a brace block declaring locals named tmp and new_size,
 * so the arguments must not use those names.
 */
2700 #define growBuffer(buffer, n) {						\
2701     xmlChar *tmp;							\
2702     size_t new_size = buffer##_size * 2 + n;                            \
2703     if (new_size < buffer##_size) goto mem_error;                       \
2704     tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
2705     if (tmp == NULL) goto mem_error;					\
2706     buffer = tmp;							\
2707     buffer##_size = new_size;                                           \
2708 }
2709 
2710 /**
2711  * xmlStringLenDecodeEntities:
2712  * @ctxt:  the parser context
2713  * @str:  the input string
2714  * @len: the string length
2715  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2716  * @end:  an end marker xmlChar, 0 if none
2717  * @end2:  an end marker xmlChar, 0 if none
2718  * @end3:  an end marker xmlChar, 0 if none
2719  *
2720  * Takes a entity string content and process to do the adequate substitutions.
2721  *
2722  * [67] Reference ::= EntityRef | CharRef
2723  *
2724  * [69] PEReference ::= '%' Name ';'
2725  *
2726  * Returns A newly allocated string with the substitution done. The caller
2727  *      must deallocate it !
2728  */
2729 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2730 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2731 		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2732     xmlChar *buffer = NULL;
2733     size_t buffer_size = 0;
2734     size_t nbchars = 0;
2735 
2736     xmlChar *current = NULL;
2737     xmlChar *rep = NULL;
2738     const xmlChar *last;
2739     xmlEntityPtr ent;
2740     int c,l;
2741 
2742     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2743 	return(NULL);
2744     last = str + len;
2745 
2746     if (((ctxt->depth > 40) &&
2747          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2748 	(ctxt->depth > 1024)) {
2749 	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2750 	return(NULL);
2751     }
2752 
2753     /*
2754      * allocate a translation buffer.
2755      */
2756     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2757     buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2758     if (buffer == NULL) goto mem_error;
2759 
2760     /*
2761      * OK loop until we reach one of the ending char or a size limit.
2762      * we are operating on already parsed values.
2763      */
2764     if (str < last)
2765 	c = CUR_SCHAR(str, l);
2766     else
2767         c = 0;
2768     while ((c != 0) && (c != end) && /* non input consuming loop */
2769 	   (c != end2) && (c != end3)) {
2770 
2771 	if (c == 0) break;
2772         if ((c == '&') && (str[1] == '#')) {
2773 	    int val = xmlParseStringCharRef(ctxt, &str);
2774 	    if (val != 0) {
2775 		COPY_BUF(0,buffer,nbchars,val);
2776 	    }
2777 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2778 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2779 	    }
2780 	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2781 	    if (xmlParserDebugEntities)
2782 		xmlGenericError(xmlGenericErrorContext,
2783 			"String decoding Entity Reference: %.30s\n",
2784 			str);
2785 	    ent = xmlParseStringEntityRef(ctxt, &str);
2786 	    if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2787 	        (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2788 	        goto int_error;
2789 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2790 	    if (ent != NULL)
2791 	        ctxt->nbentities += ent->checked / 2;
2792 	    if ((ent != NULL) &&
2793 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2794 		if (ent->content != NULL) {
2795 		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
2796 		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2797 			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2798 		    }
2799 		} else {
2800 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2801 			    "predefined entity has no content\n");
2802 		}
2803 	    } else if ((ent != NULL) && (ent->content != NULL)) {
2804 		ctxt->depth++;
2805 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2806 			                      0, 0, 0);
2807 		ctxt->depth--;
2808 
2809 		if (rep != NULL) {
2810 		    current = rep;
2811 		    while (*current != 0) { /* non input consuming loop */
2812 			buffer[nbchars++] = *current++;
2813 			if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2814 			    if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2815 				goto int_error;
2816 			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2817 			}
2818 		    }
2819 		    xmlFree(rep);
2820 		    rep = NULL;
2821 		}
2822 	    } else if (ent != NULL) {
2823 		int i = xmlStrlen(ent->name);
2824 		const xmlChar *cur = ent->name;
2825 
2826 		buffer[nbchars++] = '&';
2827 		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2828 		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2829 		}
2830 		for (;i > 0;i--)
2831 		    buffer[nbchars++] = *cur++;
2832 		buffer[nbchars++] = ';';
2833 	    }
2834 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2835 	    if (xmlParserDebugEntities)
2836 		xmlGenericError(xmlGenericErrorContext,
2837 			"String decoding PE Reference: %.30s\n", str);
2838 	    ent = xmlParseStringPEReference(ctxt, &str);
2839 	    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2840 	        goto int_error;
2841 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2842 	    if (ent != NULL)
2843 	        ctxt->nbentities += ent->checked / 2;
2844 	    if (ent != NULL) {
2845                 if (ent->content == NULL) {
2846 		    xmlLoadEntityContent(ctxt, ent);
2847 		}
2848 		ctxt->depth++;
2849 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2850 			                      0, 0, 0);
2851 		ctxt->depth--;
2852 		if (rep != NULL) {
2853 		    current = rep;
2854 		    while (*current != 0) { /* non input consuming loop */
2855 			buffer[nbchars++] = *current++;
2856 			if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2857 			    if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2858 			        goto int_error;
2859 			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2860 			}
2861 		    }
2862 		    xmlFree(rep);
2863 		    rep = NULL;
2864 		}
2865 	    }
2866 	} else {
2867 	    COPY_BUF(l,buffer,nbchars,c);
2868 	    str += l;
2869 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2870 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2871 	    }
2872 	}
2873 	if (str < last)
2874 	    c = CUR_SCHAR(str, l);
2875 	else
2876 	    c = 0;
2877     }
2878     buffer[nbchars] = 0;
2879     return(buffer);
2880 
2881 mem_error:
2882     xmlErrMemory(ctxt, NULL);
2883 int_error:
2884     if (rep != NULL)
2885         xmlFree(rep);
2886     if (buffer != NULL)
2887         xmlFree(buffer);
2888     return(NULL);
2889 }
2890 
2891 /**
2892  * xmlStringDecodeEntities:
2893  * @ctxt:  the parser context
2894  * @str:  the input string
2895  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2896  * @end:  an end marker xmlChar, 0 if none
2897  * @end2:  an end marker xmlChar, 0 if none
2898  * @end3:  an end marker xmlChar, 0 if none
2899  *
2900  * Takes a entity string content and process to do the adequate substitutions.
2901  *
2902  * [67] Reference ::= EntityRef | CharRef
2903  *
2904  * [69] PEReference ::= '%' Name ';'
2905  *
2906  * Returns A newly allocated string with the substitution done. The caller
2907  *      must deallocate it !
2908  */
2909 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2910 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2911 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2912     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2913     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2914            end, end2, end3));
2915 }
2916 
2917 /************************************************************************
2918  *									*
2919  *		Commodity functions, cleanup needed ?			*
2920  *									*
2921  ************************************************************************/
2922 
2923 /**
2924  * areBlanks:
2925  * @ctxt:  an XML parser context
2926  * @str:  a xmlChar *
2927  * @len:  the size of @str
2928  * @blank_chars: we know the chars are blanks
2929  *
2930  * Is this a sequence of blank chars that one can ignore ?
2931  *
2932  * Returns 1 if ignorable 0 otherwise.
2933  */
2934 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2935 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2936                      int blank_chars) {
2937     int i, ret;
2938     xmlNodePtr lastChild;
2939 
2940     /*
2941      * Don't spend time trying to differentiate them, the same callback is
2942      * used !
2943      */
2944     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2945 	return(0);
2946 
2947     /*
2948      * Check for xml:space value.
2949      */
2950     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2951         (*(ctxt->space) == -2))
2952 	return(0);
2953 
2954     /*
2955      * Check that the string is made of blanks
2956      */
2957     if (blank_chars == 0) {
2958 	for (i = 0;i < len;i++)
2959 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2960     }
2961 
2962     /*
2963      * Look if the element is mixed content in the DTD if available
2964      */
2965     if (ctxt->node == NULL) return(0);
2966     if (ctxt->myDoc != NULL) {
2967 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2968         if (ret == 0) return(1);
2969         if (ret == 1) return(0);
2970     }
2971 
2972     /*
2973      * Otherwise, heuristic :-\
2974      */
2975     if ((RAW != '<') && (RAW != 0xD)) return(0);
2976     if ((ctxt->node->children == NULL) &&
2977 	(RAW == '<') && (NXT(1) == '/')) return(0);
2978 
2979     lastChild = xmlGetLastChild(ctxt->node);
2980     if (lastChild == NULL) {
2981         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2982             (ctxt->node->content != NULL)) return(0);
2983     } else if (xmlNodeIsText(lastChild))
2984         return(0);
2985     else if ((ctxt->node->children != NULL) &&
2986              (xmlNodeIsText(ctxt->node->children)))
2987         return(0);
2988     return(1);
2989 }
2990 
2991 /************************************************************************
2992  *									*
2993  *		Extra stuff for namespace support			*
2994  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2995  *									*
2996  ************************************************************************/
2997 
2998 /**
2999  * xmlSplitQName:
3000  * @ctxt:  an XML parser context
3001  * @name:  an XML parser context
3002  * @prefix:  a xmlChar **
3003  *
3004  * parse an UTF8 encoded XML qualified name string
3005  *
3006  * [NS 5] QName ::= (Prefix ':')? LocalPart
3007  *
3008  * [NS 6] Prefix ::= NCName
3009  *
3010  * [NS 7] LocalPart ::= NCName
3011  *
3012  * Returns the local part, and prefix is updated
3013  *   to get the Prefix if any.
3014  */
3015 
3016 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)3017 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3018     xmlChar buf[XML_MAX_NAMELEN + 5];
3019     xmlChar *buffer = NULL;
3020     int len = 0;
3021     int max = XML_MAX_NAMELEN;
3022     xmlChar *ret = NULL;
3023     const xmlChar *cur = name;
3024     int c;
3025 
3026     if (prefix == NULL) return(NULL);
3027     *prefix = NULL;
3028 
3029     if (cur == NULL) return(NULL);
3030 
3031 #ifndef XML_XML_NAMESPACE
3032     /* xml: prefix is not really a namespace */
3033     if ((cur[0] == 'x') && (cur[1] == 'm') &&
3034         (cur[2] == 'l') && (cur[3] == ':'))
3035 	return(xmlStrdup(name));
3036 #endif
3037 
3038     /* nasty but well=formed */
3039     if (cur[0] == ':')
3040 	return(xmlStrdup(name));
3041 
3042     c = *cur++;
3043     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3044 	buf[len++] = c;
3045 	c = *cur++;
3046     }
3047     if (len >= max) {
3048 	/*
3049 	 * Okay someone managed to make a huge name, so he's ready to pay
3050 	 * for the processing speed.
3051 	 */
3052 	max = len * 2;
3053 
3054 	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3055 	if (buffer == NULL) {
3056 	    xmlErrMemory(ctxt, NULL);
3057 	    return(NULL);
3058 	}
3059 	memcpy(buffer, buf, len);
3060 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3061 	    if (len + 10 > max) {
3062 	        xmlChar *tmp;
3063 
3064 		max *= 2;
3065 		tmp = (xmlChar *) xmlRealloc(buffer,
3066 						max * sizeof(xmlChar));
3067 		if (tmp == NULL) {
3068 		    xmlFree(buffer);
3069 		    xmlErrMemory(ctxt, NULL);
3070 		    return(NULL);
3071 		}
3072 		buffer = tmp;
3073 	    }
3074 	    buffer[len++] = c;
3075 	    c = *cur++;
3076 	}
3077 	buffer[len] = 0;
3078     }
3079 
3080     if ((c == ':') && (*cur == 0)) {
3081         if (buffer != NULL)
3082 	    xmlFree(buffer);
3083 	*prefix = NULL;
3084 	return(xmlStrdup(name));
3085     }
3086 
3087     if (buffer == NULL)
3088 	ret = xmlStrndup(buf, len);
3089     else {
3090 	ret = buffer;
3091 	buffer = NULL;
3092 	max = XML_MAX_NAMELEN;
3093     }
3094 
3095 
3096     if (c == ':') {
3097 	c = *cur;
3098         *prefix = ret;
3099 	if (c == 0) {
3100 	    return(xmlStrndup(BAD_CAST "", 0));
3101 	}
3102 	len = 0;
3103 
3104 	/*
3105 	 * Check that the first character is proper to start
3106 	 * a new name
3107 	 */
3108 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
3109 	      ((c >= 0x41) && (c <= 0x5A)) ||
3110 	      (c == '_') || (c == ':'))) {
3111 	    int l;
3112 	    int first = CUR_SCHAR(cur, l);
3113 
3114 	    if (!IS_LETTER(first) && (first != '_')) {
3115 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3116 			    "Name %s is not XML Namespace compliant\n",
3117 				  name);
3118 	    }
3119 	}
3120 	cur++;
3121 
3122 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3123 	    buf[len++] = c;
3124 	    c = *cur++;
3125 	}
3126 	if (len >= max) {
3127 	    /*
3128 	     * Okay someone managed to make a huge name, so he's ready to pay
3129 	     * for the processing speed.
3130 	     */
3131 	    max = len * 2;
3132 
3133 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3134 	    if (buffer == NULL) {
3135 	        xmlErrMemory(ctxt, NULL);
3136 		return(NULL);
3137 	    }
3138 	    memcpy(buffer, buf, len);
3139 	    while (c != 0) { /* tested bigname2.xml */
3140 		if (len + 10 > max) {
3141 		    xmlChar *tmp;
3142 
3143 		    max *= 2;
3144 		    tmp = (xmlChar *) xmlRealloc(buffer,
3145 						    max * sizeof(xmlChar));
3146 		    if (tmp == NULL) {
3147 			xmlErrMemory(ctxt, NULL);
3148 			xmlFree(buffer);
3149 			return(NULL);
3150 		    }
3151 		    buffer = tmp;
3152 		}
3153 		buffer[len++] = c;
3154 		c = *cur++;
3155 	    }
3156 	    buffer[len] = 0;
3157 	}
3158 
3159 	if (buffer == NULL)
3160 	    ret = xmlStrndup(buf, len);
3161 	else {
3162 	    ret = buffer;
3163 	}
3164     }
3165 
3166     return(ret);
3167 }
3168 
3169 /************************************************************************
3170  *									*
3171  *			The parser itself				*
3172  *	Relates to http://www.w3.org/TR/REC-xml				*
3173  *									*
3174  ************************************************************************/
3175 
3176 /************************************************************************
3177  *									*
3178  *	Routines to parse Name, NCName and NmToken			*
3179  *									*
3180  ************************************************************************/
#ifdef DEBUG
/*
 * Call counters of the name parsing routines below, only compiled
 * in when DEBUG is defined.
 */
static unsigned long nbParseName = 0, nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0, nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3189 
3190 /*
3191  * The two following functions are related to the change of accepted
3192  * characters for Name and NmToken in the Revision 5 of XML-1.0
3193  * They correspond to the modified production [4] and the new production [4a]
3194  * changes in that revision. Also note that the macros used for the
3195  * productions Letter, Digit, CombiningChar and Extender are not needed
3196  * anymore.
3197  * We still keep compatibility to pre-revision5 parsing semantic if the
3198  * new XML_PARSE_OLD10 option is given to the parser.
3199  */
3200 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3201 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3202     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3203         /*
3204 	 * Use the new checks of production [4] [4a] amd [5] of the
3205 	 * Update 5 of XML-1.0
3206 	 */
3207 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3208 	    (((c >= 'a') && (c <= 'z')) ||
3209 	     ((c >= 'A') && (c <= 'Z')) ||
3210 	     (c == '_') || (c == ':') ||
3211 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3212 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3213 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3214 	     ((c >= 0x370) && (c <= 0x37D)) ||
3215 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3216 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3217 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3218 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3219 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3220 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3221 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3222 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3223 	    return(1);
3224     } else {
3225         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3226 	    return(1);
3227     }
3228     return(0);
3229 }
3230 
3231 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3232 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3233     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3234         /*
3235 	 * Use the new checks of production [4] [4a] amd [5] of the
3236 	 * Update 5 of XML-1.0
3237 	 */
3238 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3239 	    (((c >= 'a') && (c <= 'z')) ||
3240 	     ((c >= 'A') && (c <= 'Z')) ||
3241 	     ((c >= '0') && (c <= '9')) || /* !start */
3242 	     (c == '_') || (c == ':') ||
3243 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3244 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3245 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3246 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3247 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3248 	     ((c >= 0x370) && (c <= 0x37D)) ||
3249 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3250 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3251 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3252 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3253 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3254 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3255 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3256 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3257 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3258 	     return(1);
3259     } else {
3260         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3261             (c == '.') || (c == '-') ||
3262 	    (c == '_') || (c == ':') ||
3263 	    (IS_COMBINING(c)) ||
3264 	    (IS_EXTENDER(c)))
3265 	    return(1);
3266     }
3267     return(0);
3268 }
3269 
3270 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3271                                           int *len, int *alloc, int normalize);
3272 
3273 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3274 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3275     int len = 0, l;
3276     int c;
3277     int count = 0;
3278 
3279 #ifdef DEBUG
3280     nbParseNameComplex++;
3281 #endif
3282 
3283     /*
3284      * Handler for more complex cases
3285      */
3286     GROW;
3287     if (ctxt->instate == XML_PARSER_EOF)
3288         return(NULL);
3289     c = CUR_CHAR(l);
3290     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3291         /*
3292 	 * Use the new checks of production [4] [4a] amd [5] of the
3293 	 * Update 5 of XML-1.0
3294 	 */
3295 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296 	    (!(((c >= 'a') && (c <= 'z')) ||
3297 	       ((c >= 'A') && (c <= 'Z')) ||
3298 	       (c == '_') || (c == ':') ||
3299 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3300 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3301 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3302 	       ((c >= 0x370) && (c <= 0x37D)) ||
3303 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3304 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3305 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3306 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3307 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3308 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3309 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3310 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3311 	    return(NULL);
3312 	}
3313 	len += l;
3314 	NEXTL(l);
3315 	c = CUR_CHAR(l);
3316 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3317 	       (((c >= 'a') && (c <= 'z')) ||
3318 	        ((c >= 'A') && (c <= 'Z')) ||
3319 	        ((c >= '0') && (c <= '9')) || /* !start */
3320 	        (c == '_') || (c == ':') ||
3321 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3322 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3323 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3324 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3325 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3326 	        ((c >= 0x370) && (c <= 0x37D)) ||
3327 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3328 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3329 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3330 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3331 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3332 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3333 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3334 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3335 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3336 		)) {
3337 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3338 		count = 0;
3339 		GROW;
3340                 if (ctxt->instate == XML_PARSER_EOF)
3341                     return(NULL);
3342 	    }
3343 	    len += l;
3344 	    NEXTL(l);
3345 	    c = CUR_CHAR(l);
3346 	}
3347     } else {
3348 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3349 	    (!IS_LETTER(c) && (c != '_') &&
3350 	     (c != ':'))) {
3351 	    return(NULL);
3352 	}
3353 	len += l;
3354 	NEXTL(l);
3355 	c = CUR_CHAR(l);
3356 
3357 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3358 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3359 		(c == '.') || (c == '-') ||
3360 		(c == '_') || (c == ':') ||
3361 		(IS_COMBINING(c)) ||
3362 		(IS_EXTENDER(c)))) {
3363 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3364 		count = 0;
3365 		GROW;
3366                 if (ctxt->instate == XML_PARSER_EOF)
3367                     return(NULL);
3368 	    }
3369 	    len += l;
3370 	    NEXTL(l);
3371 	    c = CUR_CHAR(l);
3372 	    if (c == 0) {
3373 		count = 0;
3374 		GROW;
3375                 if (ctxt->instate == XML_PARSER_EOF)
3376                     return(NULL);
3377 		c = CUR_CHAR(l);
3378 	    }
3379 	}
3380     }
3381     if ((len > XML_MAX_NAME_LENGTH) &&
3382         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3383         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3384         return(NULL);
3385     }
3386     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3387         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3388     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3389 }
3390 
3391 /**
3392  * xmlParseName:
3393  * @ctxt:  an XML parser context
3394  *
3395  * parse an XML name.
3396  *
3397  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3398  *                  CombiningChar | Extender
3399  *
3400  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3401  *
3402  * [6] Names ::= Name (#x20 Name)*
3403  *
3404  * Returns the Name parsed or NULL
3405  */
3406 
3407 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3408 xmlParseName(xmlParserCtxtPtr ctxt) {
3409     const xmlChar *in;
3410     const xmlChar *ret;
3411     int count = 0;
3412 
3413     GROW;
3414 
3415 #ifdef DEBUG
3416     nbParseName++;
3417 #endif
3418 
3419     /*
3420      * Accelerator for simple ASCII names
3421      */
3422     in = ctxt->input->cur;
3423     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3424 	((*in >= 0x41) && (*in <= 0x5A)) ||
3425 	(*in == '_') || (*in == ':')) {
3426 	in++;
3427 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3428 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3429 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3430 	       (*in == '_') || (*in == '-') ||
3431 	       (*in == ':') || (*in == '.'))
3432 	    in++;
3433 	if ((*in > 0) && (*in < 0x80)) {
3434 	    count = in - ctxt->input->cur;
3435             if ((count > XML_MAX_NAME_LENGTH) &&
3436                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3437                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3438                 return(NULL);
3439             }
3440 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3441 	    ctxt->input->cur = in;
3442 	    ctxt->nbChars += count;
3443 	    ctxt->input->col += count;
3444 	    if (ret == NULL)
3445 	        xmlErrMemory(ctxt, NULL);
3446 	    return(ret);
3447 	}
3448     }
3449     /* accelerator for special cases */
3450     return(xmlParseNameComplex(ctxt));
3451 }
3452 
3453 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3454 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3455     int len = 0, l;
3456     int c;
3457     int count = 0;
3458     const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
3459 
3460 #ifdef DEBUG
3461     nbParseNCNameComplex++;
3462 #endif
3463 
3464     /*
3465      * Handler for more complex cases
3466      */
3467     GROW;
3468     end = ctxt->input->cur;
3469     c = CUR_CHAR(l);
3470     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3471 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3472 	return(NULL);
3473     }
3474 
3475     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3476 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3477 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3478             if ((len > XML_MAX_NAME_LENGTH) &&
3479                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3480                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3481                 return(NULL);
3482             }
3483 	    count = 0;
3484 	    GROW;
3485             if (ctxt->instate == XML_PARSER_EOF)
3486                 return(NULL);
3487 	}
3488 	len += l;
3489 	NEXTL(l);
3490 	end = ctxt->input->cur;
3491 	c = CUR_CHAR(l);
3492 	if (c == 0) {
3493 	    count = 0;
3494 	    GROW;
3495             if (ctxt->instate == XML_PARSER_EOF)
3496                 return(NULL);
3497 	    end = ctxt->input->cur;
3498 	    c = CUR_CHAR(l);
3499 	}
3500     }
3501     if ((len > XML_MAX_NAME_LENGTH) &&
3502         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3503         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3504         return(NULL);
3505     }
3506     return(xmlDictLookup(ctxt->dict, end - len, len));
3507 }
3508 
3509 /**
3510  * xmlParseNCName:
3511  * @ctxt:  an XML parser context
3512  * @len:  length of the string parsed
3513  *
3514  * parse an XML name.
3515  *
3516  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3517  *                      CombiningChar | Extender
3518  *
3519  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3520  *
3521  * Returns the Name parsed or NULL
3522  */
3523 
3524 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3525 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3526     const xmlChar *in;
3527     const xmlChar *ret;
3528     int count = 0;
3529 
3530 #ifdef DEBUG
3531     nbParseNCName++;
3532 #endif
3533 
3534     /*
3535      * Accelerator for simple ASCII names
3536      */
3537     in = ctxt->input->cur;
3538     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3539 	((*in >= 0x41) && (*in <= 0x5A)) ||
3540 	(*in == '_')) {
3541 	in++;
3542 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3543 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3544 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3545 	       (*in == '_') || (*in == '-') ||
3546 	       (*in == '.'))
3547 	    in++;
3548 	if ((*in > 0) && (*in < 0x80)) {
3549 	    count = in - ctxt->input->cur;
3550             if ((count > XML_MAX_NAME_LENGTH) &&
3551                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553                 return(NULL);
3554             }
3555 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3556 	    ctxt->input->cur = in;
3557 	    ctxt->nbChars += count;
3558 	    ctxt->input->col += count;
3559 	    if (ret == NULL) {
3560 	        xmlErrMemory(ctxt, NULL);
3561 	    }
3562 	    return(ret);
3563 	}
3564     }
3565     return(xmlParseNCNameComplex(ctxt));
3566 }
3567 
3568 /**
3569  * xmlParseNameAndCompare:
3570  * @ctxt:  an XML parser context
3571  *
3572  * parse an XML name and compares for match
3573  * (specialized for endtag parsing)
3574  *
3575  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3576  * and the name for mismatch
3577  */
3578 
3579 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3580 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3581     register const xmlChar *cmp = other;
3582     register const xmlChar *in;
3583     const xmlChar *ret;
3584 
3585     GROW;
3586     if (ctxt->instate == XML_PARSER_EOF)
3587         return(NULL);
3588 
3589     in = ctxt->input->cur;
3590     while (*in != 0 && *in == *cmp) {
3591 	++in;
3592 	++cmp;
3593 	ctxt->input->col++;
3594     }
3595     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3596 	/* success */
3597 	ctxt->input->cur = in;
3598 	return (const xmlChar*) 1;
3599     }
3600     /* failure (or end of input buffer), check with full function */
3601     ret = xmlParseName (ctxt);
3602     /* strings coming from the dictionnary direct compare possible */
3603     if (ret == other) {
3604 	return (const xmlChar*) 1;
3605     }
3606     return ret;
3607 }
3608 
3609 /**
3610  * xmlParseStringName:
3611  * @ctxt:  an XML parser context
3612  * @str:  a pointer to the string pointer (IN/OUT)
3613  *
3614  * parse an XML name.
3615  *
3616  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3617  *                  CombiningChar | Extender
3618  *
3619  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3620  *
3621  * [6] Names ::= Name (#x20 Name)*
3622  *
3623  * Returns the Name parsed or NULL. The @str pointer
3624  * is updated to the current location in the string.
3625  */
3626 
3627 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3628 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3629     xmlChar buf[XML_MAX_NAMELEN + 5];
3630     const xmlChar *cur = *str;
3631     int len = 0, l;
3632     int c;
3633 
3634 #ifdef DEBUG
3635     nbParseStringName++;
3636 #endif
3637 
3638     c = CUR_SCHAR(cur, l);
3639     if (!xmlIsNameStartChar(ctxt, c)) {
3640 	return(NULL);
3641     }
3642 
3643     COPY_BUF(l,buf,len,c);
3644     cur += l;
3645     c = CUR_SCHAR(cur, l);
3646     while (xmlIsNameChar(ctxt, c)) {
3647 	COPY_BUF(l,buf,len,c);
3648 	cur += l;
3649 	c = CUR_SCHAR(cur, l);
3650 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3651 	    /*
3652 	     * Okay someone managed to make a huge name, so he's ready to pay
3653 	     * for the processing speed.
3654 	     */
3655 	    xmlChar *buffer;
3656 	    int max = len * 2;
3657 
3658 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3659 	    if (buffer == NULL) {
3660 	        xmlErrMemory(ctxt, NULL);
3661 		return(NULL);
3662 	    }
3663 	    memcpy(buffer, buf, len);
3664 	    while (xmlIsNameChar(ctxt, c)) {
3665 		if (len + 10 > max) {
3666 		    xmlChar *tmp;
3667 
3668                     if ((len > XML_MAX_NAME_LENGTH) &&
3669                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3670                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3671 			xmlFree(buffer);
3672                         return(NULL);
3673                     }
3674 		    max *= 2;
3675 		    tmp = (xmlChar *) xmlRealloc(buffer,
3676 			                            max * sizeof(xmlChar));
3677 		    if (tmp == NULL) {
3678 			xmlErrMemory(ctxt, NULL);
3679 			xmlFree(buffer);
3680 			return(NULL);
3681 		    }
3682 		    buffer = tmp;
3683 		}
3684 		COPY_BUF(l,buffer,len,c);
3685 		cur += l;
3686 		c = CUR_SCHAR(cur, l);
3687 	    }
3688 	    buffer[len] = 0;
3689 	    *str = cur;
3690 	    return(buffer);
3691 	}
3692     }
3693     if ((len > XML_MAX_NAME_LENGTH) &&
3694         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3695         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3696         return(NULL);
3697     }
3698     *str = cur;
3699     return(xmlStrndup(buf, len));
3700 }
3701 
3702 /**
3703  * xmlParseNmtoken:
3704  * @ctxt:  an XML parser context
3705  *
3706  * parse an XML Nmtoken.
3707  *
3708  * [7] Nmtoken ::= (NameChar)+
3709  *
3710  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3711  *
3712  * Returns the Nmtoken parsed or NULL
3713  */
3714 
3715 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3716 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3717     xmlChar buf[XML_MAX_NAMELEN + 5];
3718     int len = 0, l;
3719     int c;
3720     int count = 0;
3721 
3722 #ifdef DEBUG
3723     nbParseNmToken++;
3724 #endif
3725 
3726     GROW;
3727     if (ctxt->instate == XML_PARSER_EOF)
3728         return(NULL);
3729     c = CUR_CHAR(l);
3730 
3731     while (xmlIsNameChar(ctxt, c)) {
3732 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3733 	    count = 0;
3734 	    GROW;
3735 	}
3736 	COPY_BUF(l,buf,len,c);
3737 	NEXTL(l);
3738 	c = CUR_CHAR(l);
3739 	if (c == 0) {
3740 	    count = 0;
3741 	    GROW;
3742 	    if (ctxt->instate == XML_PARSER_EOF)
3743 		return(NULL);
3744             c = CUR_CHAR(l);
3745 	}
3746 	if (len >= XML_MAX_NAMELEN) {
3747 	    /*
3748 	     * Okay someone managed to make a huge token, so he's ready to pay
3749 	     * for the processing speed.
3750 	     */
3751 	    xmlChar *buffer;
3752 	    int max = len * 2;
3753 
3754 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3755 	    if (buffer == NULL) {
3756 	        xmlErrMemory(ctxt, NULL);
3757 		return(NULL);
3758 	    }
3759 	    memcpy(buffer, buf, len);
3760 	    while (xmlIsNameChar(ctxt, c)) {
3761 		if (count++ > XML_PARSER_CHUNK_SIZE) {
3762 		    count = 0;
3763 		    GROW;
3764                     if (ctxt->instate == XML_PARSER_EOF) {
3765                         xmlFree(buffer);
3766                         return(NULL);
3767                     }
3768 		}
3769 		if (len + 10 > max) {
3770 		    xmlChar *tmp;
3771 
3772                     if ((max > XML_MAX_NAME_LENGTH) &&
3773                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3774                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3775                         xmlFree(buffer);
3776                         return(NULL);
3777                     }
3778 		    max *= 2;
3779 		    tmp = (xmlChar *) xmlRealloc(buffer,
3780 			                            max * sizeof(xmlChar));
3781 		    if (tmp == NULL) {
3782 			xmlErrMemory(ctxt, NULL);
3783 			xmlFree(buffer);
3784 			return(NULL);
3785 		    }
3786 		    buffer = tmp;
3787 		}
3788 		COPY_BUF(l,buffer,len,c);
3789 		NEXTL(l);
3790 		c = CUR_CHAR(l);
3791 	    }
3792 	    buffer[len] = 0;
3793 	    return(buffer);
3794 	}
3795     }
3796     if (len == 0)
3797         return(NULL);
3798     if ((len > XML_MAX_NAME_LENGTH) &&
3799         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3800         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3801         return(NULL);
3802     }
3803     return(xmlStrndup(buf, len));
3804 }
3805 
3806 /**
3807  * xmlParseEntityValue:
3808  * @ctxt:  an XML parser context
3809  * @orig:  if non-NULL store a copy of the original entity value
3810  *
3811  * parse a value for ENTITY declarations
3812  *
3813  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3814  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3815  *
3816  * Returns the EntityValue parsed with reference substituted or NULL
3817  */
3818 
3819 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3820 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3821     xmlChar *buf = NULL;
3822     int len = 0;
3823     int size = XML_PARSER_BUFFER_SIZE;
3824     int c, l;
3825     xmlChar stop;
3826     xmlChar *ret = NULL;
3827     const xmlChar *cur = NULL;
3828     xmlParserInputPtr input;
3829 
3830     if (RAW == '"') stop = '"';
3831     else if (RAW == '\'') stop = '\'';
3832     else {
3833 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3834 	return(NULL);
3835     }
3836     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3837     if (buf == NULL) {
3838 	xmlErrMemory(ctxt, NULL);
3839 	return(NULL);
3840     }
3841 
3842     /*
3843      * The content of the entity definition is copied in a buffer.
3844      */
3845 
3846     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3847     input = ctxt->input;
3848     GROW;
3849     if (ctxt->instate == XML_PARSER_EOF) {
3850         xmlFree(buf);
3851         return(NULL);
3852     }
3853     NEXT;
3854     c = CUR_CHAR(l);
3855     /*
3856      * NOTE: 4.4.5 Included in Literal
3857      * When a parameter entity reference appears in a literal entity
3858      * value, ... a single or double quote character in the replacement
3859      * text is always treated as a normal data character and will not
3860      * terminate the literal.
3861      * In practice it means we stop the loop only when back at parsing
3862      * the initial entity and the quote is found
3863      */
3864     while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3865 	    (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3866 	if (len + 5 >= size) {
3867 	    xmlChar *tmp;
3868 
3869 	    size *= 2;
3870 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3871 	    if (tmp == NULL) {
3872 		xmlErrMemory(ctxt, NULL);
3873 		xmlFree(buf);
3874 		return(NULL);
3875 	    }
3876 	    buf = tmp;
3877 	}
3878 	COPY_BUF(l,buf,len,c);
3879 	NEXTL(l);
3880 	/*
3881 	 * Pop-up of finished entities.
3882 	 */
3883 	while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3884 	    xmlPopInput(ctxt);
3885 
3886 	GROW;
3887 	c = CUR_CHAR(l);
3888 	if (c == 0) {
3889 	    GROW;
3890 	    c = CUR_CHAR(l);
3891 	}
3892     }
3893     buf[len] = 0;
3894     if (ctxt->instate == XML_PARSER_EOF) {
3895         xmlFree(buf);
3896         return(NULL);
3897     }
3898 
3899     /*
3900      * Raise problem w.r.t. '&' and '%' being used in non-entities
3901      * reference constructs. Note Charref will be handled in
3902      * xmlStringDecodeEntities()
3903      */
3904     cur = buf;
3905     while (*cur != 0) { /* non input consuming */
3906 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3907 	    xmlChar *name;
3908 	    xmlChar tmp = *cur;
3909 
3910 	    cur++;
3911 	    name = xmlParseStringName(ctxt, &cur);
3912             if ((name == NULL) || (*cur != ';')) {
3913 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3914 	    "EntityValue: '%c' forbidden except for entities references\n",
3915 	                          tmp);
3916 	    }
3917 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3918 		(ctxt->inputNr == 1)) {
3919 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3920 	    }
3921 	    if (name != NULL)
3922 		xmlFree(name);
3923 	    if (*cur == 0)
3924 	        break;
3925 	}
3926 	cur++;
3927     }
3928 
3929     /*
3930      * Then PEReference entities are substituted.
3931      */
3932     if (c != stop) {
3933 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3934 	xmlFree(buf);
3935     } else {
3936 	NEXT;
3937 	/*
3938 	 * NOTE: 4.4.7 Bypassed
3939 	 * When a general entity reference appears in the EntityValue in
3940 	 * an entity declaration, it is bypassed and left as is.
3941 	 * so XML_SUBSTITUTE_REF is not set here.
3942 	 */
3943 	ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3944 				      0, 0, 0);
3945 	if (orig != NULL)
3946 	    *orig = buf;
3947 	else
3948 	    xmlFree(buf);
3949     }
3950 
3951     return(ret);
3952 }
3953 
/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @attlen:  if non-NULL, receives the length in bytes of the returned value
 * @normalize:  whether to apply the inner normalization
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * The current character must be the opening quote (' or "). White space
 * characters (#x20, #xD, #xA, #x9) are stored as #x20. When @normalize
 * is set, leading white space is dropped, consecutive white space
 * produces a single #x20 and trailing #x20 are removed.
 *
 * A '<' in the value or a missing closing quote raises a fatal error but
 * the buffer collected so far is still returned.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 *         NULL is returned if the value does not start with a quote, on
 *         allocation failure, when a length limit is exceeded, or when
 *         the parser reached XML_PARSER_EOF.
 */
static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
    xmlChar limit = 0;		/* the quote character closing the value */
    xmlChar *buf = NULL;	/* output buffer, returned to the caller */
    xmlChar *rep = NULL;	/* temporary decoded entity content */
    size_t len = 0;		/* number of bytes stored in buf */
    size_t buf_size = 0;	/* allocated size of buf */
    int c, l, in_space = 0;	/* in_space: last stored char was white space */
    xmlChar *current = NULL;
    xmlEntityPtr ent;

    if (NXT(0) == '"') {
	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
	limit = '"';
        NEXT;
    } else if (NXT(0) == '\'') {
	limit = '\'';
	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        NEXT;
    } else {
	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
	return(NULL);
    }

    /*
     * allocate a translation buffer.
     * NOTE(review): growBuffer() used below is a macro defined outside
     * this block; it presumably enlarges buf/buf_size and jumps to
     * mem_error on failure - confirm against its definition.
     */
    buf_size = XML_PARSER_BUFFER_SIZE;
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
    if (buf == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     */
    c = CUR_CHAR(l);
    while (((NXT(0) != limit) && /* checked */
            (IS_CHAR(c)) && (c != '<')) &&
            (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
         * special option is given
         * NOTE(review): this path raises the length error and then
         * jumps to mem_error, which also calls xmlErrMemory().
         */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue length too long\n");
            goto mem_error;
        }
	if (c == 0) break;
	if (c == '&') {
	    in_space = 0;
	    if (NXT(1) == '#') {
		/* character reference: &#...; */
		int val = xmlParseCharRef(ctxt);

		if (val == '&') {
		    if (ctxt->replaceEntities) {
			if (len + 10 > buf_size) {
			    growBuffer(buf, 10);
			}
			buf[len++] = '&';
		    } else {
			/*
			 * The reparsing will be done in xmlStringGetNodeList()
			 * called by the attribute() function in SAX.c
			 * so the ampersand is kept escaped as "&#38;"
			 */
			if (len + 10 > buf_size) {
			    growBuffer(buf, 10);
			}
			buf[len++] = '&';
			buf[len++] = '#';
			buf[len++] = '3';
			buf[len++] = '8';
			buf[len++] = ';';
		    }
		} else if (val != 0) {
		    if (len + 10 > buf_size) {
			growBuffer(buf, 10);
		    }
		    len += xmlCopyChar(0, &buf[len], val);
		}
	    } else {
		/* general entity reference: &name; */
		ent = xmlParseEntityRef(ctxt);
		ctxt->nbentities++;
		if (ent != NULL)
		    ctxt->nbentities += ent->owner;
		if ((ent != NULL) &&
		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
		    /*
		     * predefined entity: store its single character, except
		     * that '&' stays escaped when entities are not replaced
		     */
		    if (len + 10 > buf_size) {
			growBuffer(buf, 10);
		    }
		    if ((ctxt->replaceEntities == 0) &&
		        (ent->content[0] == '&')) {
			buf[len++] = '&';
			buf[len++] = '#';
			buf[len++] = '3';
			buf[len++] = '8';
			buf[len++] = ';';
		    } else {
			buf[len++] = ent->content[0];
		    }
		} else if ((ent != NULL) &&
		           (ctxt->replaceEntities != 0)) {
		    /* substitution requested: inline the decoded content */
		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
			rep = xmlStringDecodeEntities(ctxt, ent->content,
						      XML_SUBSTITUTE_REF,
						      0, 0, 0);
			if (rep != NULL) {
			    current = rep;
			    /* copy, turning #xD, #xA and #x9 into #x20 */
			    while (*current != 0) { /* non input consuming */
                                if ((*current == 0xD) || (*current == 0xA) ||
                                    (*current == 0x9)) {
                                    buf[len++] = 0x20;
                                    current++;
                                } else
                                    buf[len++] = *current++;
				if (len + 10 > buf_size) {
				    growBuffer(buf, 10);
				}
			    }
			    xmlFree(rep);
			    rep = NULL;
			}
		    } else {
			if (len + 10 > buf_size) {
			    growBuffer(buf, 10);
			}
			if (ent->content != NULL)
			    buf[len++] = ent->content[0];
		    }
		} else if (ent != NULL) {
		    /* no substitution: the reference is written back as is */
		    int i = xmlStrlen(ent->name);
		    const xmlChar *cur = ent->name;

		    /*
		     * This may look absurd but is needed to detect
		     * entities problems
		     * The decoded content is only inspected, never copied:
		     * ent->checked records the entity count delta, and its
		     * low bit is set when the content contains a '<'.
		     */
		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
			(ent->content != NULL) && (ent->checked == 0)) {
			unsigned long oldnbent = ctxt->nbentities;

			rep = xmlStringDecodeEntities(ctxt, ent->content,
						  XML_SUBSTITUTE_REF, 0, 0, 0);

			ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
			if (rep != NULL) {
			    if (xmlStrchr(rep, '<'))
			        ent->checked |= 1;
			    xmlFree(rep);
			    rep = NULL;
			}
		    }

		    /*
		     * Just output the reference
		     */
		    buf[len++] = '&';
		    while (len + i + 10 > buf_size) {
			growBuffer(buf, i + 10);
		    }
		    for (;i > 0;i--)
			buf[len++] = *cur++;
		    buf[len++] = ';';
		}
	    }
	} else {
	    /* plain character data */
	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
		/* when normalizing, leading white space is not stored */
	        if ((len != 0) || (!normalize)) {
		    /* when normalizing, a run of white space yields one #x20 */
		    if ((!normalize) || (!in_space)) {
			COPY_BUF(l,buf,len,0x20);
			while (len + 10 > buf_size) {
			    growBuffer(buf, 10);
			}
		    }
		    in_space = 1;
		}
	    } else {
	        in_space = 0;
		COPY_BUF(l,buf,len,c);
		if (len + 10 > buf_size) {
		    growBuffer(buf, 10);
		}
	    }
	    NEXTL(l);
	}
	GROW;
	c = CUR_CHAR(l);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;

    /* when normalizing, strip the trailing #x20 characters */
    if ((in_space) && (normalize)) {
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
    }
    buf[len] = 0;
    /* report why the loop stopped; the buffer is returned in all cases */
    if (RAW == '<') {
	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
    } else if (RAW != limit) {
	if ((c != 0) && (!IS_CHAR(c))) {
	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
			   "invalid character in attribute value\n");
	} else {
	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
			   "AttValue: ' expected\n");
        }
    } else
	NEXT;	/* skip the closing quote */

    /*
     * There we potentially risk an overflow, don't allow attribute value of
     * length more than INT_MAX it is a very reasonable assumption !
     */
    if (len >= INT_MAX) {
        xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                       "AttValue length too long\n");
        goto mem_error;
    }

    if (attlen != NULL) *attlen = (int) len;
    return(buf);

mem_error:
    /* report the memory error, then fall through to the common cleanup */
    xmlErrMemory(ctxt, NULL);
error:
    if (buf != NULL)
        xmlFree(buf);
    if (rep != NULL)
        xmlFree(rep);
    return(NULL);
}
4196 
4197 /**
4198  * xmlParseAttValue:
4199  * @ctxt:  an XML parser context
4200  *
4201  * parse a value for an attribute
4202  * Note: the parser won't do substitution of entities here, this
4203  * will be handled later in xmlStringGetNodeList
4204  *
4205  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4206  *                   "'" ([^<&'] | Reference)* "'"
4207  *
4208  * 3.3.3 Attribute-Value Normalization:
4209  * Before the value of an attribute is passed to the application or
4210  * checked for validity, the XML processor must normalize it as follows:
4211  * - a character reference is processed by appending the referenced
4212  *   character to the attribute value
4213  * - an entity reference is processed by recursively processing the
4214  *   replacement text of the entity
4215  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4216  *   appending #x20 to the normalized value, except that only a single
4217  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4218  *   parsed entity or the literal entity value of an internal parsed entity
4219  * - other characters are processed by appending them to the normalized value
4220  * If the declared value is not CDATA, then the XML processor must further
4221  * process the normalized attribute value by discarding any leading and
4222  * trailing space (#x20) characters, and by replacing sequences of space
4223  * (#x20) characters by a single space (#x20) character.
4224  * All attributes for which no declaration has been read should be treated
4225  * by a non-validating parser as if declared CDATA.
4226  *
4227  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4228  */
4229 
4230 
4231 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4232 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4233     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4234     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4235 }
4236 
4237 /**
4238  * xmlParseSystemLiteral:
4239  * @ctxt:  an XML parser context
4240  *
4241  * parse an XML Literal
4242  *
4243  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4244  *
4245  * Returns the SystemLiteral parsed or NULL
4246  */
4247 
4248 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4249 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4250     xmlChar *buf = NULL;
4251     int len = 0;
4252     int size = XML_PARSER_BUFFER_SIZE;
4253     int cur, l;
4254     xmlChar stop;
4255     int state = ctxt->instate;
4256     int count = 0;
4257 
4258     SHRINK;
4259     if (RAW == '"') {
4260         NEXT;
4261 	stop = '"';
4262     } else if (RAW == '\'') {
4263         NEXT;
4264 	stop = '\'';
4265     } else {
4266 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4267 	return(NULL);
4268     }
4269 
4270     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4271     if (buf == NULL) {
4272         xmlErrMemory(ctxt, NULL);
4273 	return(NULL);
4274     }
4275     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4276     cur = CUR_CHAR(l);
4277     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4278 	if (len + 5 >= size) {
4279 	    xmlChar *tmp;
4280 
4281             if ((size > XML_MAX_NAME_LENGTH) &&
4282                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4283                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4284                 xmlFree(buf);
4285 		ctxt->instate = (xmlParserInputState) state;
4286                 return(NULL);
4287             }
4288 	    size *= 2;
4289 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4290 	    if (tmp == NULL) {
4291 	        xmlFree(buf);
4292 		xmlErrMemory(ctxt, NULL);
4293 		ctxt->instate = (xmlParserInputState) state;
4294 		return(NULL);
4295 	    }
4296 	    buf = tmp;
4297 	}
4298 	count++;
4299 	if (count > 50) {
4300 	    GROW;
4301 	    count = 0;
4302             if (ctxt->instate == XML_PARSER_EOF) {
4303 	        xmlFree(buf);
4304 		return(NULL);
4305             }
4306 	}
4307 	COPY_BUF(l,buf,len,cur);
4308 	NEXTL(l);
4309 	cur = CUR_CHAR(l);
4310 	if (cur == 0) {
4311 	    GROW;
4312 	    SHRINK;
4313 	    cur = CUR_CHAR(l);
4314 	}
4315     }
4316     buf[len] = 0;
4317     ctxt->instate = (xmlParserInputState) state;
4318     if (!IS_CHAR(cur)) {
4319 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4320     } else {
4321 	NEXT;
4322     }
4323     return(buf);
4324 }
4325 
4326 /**
4327  * xmlParsePubidLiteral:
4328  * @ctxt:  an XML parser context
4329  *
4330  * parse an XML public literal
4331  *
4332  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4333  *
4334  * Returns the PubidLiteral parsed or NULL.
4335  */
4336 
4337 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4338 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4339     xmlChar *buf = NULL;
4340     int len = 0;
4341     int size = XML_PARSER_BUFFER_SIZE;
4342     xmlChar cur;
4343     xmlChar stop;
4344     int count = 0;
4345     xmlParserInputState oldstate = ctxt->instate;
4346 
4347     SHRINK;
4348     if (RAW == '"') {
4349         NEXT;
4350 	stop = '"';
4351     } else if (RAW == '\'') {
4352         NEXT;
4353 	stop = '\'';
4354     } else {
4355 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4356 	return(NULL);
4357     }
4358     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4359     if (buf == NULL) {
4360 	xmlErrMemory(ctxt, NULL);
4361 	return(NULL);
4362     }
4363     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4364     cur = CUR;
4365     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4366 	if (len + 1 >= size) {
4367 	    xmlChar *tmp;
4368 
4369             if ((size > XML_MAX_NAME_LENGTH) &&
4370                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4371                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4372                 xmlFree(buf);
4373                 return(NULL);
4374             }
4375 	    size *= 2;
4376 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4377 	    if (tmp == NULL) {
4378 		xmlErrMemory(ctxt, NULL);
4379 		xmlFree(buf);
4380 		return(NULL);
4381 	    }
4382 	    buf = tmp;
4383 	}
4384 	buf[len++] = cur;
4385 	count++;
4386 	if (count > 50) {
4387 	    GROW;
4388 	    count = 0;
4389             if (ctxt->instate == XML_PARSER_EOF) {
4390 		xmlFree(buf);
4391 		return(NULL);
4392             }
4393 	}
4394 	NEXT;
4395 	cur = CUR;
4396 	if (cur == 0) {
4397 	    GROW;
4398 	    SHRINK;
4399 	    cur = CUR;
4400 	}
4401     }
4402     buf[len] = 0;
4403     if (cur != stop) {
4404 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4405     } else {
4406 	NEXT;
4407     }
4408     ctxt->instate = oldstate;
4409     return(buf);
4410 }
4411 
4412 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4413 
/*
 * Lookup table driving the inner loop of the character data scanner.
 * An entry is non-zero (it holds the byte value itself) when the byte
 * can be skipped as plain character data: 0x09 and 0x20-0x7F except
 * '&' (0x26), '<' (0x3C) and ']' (0x5D). Every other byte, including
 * 0x0A, 0x0D and all non-ASCII bytes, maps to 0.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
               0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
               0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
               0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4451 
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero a fast path scans the input buffer in place and
 * hands the text to the SAX callbacks without copying it. The fast path
 * returns when it reaches '<' or '&', when ']]>' is found, or when the
 * parser state changes. In every other case (including @cdata non-zero)
 * the work is completed by xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * line/col remember the position at entry, then the position after
     * the last chunk handed to SAX; they are written back before calling
     * the fallback function.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
	in = ctxt->input->cur;
	do {
get_more_space:
	    /* skip a leading run of spaces and line feeds */
	    while (*in == 0x20) { in++; ctxt->input->col++; }
	    if (*in == 0xA) {
		do {
		    ctxt->input->line++; ctxt->input->col = 1;
		    in++;
		} while (*in == 0xA);
		goto get_more_space;
	    }
	    if (*in == '<') {
		/*
		 * Everything between cur and in is spaces and line feeds
		 * followed by markup: deliver it and return.
		 */
		nbchar = in - ctxt->input->cur;
		if (nbchar > 0) {
		    const xmlChar *tmp = ctxt->input->cur;
		    ctxt->input->cur = in;

		    if ((ctxt->sax != NULL) &&
		        (ctxt->sax->ignorableWhitespace !=
		         ctxt->sax->characters)) {
			/* areBlanks() picks the callback to use */
			if (areBlanks(ctxt, tmp, nbchar, 1)) {
			    if (ctxt->sax->ignorableWhitespace != NULL)
				ctxt->sax->ignorableWhitespace(ctxt->userData,
						       tmp, nbchar);
			} else {
			    if (ctxt->sax->characters != NULL)
				ctxt->sax->characters(ctxt->userData,
						      tmp, nbchar);
			    if (*ctxt->space == -1)
			        *ctxt->space = -2;
			}
		    } else if ((ctxt->sax != NULL) &&
		               (ctxt->sax->characters != NULL)) {
			ctxt->sax->characters(ctxt->userData,
					      tmp, nbchar);
		    }
		}
		return;
	    }

get_more:
	    /*
	     * Skip the bytes flagged in test_char_data; the column is
	     * counted in a local and stored once after the run.
	     */
            ccol = ctxt->input->col;
	    while (test_char_data[*in]) {
		in++;
		ccol++;
	    }
	    ctxt->input->col = ccol;
	    if (*in == 0xA) {
		do {
		    ctxt->input->line++; ctxt->input->col = 1;
		    in++;
		} while (*in == 0xA);
		goto get_more;
	    }
	    if (*in == ']') {
		/* "]]>" is not allowed in content outside a CDATA section */
		if ((in[1] == ']') && (in[2] == '>')) {
		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
		    ctxt->input->cur = in;
		    return;
		}
		/* a lone ']' is ordinary data */
		in++;
		ctxt->input->col++;
		goto get_more;
	    }
	    /* deliver the chunk accumulated between cur and in */
	    nbchar = in - ctxt->input->cur;
	    if (nbchar > 0) {
		if ((ctxt->sax != NULL) &&
		    (ctxt->sax->ignorableWhitespace !=
		     ctxt->sax->characters) &&
		    (IS_BLANK_CH(*ctxt->input->cur))) {
		    /*
		     * the chunk starts with a blank: areBlanks() decides
		     * whether it is reported as ignorable white space
		     */
		    const xmlChar *tmp = ctxt->input->cur;
		    ctxt->input->cur = in;

		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
		        if (ctxt->sax->ignorableWhitespace != NULL)
			    ctxt->sax->ignorableWhitespace(ctxt->userData,
							   tmp, nbchar);
		    } else {
		        if (ctxt->sax->characters != NULL)
			    ctxt->sax->characters(ctxt->userData,
						  tmp, nbchar);
			if (*ctxt->space == -1)
			    *ctxt->space = -2;
		    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
		} else if (ctxt->sax != NULL) {
		    if (ctxt->sax->characters != NULL)
			ctxt->sax->characters(ctxt->userData,
					      ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
		}
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
	    }
	    ctxt->input->cur = in;
	    if (*in == 0xD) {
		in++;
		if (*in == 0xA) {
		    /*
		     * CR LF: cur is moved onto the LF, so the CR is left
		     * out of the next chunk, then scanning resumes after
		     * the LF.
		     */
		    ctxt->input->cur = in;
		    in++;
		    ctxt->input->line++; ctxt->input->col = 1;
		    continue; /* while */
		}
		/* lone CR: step back and let the checks below handle it */
		in--;
	    }
	    if (*in == '<') {
		return;
	    }
	    if (*in == '&') {
		return;
	    }
	    /* refresh the input window and restart from the new cur */
	    SHRINK;
	    GROW;
            if (ctxt->instate == XML_PARSER_EOF)
		return;
	    in = ctxt->input->cur;
	/* keep going while the next byte is 0x20-0x7F or a tab */
	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
	nbchar = 0;	/* not read again in this function */
    }
    /*
     * Fallback: put back the position recorded after the last delivered
     * chunk and let the generic function do the rest.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4609 
4610 /**
4611  * xmlParseCharDataComplex:
4612  * @ctxt:  an XML parser context
4613  * @cdata:  int indicating whether we are within a CDATA section
4614  *
4615  * parse a CharData section.this is the fallback function
4616  * of xmlParseCharData() when the parsing requires handling
4617  * of non-ASCII characters.
4618  */
4619 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4620 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4621     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4622     int nbchar = 0;
4623     int cur, l;
4624     int count = 0;
4625 
4626     SHRINK;
4627     GROW;
4628     cur = CUR_CHAR(l);
4629     while ((cur != '<') && /* checked */
4630            (cur != '&') &&
4631 	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4632 	if ((cur == ']') && (NXT(1) == ']') &&
4633 	    (NXT(2) == '>')) {
4634 	    if (cdata) break;
4635 	    else {
4636 		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4637 	    }
4638 	}
4639 	COPY_BUF(l,buf,nbchar,cur);
4640 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4641 	    buf[nbchar] = 0;
4642 
4643 	    /*
4644 	     * OK the segment is to be consumed as chars.
4645 	     */
4646 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4647 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4648 		    if (ctxt->sax->ignorableWhitespace != NULL)
4649 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4650 			                               buf, nbchar);
4651 		} else {
4652 		    if (ctxt->sax->characters != NULL)
4653 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4654 		    if ((ctxt->sax->characters !=
4655 		         ctxt->sax->ignorableWhitespace) &&
4656 			(*ctxt->space == -1))
4657 			*ctxt->space = -2;
4658 		}
4659 	    }
4660 	    nbchar = 0;
4661             /* something really bad happened in the SAX callback */
4662             if (ctxt->instate != XML_PARSER_CONTENT)
4663                 return;
4664 	}
4665 	count++;
4666 	if (count > 50) {
4667 	    GROW;
4668 	    count = 0;
4669             if (ctxt->instate == XML_PARSER_EOF)
4670 		return;
4671 	}
4672 	NEXTL(l);
4673 	cur = CUR_CHAR(l);
4674     }
4675     if (nbchar != 0) {
4676         buf[nbchar] = 0;
4677 	/*
4678 	 * OK the segment is to be consumed as chars.
4679 	 */
4680 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4681 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4682 		if (ctxt->sax->ignorableWhitespace != NULL)
4683 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4684 	    } else {
4685 		if (ctxt->sax->characters != NULL)
4686 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4687 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4688 		    (*ctxt->space == -1))
4689 		    *ctxt->space = -2;
4690 	    }
4691 	}
4692     }
4693     if ((cur != 0) && (!IS_CHAR(cur))) {
4694 	/* Generate the error and skip the offending character */
4695         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4696                           "PCDATA invalid Char value %d\n",
4697 	                  cur);
4698 	NEXTL(l);
4699     }
4700 }
4701 
4702 /**
4703  * xmlParseExternalID:
4704  * @ctxt:  an XML parser context
4705  * @publicID:  a xmlChar** receiving PubidLiteral
4706  * @strict: indicate whether we should restrict parsing to only
4707  *          production [75], see NOTE below
4708  *
4709  * Parse an External ID or a Public ID
4710  *
4711  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4712  *       'PUBLIC' S PubidLiteral S SystemLiteral
4713  *
4714  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4715  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4716  *
4717  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4718  *
 * Returns the SystemLiteral; in the second case publicID also receives
 *         the PubidLiteral. If strict is off it is possible to return
 *         NULL and still have publicID set.
4722  */
4723 
4724 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4725 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4726     xmlChar *URI = NULL;
4727 
4728     SHRINK;
4729 
4730     *publicID = NULL;
4731     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4732         SKIP(6);
4733 	if (!IS_BLANK_CH(CUR)) {
4734 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4735 	                   "Space required after 'SYSTEM'\n");
4736 	}
4737         SKIP_BLANKS;
4738 	URI = xmlParseSystemLiteral(ctxt);
4739 	if (URI == NULL) {
4740 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4741         }
4742     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4743         SKIP(6);
4744 	if (!IS_BLANK_CH(CUR)) {
4745 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4746 		    "Space required after 'PUBLIC'\n");
4747 	}
4748         SKIP_BLANKS;
4749 	*publicID = xmlParsePubidLiteral(ctxt);
4750 	if (*publicID == NULL) {
4751 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4752 	}
4753 	if (strict) {
4754 	    /*
4755 	     * We don't handle [83] so "S SystemLiteral" is required.
4756 	     */
4757 	    if (!IS_BLANK_CH(CUR)) {
4758 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4759 			"Space required after the Public Identifier\n");
4760 	    }
4761 	} else {
4762 	    /*
4763 	     * We handle [83] so we return immediately, if
4764 	     * "S SystemLiteral" is not detected. From a purely parsing
4765 	     * point of view that's a nice mess.
4766 	     */
4767 	    const xmlChar *ptr;
4768 	    GROW;
4769 
4770 	    ptr = CUR_PTR;
4771 	    if (!IS_BLANK_CH(*ptr)) return(NULL);
4772 
4773 	    while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4774 	    if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4775 	}
4776         SKIP_BLANKS;
4777 	URI = xmlParseSystemLiteral(ctxt);
4778 	if (URI == NULL) {
4779 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780         }
4781     }
4782     return(URI);
4783 }
4784 
4785 /**
4786  * xmlParseCommentComplex:
4787  * @ctxt:  an XML parser context
4788  * @buf:  the already parsed part of the buffer
4789  * @len:  number of bytes filles in the buffer
4790  * @size:  allocated size of the buffer
4791  *
4792  * Skip an XML (SGML) comment <!-- .... -->
4793  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794  *  must not occur within comments. "
4795  * This is the slow routine in case the accelerator for ascii didn't work
4796  *
4797  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798  */
4799 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4800 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801                        size_t len, size_t size) {
4802     int q, ql;
4803     int r, rl;
4804     int cur, l;
4805     size_t count = 0;
4806     int inputid;
4807 
4808     inputid = ctxt->input->id;
4809 
4810     if (buf == NULL) {
4811         len = 0;
4812 	size = XML_PARSER_BUFFER_SIZE;
4813 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4814 	if (buf == NULL) {
4815 	    xmlErrMemory(ctxt, NULL);
4816 	    return;
4817 	}
4818     }
4819     GROW;	/* Assure there's enough input data */
4820     q = CUR_CHAR(ql);
4821     if (q == 0)
4822         goto not_terminated;
4823     if (!IS_CHAR(q)) {
4824         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4825                           "xmlParseComment: invalid xmlChar value %d\n",
4826 	                  q);
4827 	xmlFree (buf);
4828 	return;
4829     }
4830     NEXTL(ql);
4831     r = CUR_CHAR(rl);
4832     if (r == 0)
4833         goto not_terminated;
4834     if (!IS_CHAR(r)) {
4835         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4836                           "xmlParseComment: invalid xmlChar value %d\n",
4837 	                  q);
4838 	xmlFree (buf);
4839 	return;
4840     }
4841     NEXTL(rl);
4842     cur = CUR_CHAR(l);
4843     if (cur == 0)
4844         goto not_terminated;
4845     while (IS_CHAR(cur) && /* checked */
4846            ((cur != '>') ||
4847 	    (r != '-') || (q != '-'))) {
4848 	if ((r == '-') && (q == '-')) {
4849 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4850 	}
4851         if ((len > XML_MAX_TEXT_LENGTH) &&
4852             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4853             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4854                          "Comment too big found", NULL);
4855             xmlFree (buf);
4856             return;
4857         }
4858 	if (len + 5 >= size) {
4859 	    xmlChar *new_buf;
4860             size_t new_size;
4861 
4862 	    new_size = size * 2;
4863 	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4864 	    if (new_buf == NULL) {
4865 		xmlFree (buf);
4866 		xmlErrMemory(ctxt, NULL);
4867 		return;
4868 	    }
4869 	    buf = new_buf;
4870             size = new_size;
4871 	}
4872 	COPY_BUF(ql,buf,len,q);
4873 	q = r;
4874 	ql = rl;
4875 	r = cur;
4876 	rl = l;
4877 
4878 	count++;
4879 	if (count > 50) {
4880 	    GROW;
4881 	    count = 0;
4882             if (ctxt->instate == XML_PARSER_EOF) {
4883 		xmlFree(buf);
4884 		return;
4885             }
4886 	}
4887 	NEXTL(l);
4888 	cur = CUR_CHAR(l);
4889 	if (cur == 0) {
4890 	    SHRINK;
4891 	    GROW;
4892 	    cur = CUR_CHAR(l);
4893 	}
4894     }
4895     buf[len] = 0;
4896     if (cur == 0) {
4897 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4898 	                     "Comment not terminated \n<!--%.50s\n", buf);
4899     } else if (!IS_CHAR(cur)) {
4900         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4901                           "xmlParseComment: invalid xmlChar value %d\n",
4902 	                  cur);
4903     } else {
4904 	if (inputid != ctxt->input->id) {
4905 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4906 		"Comment doesn't start and stop in the same entity\n");
4907 	}
4908         NEXT;
4909 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4910 	    (!ctxt->disableSAX))
4911 	    ctxt->sax->comment(ctxt->userData, buf);
4912     }
4913     xmlFree(buf);
4914     return;
4915 not_terminated:
4916     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4917 			 "Comment not terminated\n", NULL);
4918     xmlFree(buf);
4919     return;
4920 }
4921 
4922 /**
4923  * xmlParseComment:
4924  * @ctxt:  an XML parser context
4925  *
4926  * Skip an XML (SGML) comment <!-- .... -->
4927  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4928  *  must not occur within comments. "
4929  *
4930  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4931  */
4932 void
xmlParseComment(xmlParserCtxtPtr ctxt)4933 xmlParseComment(xmlParserCtxtPtr ctxt) {
4934     xmlChar *buf = NULL;
4935     size_t size = XML_PARSER_BUFFER_SIZE;
4936     size_t len = 0;
4937     xmlParserInputState state;
4938     const xmlChar *in;
4939     size_t nbchar = 0;
4940     int ccol;
4941     int inputid;
4942 
4943     /*
4944      * Check that there is a comment right here.
4945      */
4946     if ((RAW != '<') || (NXT(1) != '!') ||
4947         (NXT(2) != '-') || (NXT(3) != '-')) return;
4948     state = ctxt->instate;
4949     ctxt->instate = XML_PARSER_COMMENT;
4950     inputid = ctxt->input->id;
4951     SKIP(4);
4952     SHRINK;
4953     GROW;
4954 
4955     /*
4956      * Accelerated common case where input don't need to be
4957      * modified before passing it to the handler.
4958      */
4959     in = ctxt->input->cur;
4960     do {
4961 	if (*in == 0xA) {
4962 	    do {
4963 		ctxt->input->line++; ctxt->input->col = 1;
4964 		in++;
4965 	    } while (*in == 0xA);
4966 	}
4967 get_more:
4968         ccol = ctxt->input->col;
4969 	while (((*in > '-') && (*in <= 0x7F)) ||
4970 	       ((*in >= 0x20) && (*in < '-')) ||
4971 	       (*in == 0x09)) {
4972 		    in++;
4973 		    ccol++;
4974 	}
4975 	ctxt->input->col = ccol;
4976 	if (*in == 0xA) {
4977 	    do {
4978 		ctxt->input->line++; ctxt->input->col = 1;
4979 		in++;
4980 	    } while (*in == 0xA);
4981 	    goto get_more;
4982 	}
4983 	nbchar = in - ctxt->input->cur;
4984 	/*
4985 	 * save current set of data
4986 	 */
4987 	if (nbchar > 0) {
4988 	    if ((ctxt->sax != NULL) &&
4989 		(ctxt->sax->comment != NULL)) {
4990 		if (buf == NULL) {
4991 		    if ((*in == '-') && (in[1] == '-'))
4992 		        size = nbchar + 1;
4993 		    else
4994 		        size = XML_PARSER_BUFFER_SIZE + nbchar;
4995 		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4996 		    if (buf == NULL) {
4997 		        xmlErrMemory(ctxt, NULL);
4998 			ctxt->instate = state;
4999 			return;
5000 		    }
5001 		    len = 0;
5002 		} else if (len + nbchar + 1 >= size) {
5003 		    xmlChar *new_buf;
5004 		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5005 		    new_buf = (xmlChar *) xmlRealloc(buf,
5006 		                                     size * sizeof(xmlChar));
5007 		    if (new_buf == NULL) {
5008 		        xmlFree (buf);
5009 			xmlErrMemory(ctxt, NULL);
5010 			ctxt->instate = state;
5011 			return;
5012 		    }
5013 		    buf = new_buf;
5014 		}
5015 		memcpy(&buf[len], ctxt->input->cur, nbchar);
5016 		len += nbchar;
5017 		buf[len] = 0;
5018 	    }
5019 	}
5020         if ((len > XML_MAX_TEXT_LENGTH) &&
5021             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5022             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5023                          "Comment too big found", NULL);
5024             xmlFree (buf);
5025             return;
5026         }
5027 	ctxt->input->cur = in;
5028 	if (*in == 0xA) {
5029 	    in++;
5030 	    ctxt->input->line++; ctxt->input->col = 1;
5031 	}
5032 	if (*in == 0xD) {
5033 	    in++;
5034 	    if (*in == 0xA) {
5035 		ctxt->input->cur = in;
5036 		in++;
5037 		ctxt->input->line++; ctxt->input->col = 1;
5038 		continue; /* while */
5039 	    }
5040 	    in--;
5041 	}
5042 	SHRINK;
5043 	GROW;
5044         if (ctxt->instate == XML_PARSER_EOF) {
5045             xmlFree(buf);
5046             return;
5047         }
5048 	in = ctxt->input->cur;
5049 	if (*in == '-') {
5050 	    if (in[1] == '-') {
5051 	        if (in[2] == '>') {
5052 		    if (ctxt->input->id != inputid) {
5053 			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5054 			"comment doesn't start and stop in the same entity\n");
5055 		    }
5056 		    SKIP(3);
5057 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5058 		        (!ctxt->disableSAX)) {
5059 			if (buf != NULL)
5060 			    ctxt->sax->comment(ctxt->userData, buf);
5061 			else
5062 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5063 		    }
5064 		    if (buf != NULL)
5065 		        xmlFree(buf);
5066 		    if (ctxt->instate != XML_PARSER_EOF)
5067 			ctxt->instate = state;
5068 		    return;
5069 		}
5070 		if (buf != NULL) {
5071 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5072 		                      "Double hyphen within comment: "
5073                                       "<!--%.50s\n",
5074 				      buf);
5075 		} else
5076 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5077 		                      "Double hyphen within comment\n", NULL);
5078 		in++;
5079 		ctxt->input->col++;
5080 	    }
5081 	    in++;
5082 	    ctxt->input->col++;
5083 	    goto get_more;
5084 	}
5085     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5086     xmlParseCommentComplex(ctxt, buf, len, size);
5087     ctxt->instate = state;
5088     return;
5089 }
5090 
5091 
5092 /**
5093  * xmlParsePITarget:
5094  * @ctxt:  an XML parser context
5095  *
5096  * parse the name of a PI
5097  *
5098  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5099  *
5100  * Returns the PITarget name or NULL
5101  */
5102 
5103 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5104 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5105     const xmlChar *name;
5106 
5107     name = xmlParseName(ctxt);
5108     if ((name != NULL) &&
5109         ((name[0] == 'x') || (name[0] == 'X')) &&
5110         ((name[1] == 'm') || (name[1] == 'M')) &&
5111         ((name[2] == 'l') || (name[2] == 'L'))) {
5112 	int i;
5113 	if ((name[0] == 'x') && (name[1] == 'm') &&
5114 	    (name[2] == 'l') && (name[3] == 0)) {
5115 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5116 		 "XML declaration allowed only at the start of the document\n");
5117 	    return(name);
5118 	} else if (name[3] == 0) {
5119 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5120 	    return(name);
5121 	}
5122 	for (i = 0;;i++) {
5123 	    if (xmlW3CPIs[i] == NULL) break;
5124 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5125 	        return(name);
5126 	}
5127 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5128 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5129 		      NULL, NULL);
5130     }
5131     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5132 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5133 		 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5134     }
5135     return(name);
5136 }
5137 
5138 #ifdef LIBXML_CATALOG_ENABLED
5139 /**
5140  * xmlParseCatalogPI:
5141  * @ctxt:  an XML parser context
5142  * @catalog:  the PI value string
5143  *
5144  * parse an XML Catalog Processing Instruction.
5145  *
5146  * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5147  *
5148  * Occurs only if allowed by the user and if happening in the Misc
5149  * part of the document before any doctype informations
5150  * This will add the given catalog to the parsing context in order
5151  * to be used if there is a resolution need further down in the document
5152  */
5153 
5154 static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt,const xmlChar * catalog)5155 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5156     xmlChar *URL = NULL;
5157     const xmlChar *tmp, *base;
5158     xmlChar marker;
5159 
5160     tmp = catalog;
5161     while (IS_BLANK_CH(*tmp)) tmp++;
5162     if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5163 	goto error;
5164     tmp += 7;
5165     while (IS_BLANK_CH(*tmp)) tmp++;
5166     if (*tmp != '=') {
5167 	return;
5168     }
5169     tmp++;
5170     while (IS_BLANK_CH(*tmp)) tmp++;
5171     marker = *tmp;
5172     if ((marker != '\'') && (marker != '"'))
5173 	goto error;
5174     tmp++;
5175     base = tmp;
5176     while ((*tmp != 0) && (*tmp != marker)) tmp++;
5177     if (*tmp == 0)
5178 	goto error;
5179     URL = xmlStrndup(base, tmp - base);
5180     tmp++;
5181     while (IS_BLANK_CH(*tmp)) tmp++;
5182     if (*tmp != 0)
5183 	goto error;
5184 
5185     if (URL != NULL) {
5186 	ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5187 	xmlFree(URL);
5188     }
5189     return;
5190 
5191 error:
5192     xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5193 	          "Catalog PI syntax error: %s\n",
5194 		  catalog, NULL);
5195     if (URL != NULL)
5196 	xmlFree(URL);
5197 }
5198 #endif
5199 
5200 /**
5201  * xmlParsePI:
5202  * @ctxt:  an XML parser context
5203  *
5204  * parse an XML Processing Instruction.
5205  *
5206  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5207  *
5208  * The processing is transfered to SAX once parsed.
5209  */
5210 
5211 void
xmlParsePI(xmlParserCtxtPtr ctxt)5212 xmlParsePI(xmlParserCtxtPtr ctxt) {
5213     xmlChar *buf = NULL;
5214     size_t len = 0;
5215     size_t size = XML_PARSER_BUFFER_SIZE;
5216     int cur, l;
5217     const xmlChar *target;
5218     xmlParserInputState state;
5219     int count = 0;
5220 
5221     if ((RAW == '<') && (NXT(1) == '?')) {
5222 	xmlParserInputPtr input = ctxt->input;
5223 	state = ctxt->instate;
5224         ctxt->instate = XML_PARSER_PI;
5225 	/*
5226 	 * this is a Processing Instruction.
5227 	 */
5228 	SKIP(2);
5229 	SHRINK;
5230 
5231 	/*
5232 	 * Parse the target name and check for special support like
5233 	 * namespace.
5234 	 */
5235         target = xmlParsePITarget(ctxt);
5236 	if (target != NULL) {
5237 	    if ((RAW == '?') && (NXT(1) == '>')) {
5238 		if (input != ctxt->input) {
5239 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5240 	    "PI declaration doesn't start and stop in the same entity\n");
5241 		}
5242 		SKIP(2);
5243 
5244 		/*
5245 		 * SAX: PI detected.
5246 		 */
5247 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5248 		    (ctxt->sax->processingInstruction != NULL))
5249 		    ctxt->sax->processingInstruction(ctxt->userData,
5250 		                                     target, NULL);
5251 		if (ctxt->instate != XML_PARSER_EOF)
5252 		    ctxt->instate = state;
5253 		return;
5254 	    }
5255 	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5256 	    if (buf == NULL) {
5257 		xmlErrMemory(ctxt, NULL);
5258 		ctxt->instate = state;
5259 		return;
5260 	    }
5261 	    cur = CUR;
5262 	    if (!IS_BLANK(cur)) {
5263 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5264 			  "ParsePI: PI %s space expected\n", target);
5265 	    }
5266             SKIP_BLANKS;
5267 	    cur = CUR_CHAR(l);
5268 	    while (IS_CHAR(cur) && /* checked */
5269 		   ((cur != '?') || (NXT(1) != '>'))) {
5270 		if (len + 5 >= size) {
5271 		    xmlChar *tmp;
5272                     size_t new_size = size * 2;
5273 		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5274 		    if (tmp == NULL) {
5275 			xmlErrMemory(ctxt, NULL);
5276 			xmlFree(buf);
5277 			ctxt->instate = state;
5278 			return;
5279 		    }
5280 		    buf = tmp;
5281                     size = new_size;
5282 		}
5283 		count++;
5284 		if (count > 50) {
5285 		    GROW;
5286                     if (ctxt->instate == XML_PARSER_EOF) {
5287                         xmlFree(buf);
5288                         return;
5289                     }
5290 		    count = 0;
5291                     if ((len > XML_MAX_TEXT_LENGTH) &&
5292                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5293                         xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5294                                           "PI %s too big found", target);
5295                         xmlFree(buf);
5296                         ctxt->instate = state;
5297                         return;
5298                     }
5299 		}
5300 		COPY_BUF(l,buf,len,cur);
5301 		NEXTL(l);
5302 		cur = CUR_CHAR(l);
5303 		if (cur == 0) {
5304 		    SHRINK;
5305 		    GROW;
5306 		    cur = CUR_CHAR(l);
5307 		}
5308 	    }
5309             if ((len > XML_MAX_TEXT_LENGTH) &&
5310                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5311                 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5312                                   "PI %s too big found", target);
5313                 xmlFree(buf);
5314                 ctxt->instate = state;
5315                 return;
5316             }
5317 	    buf[len] = 0;
5318 	    if (cur != '?') {
5319 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5320 		      "ParsePI: PI %s never end ...\n", target);
5321 	    } else {
5322 		if (input != ctxt->input) {
5323 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5324 	    "PI declaration doesn't start and stop in the same entity\n");
5325 		}
5326 		SKIP(2);
5327 
5328 #ifdef LIBXML_CATALOG_ENABLED
5329 		if (((state == XML_PARSER_MISC) ||
5330 	             (state == XML_PARSER_START)) &&
5331 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5332 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5333 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5334 			(allow == XML_CATA_ALLOW_ALL))
5335 			xmlParseCatalogPI(ctxt, buf);
5336 		}
5337 #endif
5338 
5339 
5340 		/*
5341 		 * SAX: PI detected.
5342 		 */
5343 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5344 		    (ctxt->sax->processingInstruction != NULL))
5345 		    ctxt->sax->processingInstruction(ctxt->userData,
5346 		                                     target, buf);
5347 	    }
5348 	    xmlFree(buf);
5349 	} else {
5350 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5351 	}
5352 	if (ctxt->instate != XML_PARSER_EOF)
5353 	    ctxt->instate = state;
5354     }
5355 }
5356 
5357 /**
5358  * xmlParseNotationDecl:
5359  * @ctxt:  an XML parser context
5360  *
5361  * parse a notation declaration
5362  *
5363  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5364  *
5365  * Hence there is actually 3 choices:
5366  *     'PUBLIC' S PubidLiteral
5367  *     'PUBLIC' S PubidLiteral S SystemLiteral
5368  * and 'SYSTEM' S SystemLiteral
5369  *
5370  * See the NOTE on xmlParseExternalID().
5371  */
5372 
5373 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5374 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5375     const xmlChar *name;
5376     xmlChar *Pubid;
5377     xmlChar *Systemid;
5378 
5379     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5380 	xmlParserInputPtr input = ctxt->input;
5381 	SHRINK;
5382 	SKIP(10);
5383 	if (!IS_BLANK_CH(CUR)) {
5384 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5385 			   "Space required after '<!NOTATION'\n");
5386 	    return;
5387 	}
5388 	SKIP_BLANKS;
5389 
5390         name = xmlParseName(ctxt);
5391 	if (name == NULL) {
5392 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5393 	    return;
5394 	}
5395 	if (!IS_BLANK_CH(CUR)) {
5396 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5397 		     "Space required after the NOTATION name'\n");
5398 	    return;
5399 	}
5400 	if (xmlStrchr(name, ':') != NULL) {
5401 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5402 		     "colons are forbidden from notation names '%s'\n",
5403 		     name, NULL, NULL);
5404 	}
5405 	SKIP_BLANKS;
5406 
5407 	/*
5408 	 * Parse the IDs.
5409 	 */
5410 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5411 	SKIP_BLANKS;
5412 
5413 	if (RAW == '>') {
5414 	    if (input != ctxt->input) {
5415 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5416 	"Notation declaration doesn't start and stop in the same entity\n");
5417 	    }
5418 	    NEXT;
5419 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5420 		(ctxt->sax->notationDecl != NULL))
5421 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5422 	} else {
5423 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5424 	}
5425 	if (Systemid != NULL) xmlFree(Systemid);
5426 	if (Pubid != NULL) xmlFree(Pubid);
5427     }
5428 }
5429 
5430 /**
5431  * xmlParseEntityDecl:
5432  * @ctxt:  an XML parser context
5433  *
5434  * parse <!ENTITY declarations
5435  *
5436  * [70] EntityDecl ::= GEDecl | PEDecl
5437  *
5438  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5439  *
5440  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5441  *
5442  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5443  *
5444  * [74] PEDef ::= EntityValue | ExternalID
5445  *
5446  * [76] NDataDecl ::= S 'NDATA' S Name
5447  *
5448  * [ VC: Notation Declared ]
5449  * The Name must match the declared name of a notation.
5450  */
5451 
5452 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5453 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5454     const xmlChar *name = NULL;
5455     xmlChar *value = NULL;
5456     xmlChar *URI = NULL, *literal = NULL;
5457     const xmlChar *ndata = NULL;
5458     int isParameter = 0;
5459     xmlChar *orig = NULL;
5460     int skipped;
5461 
5462     /* GROW; done in the caller */
5463     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5464 	xmlParserInputPtr input = ctxt->input;
5465 	SHRINK;
5466 	SKIP(8);
5467 	skipped = SKIP_BLANKS;
5468 	if (skipped == 0) {
5469 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5470 			   "Space required after '<!ENTITY'\n");
5471 	}
5472 
5473 	if (RAW == '%') {
5474 	    NEXT;
5475 	    skipped = SKIP_BLANKS;
5476 	    if (skipped == 0) {
5477 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5478 			       "Space required after '%'\n");
5479 	    }
5480 	    isParameter = 1;
5481 	}
5482 
5483         name = xmlParseName(ctxt);
5484 	if (name == NULL) {
5485 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5486 	                   "xmlParseEntityDecl: no name\n");
5487             return;
5488 	}
5489 	if (xmlStrchr(name, ':') != NULL) {
5490 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5491 		     "colons are forbidden from entities names '%s'\n",
5492 		     name, NULL, NULL);
5493 	}
5494         skipped = SKIP_BLANKS;
5495 	if (skipped == 0) {
5496 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5497 			   "Space required after the entity name\n");
5498 	}
5499 
5500 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5501 	/*
5502 	 * handle the various case of definitions...
5503 	 */
5504 	if (isParameter) {
5505 	    if ((RAW == '"') || (RAW == '\'')) {
5506 	        value = xmlParseEntityValue(ctxt, &orig);
5507 		if (value) {
5508 		    if ((ctxt->sax != NULL) &&
5509 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5510 			ctxt->sax->entityDecl(ctxt->userData, name,
5511 		                    XML_INTERNAL_PARAMETER_ENTITY,
5512 				    NULL, NULL, value);
5513 		}
5514 	    } else {
5515 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5516 		if ((URI == NULL) && (literal == NULL)) {
5517 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5518 		}
5519 		if (URI) {
5520 		    xmlURIPtr uri;
5521 
5522 		    uri = xmlParseURI((const char *) URI);
5523 		    if (uri == NULL) {
5524 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5525 				     "Invalid URI: %s\n", URI);
5526 			/*
5527 			 * This really ought to be a well formedness error
5528 			 * but the XML Core WG decided otherwise c.f. issue
5529 			 * E26 of the XML erratas.
5530 			 */
5531 		    } else {
5532 			if (uri->fragment != NULL) {
5533 			    /*
5534 			     * Okay this is foolish to block those but not
5535 			     * invalid URIs.
5536 			     */
5537 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5538 			} else {
5539 			    if ((ctxt->sax != NULL) &&
5540 				(!ctxt->disableSAX) &&
5541 				(ctxt->sax->entityDecl != NULL))
5542 				ctxt->sax->entityDecl(ctxt->userData, name,
5543 					    XML_EXTERNAL_PARAMETER_ENTITY,
5544 					    literal, URI, NULL);
5545 			}
5546 			xmlFreeURI(uri);
5547 		    }
5548 		}
5549 	    }
5550 	} else {
5551 	    if ((RAW == '"') || (RAW == '\'')) {
5552 	        value = xmlParseEntityValue(ctxt, &orig);
5553 		if ((ctxt->sax != NULL) &&
5554 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5555 		    ctxt->sax->entityDecl(ctxt->userData, name,
5556 				XML_INTERNAL_GENERAL_ENTITY,
5557 				NULL, NULL, value);
5558 		/*
5559 		 * For expat compatibility in SAX mode.
5560 		 */
5561 		if ((ctxt->myDoc == NULL) ||
5562 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5563 		    if (ctxt->myDoc == NULL) {
5564 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5565 			if (ctxt->myDoc == NULL) {
5566 			    xmlErrMemory(ctxt, "New Doc failed");
5567 			    return;
5568 			}
5569 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5570 		    }
5571 		    if (ctxt->myDoc->intSubset == NULL)
5572 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5573 					    BAD_CAST "fake", NULL, NULL);
5574 
5575 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5576 			              NULL, NULL, value);
5577 		}
5578 	    } else {
5579 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5580 		if ((URI == NULL) && (literal == NULL)) {
5581 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5582 		}
5583 		if (URI) {
5584 		    xmlURIPtr uri;
5585 
5586 		    uri = xmlParseURI((const char *)URI);
5587 		    if (uri == NULL) {
5588 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5589 				     "Invalid URI: %s\n", URI);
5590 			/*
5591 			 * This really ought to be a well formedness error
5592 			 * but the XML Core WG decided otherwise c.f. issue
5593 			 * E26 of the XML erratas.
5594 			 */
5595 		    } else {
5596 			if (uri->fragment != NULL) {
5597 			    /*
5598 			     * Okay this is foolish to block those but not
5599 			     * invalid URIs.
5600 			     */
5601 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5602 			}
5603 			xmlFreeURI(uri);
5604 		    }
5605 		}
5606 		if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5607 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5608 				   "Space required before 'NDATA'\n");
5609 		}
5610 		SKIP_BLANKS;
5611 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5612 		    SKIP(5);
5613 		    if (!IS_BLANK_CH(CUR)) {
5614 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5615 				       "Space required after 'NDATA'\n");
5616 		    }
5617 		    SKIP_BLANKS;
5618 		    ndata = xmlParseName(ctxt);
5619 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5620 		        (ctxt->sax->unparsedEntityDecl != NULL))
5621 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5622 				    literal, URI, ndata);
5623 		} else {
5624 		    if ((ctxt->sax != NULL) &&
5625 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5626 			ctxt->sax->entityDecl(ctxt->userData, name,
5627 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5628 				    literal, URI, NULL);
5629 		    /*
5630 		     * For expat compatibility in SAX mode.
5631 		     * assuming the entity repalcement was asked for
5632 		     */
5633 		    if ((ctxt->replaceEntities != 0) &&
5634 			((ctxt->myDoc == NULL) ||
5635 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5636 			if (ctxt->myDoc == NULL) {
5637 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5638 			    if (ctxt->myDoc == NULL) {
5639 			        xmlErrMemory(ctxt, "New Doc failed");
5640 				return;
5641 			    }
5642 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5643 			}
5644 
5645 			if (ctxt->myDoc->intSubset == NULL)
5646 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5647 						BAD_CAST "fake", NULL, NULL);
5648 			xmlSAX2EntityDecl(ctxt, name,
5649 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5650 				          literal, URI, NULL);
5651 		    }
5652 		}
5653 	    }
5654 	}
5655 	if (ctxt->instate == XML_PARSER_EOF)
5656 	    return;
5657 	SKIP_BLANKS;
5658 	if (RAW != '>') {
5659 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5660 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5661 	} else {
5662 	    if (input != ctxt->input) {
5663 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5664 	"Entity declaration doesn't start and stop in the same entity\n");
5665 	    }
5666 	    NEXT;
5667 	}
5668 	if (orig != NULL) {
5669 	    /*
5670 	     * Ugly mechanism to save the raw entity value.
5671 	     */
5672 	    xmlEntityPtr cur = NULL;
5673 
5674 	    if (isParameter) {
5675 	        if ((ctxt->sax != NULL) &&
5676 		    (ctxt->sax->getParameterEntity != NULL))
5677 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5678 	    } else {
5679 	        if ((ctxt->sax != NULL) &&
5680 		    (ctxt->sax->getEntity != NULL))
5681 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5682 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5683 		    cur = xmlSAX2GetEntity(ctxt, name);
5684 		}
5685 	    }
5686             if (cur != NULL) {
5687 	        if (cur->orig != NULL)
5688 		    xmlFree(orig);
5689 		else
5690 		    cur->orig = orig;
5691 	    } else
5692 		xmlFree(orig);
5693 	}
5694 	if (value != NULL) xmlFree(value);
5695 	if (URI != NULL) xmlFree(URI);
5696 	if (literal != NULL) xmlFree(literal);
5697     }
5698 }
5699 
5700 /**
5701  * xmlParseDefaultDecl:
5702  * @ctxt:  an XML parser context
5703  * @value:  Receive a possible fixed default value for the attribute
5704  *
5705  * Parse an attribute default declaration
5706  *
5707  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5708  *
5709  * [ VC: Required Attribute ]
5710  * if the default declaration is the keyword #REQUIRED, then the
5711  * attribute must be specified for all elements of the type in the
5712  * attribute-list declaration.
5713  *
5714  * [ VC: Attribute Default Legal ]
5715  * The declared default value must meet the lexical constraints of
5716  * the declared attribute type c.f. xmlValidateAttributeDecl()
5717  *
5718  * [ VC: Fixed Attribute Default ]
5719  * if an attribute has a default value declared with the #FIXED
5720  * keyword, instances of that attribute must match the default value.
5721  *
5722  * [ WFC: No < in Attribute Values ]
5723  * handled in xmlParseAttValue()
5724  *
5725  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5726  *          or XML_ATTRIBUTE_FIXED.
5727  */
5728 
5729 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5730 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5731     int val;
5732     xmlChar *ret;
5733 
5734     *value = NULL;
5735     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5736 	SKIP(9);
5737 	return(XML_ATTRIBUTE_REQUIRED);
5738     }
5739     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5740 	SKIP(8);
5741 	return(XML_ATTRIBUTE_IMPLIED);
5742     }
5743     val = XML_ATTRIBUTE_NONE;
5744     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5745 	SKIP(6);
5746 	val = XML_ATTRIBUTE_FIXED;
5747 	if (!IS_BLANK_CH(CUR)) {
5748 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5749 			   "Space required after '#FIXED'\n");
5750 	}
5751 	SKIP_BLANKS;
5752     }
5753     ret = xmlParseAttValue(ctxt);
5754     ctxt->instate = XML_PARSER_DTD;
5755     if (ret == NULL) {
5756 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5757 		       "Attribute default value declaration error\n");
5758     } else
5759         *value = ret;
5760     return(val);
5761 }
5762 
5763 /**
5764  * xmlParseNotationType:
5765  * @ctxt:  an XML parser context
5766  *
5767  * parse an Notation attribute type.
5768  *
5769  * Note: the leading 'NOTATION' S part has already being parsed...
5770  *
5771  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5772  *
5773  * [ VC: Notation Attributes ]
5774  * Values of this type must match one of the notation names included
5775  * in the declaration; all notation names in the declaration must be declared.
5776  *
5777  * Returns: the notation attribute tree built while parsing
5778  */
5779 
5780 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5781 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5782     const xmlChar *name;
5783     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5784 
5785     if (RAW != '(') {
5786 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5787 	return(NULL);
5788     }
5789     SHRINK;
5790     do {
5791         NEXT;
5792 	SKIP_BLANKS;
5793         name = xmlParseName(ctxt);
5794 	if (name == NULL) {
5795 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5796 			   "Name expected in NOTATION declaration\n");
5797             xmlFreeEnumeration(ret);
5798 	    return(NULL);
5799 	}
5800 	tmp = ret;
5801 	while (tmp != NULL) {
5802 	    if (xmlStrEqual(name, tmp->name)) {
5803 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5804 	  "standalone: attribute notation value token %s duplicated\n",
5805 				 name, NULL);
5806 		if (!xmlDictOwns(ctxt->dict, name))
5807 		    xmlFree((xmlChar *) name);
5808 		break;
5809 	    }
5810 	    tmp = tmp->next;
5811 	}
5812 	if (tmp == NULL) {
5813 	    cur = xmlCreateEnumeration(name);
5814 	    if (cur == NULL) {
5815                 xmlFreeEnumeration(ret);
5816                 return(NULL);
5817             }
5818 	    if (last == NULL) ret = last = cur;
5819 	    else {
5820 		last->next = cur;
5821 		last = cur;
5822 	    }
5823 	}
5824 	SKIP_BLANKS;
5825     } while (RAW == '|');
5826     if (RAW != ')') {
5827 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5828         xmlFreeEnumeration(ret);
5829 	return(NULL);
5830     }
5831     NEXT;
5832     return(ret);
5833 }
5834 
5835 /**
5836  * xmlParseEnumerationType:
5837  * @ctxt:  an XML parser context
5838  *
5839  * parse an Enumeration attribute type.
5840  *
5841  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5842  *
5843  * [ VC: Enumeration ]
5844  * Values of this type must match one of the Nmtoken tokens in
5845  * the declaration
5846  *
5847  * Returns: the enumeration attribute tree built while parsing
5848  */
5849 
5850 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5851 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5852     xmlChar *name;
5853     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5854 
5855     if (RAW != '(') {
5856 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5857 	return(NULL);
5858     }
5859     SHRINK;
5860     do {
5861         NEXT;
5862 	SKIP_BLANKS;
5863         name = xmlParseNmtoken(ctxt);
5864 	if (name == NULL) {
5865 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5866 	    return(ret);
5867 	}
5868 	tmp = ret;
5869 	while (tmp != NULL) {
5870 	    if (xmlStrEqual(name, tmp->name)) {
5871 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5872 	  "standalone: attribute enumeration value token %s duplicated\n",
5873 				 name, NULL);
5874 		if (!xmlDictOwns(ctxt->dict, name))
5875 		    xmlFree(name);
5876 		break;
5877 	    }
5878 	    tmp = tmp->next;
5879 	}
5880 	if (tmp == NULL) {
5881 	    cur = xmlCreateEnumeration(name);
5882 	    if (!xmlDictOwns(ctxt->dict, name))
5883 		xmlFree(name);
5884 	    if (cur == NULL) {
5885                 xmlFreeEnumeration(ret);
5886                 return(NULL);
5887             }
5888 	    if (last == NULL) ret = last = cur;
5889 	    else {
5890 		last->next = cur;
5891 		last = cur;
5892 	    }
5893 	}
5894 	SKIP_BLANKS;
5895     } while (RAW == '|');
5896     if (RAW != ')') {
5897 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5898 	return(ret);
5899     }
5900     NEXT;
5901     return(ret);
5902 }
5903 
5904 /**
5905  * xmlParseEnumeratedType:
5906  * @ctxt:  an XML parser context
5907  * @tree:  the enumeration tree built while parsing
5908  *
5909  * parse an Enumerated attribute type.
5910  *
5911  * [57] EnumeratedType ::= NotationType | Enumeration
5912  *
5913  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5914  *
5915  *
5916  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5917  */
5918 
5919 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5920 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5921     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5922 	SKIP(8);
5923 	if (!IS_BLANK_CH(CUR)) {
5924 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5925 			   "Space required after 'NOTATION'\n");
5926 	    return(0);
5927 	}
5928         SKIP_BLANKS;
5929 	*tree = xmlParseNotationType(ctxt);
5930 	if (*tree == NULL) return(0);
5931 	return(XML_ATTRIBUTE_NOTATION);
5932     }
5933     *tree = xmlParseEnumerationType(ctxt);
5934     if (*tree == NULL) return(0);
5935     return(XML_ATTRIBUTE_ENUMERATION);
5936 }
5937 
5938 /**
5939  * xmlParseAttributeType:
5940  * @ctxt:  an XML parser context
5941  * @tree:  the enumeration tree built while parsing
5942  *
5943  * parse the Attribute list def for an element
5944  *
5945  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5946  *
5947  * [55] StringType ::= 'CDATA'
5948  *
5949  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5950  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5951  *
5952  * Validity constraints for attribute values syntax are checked in
5953  * xmlValidateAttributeValue()
5954  *
5955  * [ VC: ID ]
5956  * Values of type ID must match the Name production. A name must not
5957  * appear more than once in an XML document as a value of this type;
5958  * i.e., ID values must uniquely identify the elements which bear them.
5959  *
5960  * [ VC: One ID per Element Type ]
5961  * No element type may have more than one ID attribute specified.
5962  *
5963  * [ VC: ID Attribute Default ]
5964  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5965  *
5966  * [ VC: IDREF ]
5967  * Values of type IDREF must match the Name production, and values
5968  * of type IDREFS must match Names; each IDREF Name must match the value
5969  * of an ID attribute on some element in the XML document; i.e. IDREF
5970  * values must match the value of some ID attribute.
5971  *
5972  * [ VC: Entity Name ]
5973  * Values of type ENTITY must match the Name production, values
5974  * of type ENTITIES must match Names; each Entity Name must match the
5975  * name of an unparsed entity declared in the DTD.
5976  *
5977  * [ VC: Name Token ]
5978  * Values of type NMTOKEN must match the Nmtoken production; values
5979  * of type NMTOKENS must match Nmtokens.
5980  *
5981  * Returns the attribute type
5982  */
5983 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5984 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5985     SHRINK;
5986     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5987 	SKIP(5);
5988 	return(XML_ATTRIBUTE_CDATA);
5989      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5990 	SKIP(6);
5991 	return(XML_ATTRIBUTE_IDREFS);
5992      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5993 	SKIP(5);
5994 	return(XML_ATTRIBUTE_IDREF);
5995      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5996         SKIP(2);
5997 	return(XML_ATTRIBUTE_ID);
5998      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5999 	SKIP(6);
6000 	return(XML_ATTRIBUTE_ENTITY);
6001      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6002 	SKIP(8);
6003 	return(XML_ATTRIBUTE_ENTITIES);
6004      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6005 	SKIP(8);
6006 	return(XML_ATTRIBUTE_NMTOKENS);
6007      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6008 	SKIP(7);
6009 	return(XML_ATTRIBUTE_NMTOKEN);
6010      }
6011      return(xmlParseEnumeratedType(ctxt, tree));
6012 }
6013 
6014 /**
6015  * xmlParseAttributeListDecl:
6016  * @ctxt:  an XML parser context
6017  *
6018  * : parse the Attribute list def for an element
6019  *
6020  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6021  *
6022  * [53] AttDef ::= S Name S AttType S DefaultDecl
6023  *
6024  */
6025 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)6026 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6027     const xmlChar *elemName;
6028     const xmlChar *attrName;
6029     xmlEnumerationPtr tree;
6030 
6031     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6032 	xmlParserInputPtr input = ctxt->input;
6033 
6034 	SKIP(9);
6035 	if (!IS_BLANK_CH(CUR)) {
6036 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6037 		                 "Space required after '<!ATTLIST'\n");
6038 	}
6039         SKIP_BLANKS;
6040         elemName = xmlParseName(ctxt);
6041 	if (elemName == NULL) {
6042 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6043 			   "ATTLIST: no name for Element\n");
6044 	    return;
6045 	}
6046 	SKIP_BLANKS;
6047 	GROW;
6048 	while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6049 	    const xmlChar *check = CUR_PTR;
6050 	    int type;
6051 	    int def;
6052 	    xmlChar *defaultValue = NULL;
6053 
6054 	    GROW;
6055             tree = NULL;
6056 	    attrName = xmlParseName(ctxt);
6057 	    if (attrName == NULL) {
6058 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6059 			       "ATTLIST: no name for Attribute\n");
6060 		break;
6061 	    }
6062 	    GROW;
6063 	    if (!IS_BLANK_CH(CUR)) {
6064 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6065 		        "Space required after the attribute name\n");
6066 		break;
6067 	    }
6068 	    SKIP_BLANKS;
6069 
6070 	    type = xmlParseAttributeType(ctxt, &tree);
6071 	    if (type <= 0) {
6072 	        break;
6073 	    }
6074 
6075 	    GROW;
6076 	    if (!IS_BLANK_CH(CUR)) {
6077 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6078 			       "Space required after the attribute type\n");
6079 	        if (tree != NULL)
6080 		    xmlFreeEnumeration(tree);
6081 		break;
6082 	    }
6083 	    SKIP_BLANKS;
6084 
6085 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
6086 	    if (def <= 0) {
6087                 if (defaultValue != NULL)
6088 		    xmlFree(defaultValue);
6089 	        if (tree != NULL)
6090 		    xmlFreeEnumeration(tree);
6091 	        break;
6092 	    }
6093 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6094 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
6095 
6096 	    GROW;
6097             if (RAW != '>') {
6098 		if (!IS_BLANK_CH(CUR)) {
6099 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100 			"Space required after the attribute default value\n");
6101 		    if (defaultValue != NULL)
6102 			xmlFree(defaultValue);
6103 		    if (tree != NULL)
6104 			xmlFreeEnumeration(tree);
6105 		    break;
6106 		}
6107 		SKIP_BLANKS;
6108 	    }
6109 	    if (check == CUR_PTR) {
6110 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6111 		            "in xmlParseAttributeListDecl\n");
6112 		if (defaultValue != NULL)
6113 		    xmlFree(defaultValue);
6114 	        if (tree != NULL)
6115 		    xmlFreeEnumeration(tree);
6116 		break;
6117 	    }
6118 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6119 		(ctxt->sax->attributeDecl != NULL))
6120 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6121 	                        type, def, defaultValue, tree);
6122 	    else if (tree != NULL)
6123 		xmlFreeEnumeration(tree);
6124 
6125 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
6126 	        (def != XML_ATTRIBUTE_IMPLIED) &&
6127 		(def != XML_ATTRIBUTE_REQUIRED)) {
6128 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6129 	    }
6130 	    if (ctxt->sax2) {
6131 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6132 	    }
6133 	    if (defaultValue != NULL)
6134 	        xmlFree(defaultValue);
6135 	    GROW;
6136 	}
6137 	if (RAW == '>') {
6138 	    if (input != ctxt->input) {
6139 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6140     "Attribute list declaration doesn't start and stop in the same entity\n",
6141                                  NULL, NULL);
6142 	    }
6143 	    NEXT;
6144 	}
6145     }
6146 }
6147 
6148 /**
6149  * xmlParseElementMixedContentDecl:
6150  * @ctxt:  an XML parser context
6151  * @inputchk:  the input used for the current entity, needed for boundary checks
6152  *
6153  * parse the declaration for a Mixed Element content
6154  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6155  *
6156  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6157  *                '(' S? '#PCDATA' S? ')'
6158  *
6159  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6160  *
6161  * [ VC: No Duplicate Types ]
6162  * The same name must not appear more than once in a single
6163  * mixed-content declaration.
6164  *
6165  * returns: the list of the xmlElementContentPtr describing the element choices
6166  */
6167 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6168 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6169     xmlElementContentPtr ret = NULL, cur = NULL, n;
6170     const xmlChar *elem = NULL;
6171 
6172     GROW;
6173     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6174 	SKIP(7);
6175 	SKIP_BLANKS;
6176 	SHRINK;
6177 	if (RAW == ')') {
6178 	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6179 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6180 "Element content declaration doesn't start and stop in the same entity\n",
6181                                  NULL, NULL);
6182 	    }
6183 	    NEXT;
6184 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6185 	    if (ret == NULL)
6186 	        return(NULL);
6187 	    if (RAW == '*') {
6188 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6189 		NEXT;
6190 	    }
6191 	    return(ret);
6192 	}
6193 	if ((RAW == '(') || (RAW == '|')) {
6194 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6195 	    if (ret == NULL) return(NULL);
6196 	}
6197 	while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6198 	    NEXT;
6199 	    if (elem == NULL) {
6200 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6201 		if (ret == NULL) return(NULL);
6202 		ret->c1 = cur;
6203 		if (cur != NULL)
6204 		    cur->parent = ret;
6205 		cur = ret;
6206 	    } else {
6207 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6208 		if (n == NULL) return(NULL);
6209 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6210 		if (n->c1 != NULL)
6211 		    n->c1->parent = n;
6212 	        cur->c2 = n;
6213 		if (n != NULL)
6214 		    n->parent = cur;
6215 		cur = n;
6216 	    }
6217 	    SKIP_BLANKS;
6218 	    elem = xmlParseName(ctxt);
6219 	    if (elem == NULL) {
6220 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6221 			"xmlParseElementMixedContentDecl : Name expected\n");
6222 		xmlFreeDocElementContent(ctxt->myDoc, cur);
6223 		return(NULL);
6224 	    }
6225 	    SKIP_BLANKS;
6226 	    GROW;
6227 	}
6228 	if ((RAW == ')') && (NXT(1) == '*')) {
6229 	    if (elem != NULL) {
6230 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6231 		                               XML_ELEMENT_CONTENT_ELEMENT);
6232 		if (cur->c2 != NULL)
6233 		    cur->c2->parent = cur;
6234             }
6235             if (ret != NULL)
6236                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6237 	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6238 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6239 "Element content declaration doesn't start and stop in the same entity\n",
6240 				 NULL, NULL);
6241 	    }
6242 	    SKIP(2);
6243 	} else {
6244 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6245 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6246 	    return(NULL);
6247 	}
6248 
6249     } else {
6250 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6251     }
6252     return(ret);
6253 }
6254 
6255 /**
6256  * xmlParseElementChildrenContentDeclPriv:
6257  * @ctxt:  an XML parser context
6258  * @inputchk:  the input used for the current entity, needed for boundary checks
6259  * @depth: the level of recursion
6260  *
6261  * parse the declaration for a Mixed Element content
6262  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6263  *
6264  *
6265  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6266  *
6267  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6268  *
6269  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6270  *
6271  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6272  *
6273  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6274  * TODO Parameter-entity replacement text must be properly nested
6275  *	with parenthesized groups. That is to say, if either of the
6276  *	opening or closing parentheses in a choice, seq, or Mixed
6277  *	construct is contained in the replacement text for a parameter
6278  *	entity, both must be contained in the same replacement text. For
6279  *	interoperability, if a parameter-entity reference appears in a
6280  *	choice, seq, or Mixed construct, its replacement text should not
6281  *	be empty, and neither the first nor last non-blank character of
6282  *	the replacement text should be a connector (| or ,).
6283  *
6284  * Returns the tree of xmlElementContentPtr describing the element
6285  *          hierarchy.
6286  */
6287 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6288 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6289                                        int depth) {
6290     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6291     const xmlChar *elem;
6292     xmlChar type = 0;
6293 
6294     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6295         (depth >  2048)) {
6296         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6297 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6298                           depth);
6299 	return(NULL);
6300     }
6301     SKIP_BLANKS;
6302     GROW;
6303     if (RAW == '(') {
6304 	int inputid = ctxt->input->id;
6305 
6306         /* Recurse on first child */
6307 	NEXT;
6308 	SKIP_BLANKS;
6309         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6310                                                            depth + 1);
6311 	SKIP_BLANKS;
6312 	GROW;
6313     } else {
6314 	elem = xmlParseName(ctxt);
6315 	if (elem == NULL) {
6316 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6317 	    return(NULL);
6318 	}
6319         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6320 	if (cur == NULL) {
6321 	    xmlErrMemory(ctxt, NULL);
6322 	    return(NULL);
6323 	}
6324 	GROW;
6325 	if (RAW == '?') {
6326 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6327 	    NEXT;
6328 	} else if (RAW == '*') {
6329 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6330 	    NEXT;
6331 	} else if (RAW == '+') {
6332 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6333 	    NEXT;
6334 	} else {
6335 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6336 	}
6337 	GROW;
6338     }
6339     SKIP_BLANKS;
6340     SHRINK;
6341     while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6342         /*
6343 	 * Each loop we parse one separator and one element.
6344 	 */
6345         if (RAW == ',') {
6346 	    if (type == 0) type = CUR;
6347 
6348 	    /*
6349 	     * Detect "Name | Name , Name" error
6350 	     */
6351 	    else if (type != CUR) {
6352 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6353 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6354 		                  type);
6355 		if ((last != NULL) && (last != ret))
6356 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6357 		if (ret != NULL)
6358 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6359 		return(NULL);
6360 	    }
6361 	    NEXT;
6362 
6363 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6364 	    if (op == NULL) {
6365 		if ((last != NULL) && (last != ret))
6366 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6367 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6368 		return(NULL);
6369 	    }
6370 	    if (last == NULL) {
6371 		op->c1 = ret;
6372 		if (ret != NULL)
6373 		    ret->parent = op;
6374 		ret = cur = op;
6375 	    } else {
6376 	        cur->c2 = op;
6377 		if (op != NULL)
6378 		    op->parent = cur;
6379 		op->c1 = last;
6380 		if (last != NULL)
6381 		    last->parent = op;
6382 		cur =op;
6383 		last = NULL;
6384 	    }
6385 	} else if (RAW == '|') {
6386 	    if (type == 0) type = CUR;
6387 
6388 	    /*
6389 	     * Detect "Name , Name | Name" error
6390 	     */
6391 	    else if (type != CUR) {
6392 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6393 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6394 				  type);
6395 		if ((last != NULL) && (last != ret))
6396 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6397 		if (ret != NULL)
6398 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6399 		return(NULL);
6400 	    }
6401 	    NEXT;
6402 
6403 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6404 	    if (op == NULL) {
6405 		if ((last != NULL) && (last != ret))
6406 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6407 		if (ret != NULL)
6408 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6409 		return(NULL);
6410 	    }
6411 	    if (last == NULL) {
6412 		op->c1 = ret;
6413 		if (ret != NULL)
6414 		    ret->parent = op;
6415 		ret = cur = op;
6416 	    } else {
6417 	        cur->c2 = op;
6418 		if (op != NULL)
6419 		    op->parent = cur;
6420 		op->c1 = last;
6421 		if (last != NULL)
6422 		    last->parent = op;
6423 		cur =op;
6424 		last = NULL;
6425 	    }
6426 	} else {
6427 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6428 	    if ((last != NULL) && (last != ret))
6429 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6430 	    if (ret != NULL)
6431 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6432 	    return(NULL);
6433 	}
6434 	GROW;
6435 	SKIP_BLANKS;
6436 	GROW;
6437 	if (RAW == '(') {
6438 	    int inputid = ctxt->input->id;
6439 	    /* Recurse on second child */
6440 	    NEXT;
6441 	    SKIP_BLANKS;
6442 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6443                                                           depth + 1);
6444 	    SKIP_BLANKS;
6445 	} else {
6446 	    elem = xmlParseName(ctxt);
6447 	    if (elem == NULL) {
6448 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6449 		if (ret != NULL)
6450 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6451 		return(NULL);
6452 	    }
6453 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6454 	    if (last == NULL) {
6455 		if (ret != NULL)
6456 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6457 		return(NULL);
6458 	    }
6459 	    if (RAW == '?') {
6460 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6461 		NEXT;
6462 	    } else if (RAW == '*') {
6463 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6464 		NEXT;
6465 	    } else if (RAW == '+') {
6466 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6467 		NEXT;
6468 	    } else {
6469 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6470 	    }
6471 	}
6472 	SKIP_BLANKS;
6473 	GROW;
6474     }
6475     if ((cur != NULL) && (last != NULL)) {
6476         cur->c2 = last;
6477 	if (last != NULL)
6478 	    last->parent = cur;
6479     }
6480     if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6481 	xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6482 "Element content declaration doesn't start and stop in the same entity\n",
6483 			 NULL, NULL);
6484     }
6485     NEXT;
6486     if (RAW == '?') {
6487 	if (ret != NULL) {
6488 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6489 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6490 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6491 	    else
6492 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6493 	}
6494 	NEXT;
6495     } else if (RAW == '*') {
6496 	if (ret != NULL) {
6497 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6498 	    cur = ret;
6499 	    /*
6500 	     * Some normalization:
6501 	     * (a | b* | c?)* == (a | b | c)*
6502 	     */
6503 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6504 		if ((cur->c1 != NULL) &&
6505 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6506 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6507 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6508 		if ((cur->c2 != NULL) &&
6509 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6510 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6511 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6512 		cur = cur->c2;
6513 	    }
6514 	}
6515 	NEXT;
6516     } else if (RAW == '+') {
6517 	if (ret != NULL) {
6518 	    int found = 0;
6519 
6520 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6521 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6522 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6523 	    else
6524 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6525 	    /*
6526 	     * Some normalization:
6527 	     * (a | b*)+ == (a | b)*
6528 	     * (a | b?)+ == (a | b)*
6529 	     */
6530 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6531 		if ((cur->c1 != NULL) &&
6532 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6533 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6534 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6535 		    found = 1;
6536 		}
6537 		if ((cur->c2 != NULL) &&
6538 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6539 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6540 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6541 		    found = 1;
6542 		}
6543 		cur = cur->c2;
6544 	    }
6545 	    if (found)
6546 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6547 	}
6548 	NEXT;
6549     }
6550     return(ret);
6551 }
6552 
6553 /**
6554  * xmlParseElementChildrenContentDecl:
6555  * @ctxt:  an XML parser context
6556  * @inputchk:  the input used for the current entity, needed for boundary checks
6557  *
6558  * parse the declaration for a Mixed Element content
6559  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6560  *
6561  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6562  *
6563  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6564  *
6565  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6566  *
6567  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6568  *
6569  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6570  * TODO Parameter-entity replacement text must be properly nested
6571  *	with parenthesized groups. That is to say, if either of the
6572  *	opening or closing parentheses in a choice, seq, or Mixed
6573  *	construct is contained in the replacement text for a parameter
6574  *	entity, both must be contained in the same replacement text. For
6575  *	interoperability, if a parameter-entity reference appears in a
6576  *	choice, seq, or Mixed construct, its replacement text should not
6577  *	be empty, and neither the first nor last non-blank character of
6578  *	the replacement text should be a connector (| or ,).
6579  *
6580  * Returns the tree of xmlElementContentPtr describing the element
6581  *          hierarchy.
6582  */
6583 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6584 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6585     /* stub left for API/ABI compat */
6586     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6587 }
6588 
6589 /**
6590  * xmlParseElementContentDecl:
6591  * @ctxt:  an XML parser context
6592  * @name:  the name of the element being defined.
6593  * @result:  the Element Content pointer will be stored here if any
6594  *
6595  * parse the declaration for an Element content either Mixed or Children,
6596  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6597  *
6598  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6599  *
6600  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6601  */
6602 
6603 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6604 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6605                            xmlElementContentPtr *result) {
6606 
6607     xmlElementContentPtr tree = NULL;
6608     int inputid = ctxt->input->id;
6609     int res;
6610 
6611     *result = NULL;
6612 
6613     if (RAW != '(') {
6614 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6615 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6616 	return(-1);
6617     }
6618     NEXT;
6619     GROW;
6620     if (ctxt->instate == XML_PARSER_EOF)
6621         return(-1);
6622     SKIP_BLANKS;
6623     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6624         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6625 	res = XML_ELEMENT_TYPE_MIXED;
6626     } else {
6627         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6628 	res = XML_ELEMENT_TYPE_ELEMENT;
6629     }
6630     SKIP_BLANKS;
6631     *result = tree;
6632     return(res);
6633 }
6634 
6635 /**
6636  * xmlParseElementDecl:
6637  * @ctxt:  an XML parser context
6638  *
6639  * parse an Element declaration.
6640  *
6641  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6642  *
6643  * [ VC: Unique Element Type Declaration ]
6644  * No element type may be declared more than once
6645  *
6646  * Returns the type of the element, or -1 in case of error
6647  */
6648 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6649 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6650     const xmlChar *name;
6651     int ret = -1;
6652     xmlElementContentPtr content  = NULL;
6653 
6654     /* GROW; done in the caller */
6655     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6656 	xmlParserInputPtr input = ctxt->input;
6657 
6658 	SKIP(9);
6659 	if (!IS_BLANK_CH(CUR)) {
6660 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6661 		           "Space required after 'ELEMENT'\n");
6662 	}
6663         SKIP_BLANKS;
6664         name = xmlParseName(ctxt);
6665 	if (name == NULL) {
6666 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6667 			   "xmlParseElementDecl: no name for Element\n");
6668 	    return(-1);
6669 	}
6670 	while ((RAW == 0) && (ctxt->inputNr > 1))
6671 	    xmlPopInput(ctxt);
6672 	if (!IS_BLANK_CH(CUR)) {
6673 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6674 			   "Space required after the element name\n");
6675 	}
6676         SKIP_BLANKS;
6677 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6678 	    SKIP(5);
6679 	    /*
6680 	     * Element must always be empty.
6681 	     */
6682 	    ret = XML_ELEMENT_TYPE_EMPTY;
6683 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6684 	           (NXT(2) == 'Y')) {
6685 	    SKIP(3);
6686 	    /*
6687 	     * Element is a generic container.
6688 	     */
6689 	    ret = XML_ELEMENT_TYPE_ANY;
6690 	} else if (RAW == '(') {
6691 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6692 	} else {
6693 	    /*
6694 	     * [ WFC: PEs in Internal Subset ] error handling.
6695 	     */
6696 	    if ((RAW == '%') && (ctxt->external == 0) &&
6697 	        (ctxt->inputNr == 1)) {
6698 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6699 	  "PEReference: forbidden within markup decl in internal subset\n");
6700 	    } else {
6701 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6702 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6703             }
6704 	    return(-1);
6705 	}
6706 
6707 	SKIP_BLANKS;
6708 	/*
6709 	 * Pop-up of finished entities.
6710 	 */
6711 	while ((RAW == 0) && (ctxt->inputNr > 1))
6712 	    xmlPopInput(ctxt);
6713 	SKIP_BLANKS;
6714 
6715 	if (RAW != '>') {
6716 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6717 	    if (content != NULL) {
6718 		xmlFreeDocElementContent(ctxt->myDoc, content);
6719 	    }
6720 	} else {
6721 	    if (input != ctxt->input) {
6722 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6723     "Element declaration doesn't start and stop in the same entity\n");
6724 	    }
6725 
6726 	    NEXT;
6727 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6728 		(ctxt->sax->elementDecl != NULL)) {
6729 		if (content != NULL)
6730 		    content->parent = NULL;
6731 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6732 		                       content);
6733 		if ((content != NULL) && (content->parent == NULL)) {
6734 		    /*
6735 		     * this is a trick: if xmlAddElementDecl is called,
6736 		     * instead of copying the full tree it is plugged directly
6737 		     * if called from the parser. Avoid duplicating the
6738 		     * interfaces or change the API/ABI
6739 		     */
6740 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6741 		}
6742 	    } else if (content != NULL) {
6743 		xmlFreeDocElementContent(ctxt->myDoc, content);
6744 	    }
6745 	}
6746     }
6747     return(ret);
6748 }
6749 
6750 /**
6751  * xmlParseConditionalSections
6752  * @ctxt:  an XML parser context
6753  *
6754  * [61] conditionalSect ::= includeSect | ignoreSect
6755  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6756  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6757  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6758  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6759  */
6760 
6761 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6762 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6763     int id = ctxt->input->id;
6764 
6765     SKIP(3);
6766     SKIP_BLANKS;
6767     if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6768 	SKIP(7);
6769 	SKIP_BLANKS;
6770 	if (RAW != '[') {
6771 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6772 	} else {
6773 	    if (ctxt->input->id != id) {
6774 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6775 	    "All markup of the conditional section is not in the same entity\n",
6776 				     NULL, NULL);
6777 	    }
6778 	    NEXT;
6779 	}
6780 	if (xmlParserDebugEntities) {
6781 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6782 		xmlGenericError(xmlGenericErrorContext,
6783 			"%s(%d): ", ctxt->input->filename,
6784 			ctxt->input->line);
6785 	    xmlGenericError(xmlGenericErrorContext,
6786 		    "Entering INCLUDE Conditional Section\n");
6787 	}
6788 
6789 	while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6790 	        (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6791 	    const xmlChar *check = CUR_PTR;
6792 	    unsigned int cons = ctxt->input->consumed;
6793 
6794 	    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6795 		xmlParseConditionalSections(ctxt);
6796 	    } else if (IS_BLANK_CH(CUR)) {
6797 		NEXT;
6798 	    } else if (RAW == '%') {
6799 		xmlParsePEReference(ctxt);
6800 	    } else
6801 		xmlParseMarkupDecl(ctxt);
6802 
6803 	    /*
6804 	     * Pop-up of finished entities.
6805 	     */
6806 	    while ((RAW == 0) && (ctxt->inputNr > 1))
6807 		xmlPopInput(ctxt);
6808 
6809 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6810 		xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6811 		break;
6812 	    }
6813 	}
6814 	if (xmlParserDebugEntities) {
6815 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6816 		xmlGenericError(xmlGenericErrorContext,
6817 			"%s(%d): ", ctxt->input->filename,
6818 			ctxt->input->line);
6819 	    xmlGenericError(xmlGenericErrorContext,
6820 		    "Leaving INCLUDE Conditional Section\n");
6821 	}
6822 
6823     } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6824 	int state;
6825 	xmlParserInputState instate;
6826 	int depth = 0;
6827 
6828 	SKIP(6);
6829 	SKIP_BLANKS;
6830 	if (RAW != '[') {
6831 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6832 	} else {
6833 	    if (ctxt->input->id != id) {
6834 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6835 	    "All markup of the conditional section is not in the same entity\n",
6836 				     NULL, NULL);
6837 	    }
6838 	    NEXT;
6839 	}
6840 	if (xmlParserDebugEntities) {
6841 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6842 		xmlGenericError(xmlGenericErrorContext,
6843 			"%s(%d): ", ctxt->input->filename,
6844 			ctxt->input->line);
6845 	    xmlGenericError(xmlGenericErrorContext,
6846 		    "Entering IGNORE Conditional Section\n");
6847 	}
6848 
6849 	/*
6850 	 * Parse up to the end of the conditional section
6851 	 * But disable SAX event generating DTD building in the meantime
6852 	 */
6853 	state = ctxt->disableSAX;
6854 	instate = ctxt->instate;
6855 	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6856 	ctxt->instate = XML_PARSER_IGNORE;
6857 
6858 	while (((depth >= 0) && (RAW != 0)) &&
6859                (ctxt->instate != XML_PARSER_EOF)) {
6860 	  if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6861 	    depth++;
6862 	    SKIP(3);
6863 	    continue;
6864 	  }
6865 	  if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6866 	    if (--depth >= 0) SKIP(3);
6867 	    continue;
6868 	  }
6869 	  NEXT;
6870 	  continue;
6871 	}
6872 
6873 	ctxt->disableSAX = state;
6874 	ctxt->instate = instate;
6875 
6876 	if (xmlParserDebugEntities) {
6877 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6878 		xmlGenericError(xmlGenericErrorContext,
6879 			"%s(%d): ", ctxt->input->filename,
6880 			ctxt->input->line);
6881 	    xmlGenericError(xmlGenericErrorContext,
6882 		    "Leaving IGNORE Conditional Section\n");
6883 	}
6884 
6885     } else {
6886 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6887     }
6888 
6889     if (RAW == 0)
6890         SHRINK;
6891 
6892     if (RAW == 0) {
6893 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6894     } else {
6895 	if (ctxt->input->id != id) {
6896 	    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6897 	"All markup of the conditional section is not in the same entity\n",
6898 				 NULL, NULL);
6899 	}
6900         SKIP(3);
6901     }
6902 }
6903 
6904 /**
6905  * xmlParseMarkupDecl:
6906  * @ctxt:  an XML parser context
6907  *
6908  * parse Markup declarations
6909  *
6910  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6911  *                     NotationDecl | PI | Comment
6912  *
6913  * [ VC: Proper Declaration/PE Nesting ]
6914  * Parameter-entity replacement text must be properly nested with
6915  * markup declarations. That is to say, if either the first character
6916  * or the last character of a markup declaration (markupdecl above) is
6917  * contained in the replacement text for a parameter-entity reference,
6918  * both must be contained in the same replacement text.
6919  *
6920  * [ WFC: PEs in Internal Subset ]
6921  * In the internal DTD subset, parameter-entity references can occur
6922  * only where markup declarations can occur, not within markup declarations.
6923  * (This does not apply to references that occur in external parameter
6924  * entities or to the external subset.)
6925  */
6926 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6927 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6928     GROW;
6929     if (CUR == '<') {
6930         if (NXT(1) == '!') {
6931 	    switch (NXT(2)) {
6932 	        case 'E':
6933 		    if (NXT(3) == 'L')
6934 			xmlParseElementDecl(ctxt);
6935 		    else if (NXT(3) == 'N')
6936 			xmlParseEntityDecl(ctxt);
6937 		    break;
6938 	        case 'A':
6939 		    xmlParseAttributeListDecl(ctxt);
6940 		    break;
6941 	        case 'N':
6942 		    xmlParseNotationDecl(ctxt);
6943 		    break;
6944 	        case '-':
6945 		    xmlParseComment(ctxt);
6946 		    break;
6947 		default:
6948 		    /* there is an error but it will be detected later */
6949 		    break;
6950 	    }
6951 	} else if (NXT(1) == '?') {
6952 	    xmlParsePI(ctxt);
6953 	}
6954     }
6955     /*
6956      * This is only for internal subset. On external entities,
6957      * the replacement is done before parsing stage
6958      */
6959     if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6960 	xmlParsePEReference(ctxt);
6961 
6962     /*
6963      * Conditional sections are allowed from entities included
6964      * by PE References in the internal subset.
6965      */
6966     if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6967         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6968 	    xmlParseConditionalSections(ctxt);
6969 	}
6970     }
6971 
6972     ctxt->instate = XML_PARSER_DTD;
6973 }
6974 
6975 /**
6976  * xmlParseTextDecl:
6977  * @ctxt:  an XML parser context
6978  *
6979  * parse an XML declaration header for external entities
6980  *
6981  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6982  */
6983 
6984 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6985 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6986     xmlChar *version;
6987     const xmlChar *encoding;
6988 
6989     /*
6990      * We know that '<?xml' is here.
6991      */
6992     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6993 	SKIP(5);
6994     } else {
6995 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6996 	return;
6997     }
6998 
6999     if (!IS_BLANK_CH(CUR)) {
7000 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7001 		       "Space needed after '<?xml'\n");
7002     }
7003     SKIP_BLANKS;
7004 
7005     /*
7006      * We may have the VersionInfo here.
7007      */
7008     version = xmlParseVersionInfo(ctxt);
7009     if (version == NULL)
7010 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
7011     else {
7012 	if (!IS_BLANK_CH(CUR)) {
7013 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7014 		           "Space needed here\n");
7015 	}
7016     }
7017     ctxt->input->version = version;
7018 
7019     /*
7020      * We must have the encoding declaration
7021      */
7022     encoding = xmlParseEncodingDecl(ctxt);
7023     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7024 	/*
7025 	 * The XML REC instructs us to stop parsing right here
7026 	 */
7027         return;
7028     }
7029     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7030 	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7031 		       "Missing encoding in text declaration\n");
7032     }
7033 
7034     SKIP_BLANKS;
7035     if ((RAW == '?') && (NXT(1) == '>')) {
7036         SKIP(2);
7037     } else if (RAW == '>') {
7038         /* Deprecated old WD ... */
7039 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7040 	NEXT;
7041     } else {
7042 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7043 	MOVETO_ENDTAG(CUR_PTR);
7044 	NEXT;
7045     }
7046 }
7047 
7048 /**
7049  * xmlParseExternalSubset:
7050  * @ctxt:  an XML parser context
7051  * @ExternalID: the external identifier
7052  * @SystemID: the system identifier (or URL)
7053  *
7054  * parse Markup declarations from an external subset
7055  *
7056  * [30] extSubset ::= textDecl? extSubsetDecl
7057  *
7058  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7059  */
7060 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7061 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7062                        const xmlChar *SystemID) {
7063     xmlDetectSAX2(ctxt);
7064     GROW;
7065 
7066     if ((ctxt->encoding == NULL) &&
7067         (ctxt->input->end - ctxt->input->cur >= 4)) {
7068         xmlChar start[4];
7069 	xmlCharEncoding enc;
7070 
7071 	start[0] = RAW;
7072 	start[1] = NXT(1);
7073 	start[2] = NXT(2);
7074 	start[3] = NXT(3);
7075 	enc = xmlDetectCharEncoding(start, 4);
7076 	if (enc != XML_CHAR_ENCODING_NONE)
7077 	    xmlSwitchEncoding(ctxt, enc);
7078     }
7079 
7080     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7081 	xmlParseTextDecl(ctxt);
7082 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7083 	    /*
7084 	     * The XML REC instructs us to stop parsing right here
7085 	     */
7086 	    ctxt->instate = XML_PARSER_EOF;
7087 	    return;
7088 	}
7089     }
7090     if (ctxt->myDoc == NULL) {
7091         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7092 	if (ctxt->myDoc == NULL) {
7093 	    xmlErrMemory(ctxt, "New Doc failed");
7094 	    return;
7095 	}
7096 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
7097     }
7098     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7099         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7100 
7101     ctxt->instate = XML_PARSER_DTD;
7102     ctxt->external = 1;
7103     while (((RAW == '<') && (NXT(1) == '?')) ||
7104            ((RAW == '<') && (NXT(1) == '!')) ||
7105 	   (RAW == '%') || IS_BLANK_CH(CUR)) {
7106 	const xmlChar *check = CUR_PTR;
7107 	unsigned int cons = ctxt->input->consumed;
7108 
7109 	GROW;
7110         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7111 	    xmlParseConditionalSections(ctxt);
7112 	} else if (IS_BLANK_CH(CUR)) {
7113 	    NEXT;
7114 	} else if (RAW == '%') {
7115             xmlParsePEReference(ctxt);
7116 	} else
7117 	    xmlParseMarkupDecl(ctxt);
7118 
7119 	/*
7120 	 * Pop-up of finished entities.
7121 	 */
7122 	while ((RAW == 0) && (ctxt->inputNr > 1))
7123 	    xmlPopInput(ctxt);
7124 
7125 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7126 	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7127 	    break;
7128 	}
7129     }
7130 
7131     if (RAW != 0) {
7132 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7133     }
7134 
7135 }
7136 
7137 /**
7138  * xmlParseReference:
7139  * @ctxt:  an XML parser context
7140  *
7141  * parse and handle entity references in content, depending on the SAX
7142  * interface, this may end-up in a call to character() if this is a
7143  * CharRef, a predefined entity, if there is no reference() callback.
7144  * or if the parser was asked to switch to that mode.
7145  *
7146  * [67] Reference ::= EntityRef | CharRef
7147  */
7148 void
xmlParseReference(xmlParserCtxtPtr ctxt)7149 xmlParseReference(xmlParserCtxtPtr ctxt) {
7150     xmlEntityPtr ent;
7151     xmlChar *val;
7152     int was_checked;
7153     xmlNodePtr list = NULL;
7154     xmlParserErrors ret = XML_ERR_OK;
7155 
7156 
7157     if (RAW != '&')
7158         return;
7159 
7160     /*
7161      * Simple case of a CharRef
7162      */
7163     if (NXT(1) == '#') {
7164 	int i = 0;
7165 	xmlChar out[10];
7166 	int hex = NXT(2);
7167 	int value = xmlParseCharRef(ctxt);
7168 
7169 	if (value == 0)
7170 	    return;
7171 	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7172 	    /*
7173 	     * So we are using non-UTF-8 buffers
7174 	     * Check that the char fit on 8bits, if not
7175 	     * generate a CharRef.
7176 	     */
7177 	    if (value <= 0xFF) {
7178 		out[0] = value;
7179 		out[1] = 0;
7180 		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7181 		    (!ctxt->disableSAX))
7182 		    ctxt->sax->characters(ctxt->userData, out, 1);
7183 	    } else {
7184 		if ((hex == 'x') || (hex == 'X'))
7185 		    snprintf((char *)out, sizeof(out), "#x%X", value);
7186 		else
7187 		    snprintf((char *)out, sizeof(out), "#%d", value);
7188 		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7189 		    (!ctxt->disableSAX))
7190 		    ctxt->sax->reference(ctxt->userData, out);
7191 	    }
7192 	} else {
7193 	    /*
7194 	     * Just encode the value in UTF-8
7195 	     */
7196 	    COPY_BUF(0 ,out, i, value);
7197 	    out[i] = 0;
7198 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7199 		(!ctxt->disableSAX))
7200 		ctxt->sax->characters(ctxt->userData, out, i);
7201 	}
7202 	return;
7203     }
7204 
7205     /*
7206      * We are seeing an entity reference
7207      */
7208     ent = xmlParseEntityRef(ctxt);
7209     if (ent == NULL) return;
7210     if (!ctxt->wellFormed)
7211 	return;
7212     was_checked = ent->checked;
7213 
7214     /* special case of predefined entities */
7215     if ((ent->name == NULL) ||
7216         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7217 	val = ent->content;
7218 	if (val == NULL) return;
7219 	/*
7220 	 * inline the entity.
7221 	 */
7222 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223 	    (!ctxt->disableSAX))
7224 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7225 	return;
7226     }
7227 
7228     /*
7229      * The first reference to the entity trigger a parsing phase
7230      * where the ent->children is filled with the result from
7231      * the parsing.
7232      * Note: external parsed entities will not be loaded, it is not
7233      * required for a non-validating parser, unless the parsing option
7234      * of validating, or substituting entities were given. Doing so is
7235      * far more secure as the parser will only process data coming from
7236      * the document entity by default.
7237      */
7238     if (((ent->checked == 0) ||
7239          ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7240         ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7241          (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7242 	unsigned long oldnbent = ctxt->nbentities;
7243 
7244 	/*
7245 	 * This is a bit hackish but this seems the best
7246 	 * way to make sure both SAX and DOM entity support
7247 	 * behaves okay.
7248 	 */
7249 	void *user_data;
7250 	if (ctxt->userData == ctxt)
7251 	    user_data = NULL;
7252 	else
7253 	    user_data = ctxt->userData;
7254 
7255 	/*
7256 	 * Check that this entity is well formed
7257 	 * 4.3.2: An internal general parsed entity is well-formed
7258 	 * if its replacement text matches the production labeled
7259 	 * content.
7260 	 */
7261 	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7262 	    ctxt->depth++;
7263 	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7264 	                                              user_data, &list);
7265 	    ctxt->depth--;
7266 
7267 	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7268 	    ctxt->depth++;
7269 	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7270 	                                   user_data, ctxt->depth, ent->URI,
7271 					   ent->ExternalID, &list);
7272 	    ctxt->depth--;
7273 	} else {
7274 	    ret = XML_ERR_ENTITY_PE_INTERNAL;
7275 	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7276 			 "invalid entity type found\n", NULL);
7277 	}
7278 
7279 	/*
7280 	 * Store the number of entities needing parsing for this entity
7281 	 * content and do checkings
7282 	 */
7283 	ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7284 	if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7285 	    ent->checked |= 1;
7286 	if (ret == XML_ERR_ENTITY_LOOP) {
7287 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7288 	    xmlFreeNodeList(list);
7289 	    return;
7290 	}
7291 	if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7292 	    xmlFreeNodeList(list);
7293 	    return;
7294 	}
7295 
7296 	if ((ret == XML_ERR_OK) && (list != NULL)) {
7297 	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7298 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7299 		(ent->children == NULL)) {
7300 		ent->children = list;
7301 		if (ctxt->replaceEntities) {
7302 		    /*
7303 		     * Prune it directly in the generated document
7304 		     * except for single text nodes.
7305 		     */
7306 		    if (((list->type == XML_TEXT_NODE) &&
7307 			 (list->next == NULL)) ||
7308 			(ctxt->parseMode == XML_PARSE_READER)) {
7309 			list->parent = (xmlNodePtr) ent;
7310 			list = NULL;
7311 			ent->owner = 1;
7312 		    } else {
7313 			ent->owner = 0;
7314 			while (list != NULL) {
7315 			    list->parent = (xmlNodePtr) ctxt->node;
7316 			    list->doc = ctxt->myDoc;
7317 			    if (list->next == NULL)
7318 				ent->last = list;
7319 			    list = list->next;
7320 			}
7321 			list = ent->children;
7322 #ifdef LIBXML_LEGACY_ENABLED
7323 			if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7324 			  xmlAddEntityReference(ent, list, NULL);
7325 #endif /* LIBXML_LEGACY_ENABLED */
7326 		    }
7327 		} else {
7328 		    ent->owner = 1;
7329 		    while (list != NULL) {
7330 			list->parent = (xmlNodePtr) ent;
7331 			xmlSetTreeDoc(list, ent->doc);
7332 			if (list->next == NULL)
7333 			    ent->last = list;
7334 			list = list->next;
7335 		    }
7336 		}
7337 	    } else {
7338 		xmlFreeNodeList(list);
7339 		list = NULL;
7340 	    }
7341 	} else if ((ret != XML_ERR_OK) &&
7342 		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
7343 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7344 		     "Entity '%s' failed to parse\n", ent->name);
7345 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
7346 	} else if (list != NULL) {
7347 	    xmlFreeNodeList(list);
7348 	    list = NULL;
7349 	}
7350 	if (ent->checked == 0)
7351 	    ent->checked = 2;
7352     } else if (ent->checked != 1) {
7353 	ctxt->nbentities += ent->checked / 2;
7354     }
7355 
7356     /*
7357      * Now that the entity content has been gathered
7358      * provide it to the application, this can take different forms based
7359      * on the parsing modes.
7360      */
7361     if (ent->children == NULL) {
7362 	/*
7363 	 * Probably running in SAX mode and the callbacks don't
7364 	 * build the entity content. So unless we already went
7365 	 * though parsing for first checking go though the entity
7366 	 * content to generate callbacks associated to the entity
7367 	 */
7368 	if (was_checked != 0) {
7369 	    void *user_data;
7370 	    /*
7371 	     * This is a bit hackish but this seems the best
7372 	     * way to make sure both SAX and DOM entity support
7373 	     * behaves okay.
7374 	     */
7375 	    if (ctxt->userData == ctxt)
7376 		user_data = NULL;
7377 	    else
7378 		user_data = ctxt->userData;
7379 
7380 	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7381 		ctxt->depth++;
7382 		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7383 				   ent->content, user_data, NULL);
7384 		ctxt->depth--;
7385 	    } else if (ent->etype ==
7386 		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7387 		ctxt->depth++;
7388 		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7389 			   ctxt->sax, user_data, ctxt->depth,
7390 			   ent->URI, ent->ExternalID, NULL);
7391 		ctxt->depth--;
7392 	    } else {
7393 		ret = XML_ERR_ENTITY_PE_INTERNAL;
7394 		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7395 			     "invalid entity type found\n", NULL);
7396 	    }
7397 	    if (ret == XML_ERR_ENTITY_LOOP) {
7398 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7399 		return;
7400 	    }
7401 	}
7402 	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7403 	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7404 	    /*
7405 	     * Entity reference callback comes second, it's somewhat
7406 	     * superfluous but a compatibility to historical behaviour
7407 	     */
7408 	    ctxt->sax->reference(ctxt->userData, ent->name);
7409 	}
7410 	return;
7411     }
7412 
7413     /*
7414      * If we didn't get any children for the entity being built
7415      */
7416     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7417 	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7418 	/*
7419 	 * Create a node.
7420 	 */
7421 	ctxt->sax->reference(ctxt->userData, ent->name);
7422 	return;
7423     }
7424 
7425     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7426 	/*
7427 	 * There is a problem on the handling of _private for entities
7428 	 * (bug 155816): Should we copy the content of the field from
7429 	 * the entity (possibly overwriting some value set by the user
7430 	 * when a copy is created), should we leave it alone, or should
7431 	 * we try to take care of different situations?  The problem
7432 	 * is exacerbated by the usage of this field by the xmlReader.
7433 	 * To fix this bug, we look at _private on the created node
7434 	 * and, if it's NULL, we copy in whatever was in the entity.
7435 	 * If it's not NULL we leave it alone.  This is somewhat of a
7436 	 * hack - maybe we should have further tests to determine
7437 	 * what to do.
7438 	 */
7439 	if ((ctxt->node != NULL) && (ent->children != NULL)) {
7440 	    /*
7441 	     * Seems we are generating the DOM content, do
7442 	     * a simple tree copy for all references except the first
7443 	     * In the first occurrence list contains the replacement.
7444 	     */
7445 	    if (((list == NULL) && (ent->owner == 0)) ||
7446 		(ctxt->parseMode == XML_PARSE_READER)) {
7447 		xmlNodePtr nw = NULL, cur, firstChild = NULL;
7448 
7449 		/*
7450 		 * We are copying here, make sure there is no abuse
7451 		 */
7452 		ctxt->sizeentcopy += ent->length + 5;
7453 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7454 		    return;
7455 
7456 		/*
7457 		 * when operating on a reader, the entities definitions
7458 		 * are always owning the entities subtree.
7459 		if (ctxt->parseMode == XML_PARSE_READER)
7460 		    ent->owner = 1;
7461 		 */
7462 
7463 		cur = ent->children;
7464 		while (cur != NULL) {
7465 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7466 		    if (nw != NULL) {
7467 			if (nw->_private == NULL)
7468 			    nw->_private = cur->_private;
7469 			if (firstChild == NULL){
7470 			    firstChild = nw;
7471 			}
7472 			nw = xmlAddChild(ctxt->node, nw);
7473 		    }
7474 		    if (cur == ent->last) {
7475 			/*
7476 			 * needed to detect some strange empty
7477 			 * node cases in the reader tests
7478 			 */
7479 			if ((ctxt->parseMode == XML_PARSE_READER) &&
7480 			    (nw != NULL) &&
7481 			    (nw->type == XML_ELEMENT_NODE) &&
7482 			    (nw->children == NULL))
7483 			    nw->extra = 1;
7484 
7485 			break;
7486 		    }
7487 		    cur = cur->next;
7488 		}
7489 #ifdef LIBXML_LEGACY_ENABLED
7490 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7491 		  xmlAddEntityReference(ent, firstChild, nw);
7492 #endif /* LIBXML_LEGACY_ENABLED */
7493 	    } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7494 		xmlNodePtr nw = NULL, cur, next, last,
7495 			   firstChild = NULL;
7496 
7497 		/*
7498 		 * We are copying here, make sure there is no abuse
7499 		 */
7500 		ctxt->sizeentcopy += ent->length + 5;
7501 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7502 		    return;
7503 
7504 		/*
7505 		 * Copy the entity child list and make it the new
7506 		 * entity child list. The goal is to make sure any
7507 		 * ID or REF referenced will be the one from the
7508 		 * document content and not the entity copy.
7509 		 */
7510 		cur = ent->children;
7511 		ent->children = NULL;
7512 		last = ent->last;
7513 		ent->last = NULL;
7514 		while (cur != NULL) {
7515 		    next = cur->next;
7516 		    cur->next = NULL;
7517 		    cur->parent = NULL;
7518 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7519 		    if (nw != NULL) {
7520 			if (nw->_private == NULL)
7521 			    nw->_private = cur->_private;
7522 			if (firstChild == NULL){
7523 			    firstChild = cur;
7524 			}
7525 			xmlAddChild((xmlNodePtr) ent, nw);
7526 			xmlAddChild(ctxt->node, cur);
7527 		    }
7528 		    if (cur == last)
7529 			break;
7530 		    cur = next;
7531 		}
7532 		if (ent->owner == 0)
7533 		    ent->owner = 1;
7534 #ifdef LIBXML_LEGACY_ENABLED
7535 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7536 		  xmlAddEntityReference(ent, firstChild, nw);
7537 #endif /* LIBXML_LEGACY_ENABLED */
7538 	    } else {
7539 		const xmlChar *nbktext;
7540 
7541 		/*
7542 		 * the name change is to avoid coalescing of the
7543 		 * node with a possible previous text one which
7544 		 * would make ent->children a dangling pointer
7545 		 */
7546 		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7547 					-1);
7548 		if (ent->children->type == XML_TEXT_NODE)
7549 		    ent->children->name = nbktext;
7550 		if ((ent->last != ent->children) &&
7551 		    (ent->last->type == XML_TEXT_NODE))
7552 		    ent->last->name = nbktext;
7553 		xmlAddChildList(ctxt->node, ent->children);
7554 	    }
7555 
7556 	    /*
7557 	     * This is to avoid a nasty side effect, see
7558 	     * characters() in SAX.c
7559 	     */
7560 	    ctxt->nodemem = 0;
7561 	    ctxt->nodelen = 0;
7562 	    return;
7563 	}
7564     }
7565 }
7566 
7567 /**
7568  * xmlParseEntityRef:
7569  * @ctxt:  an XML parser context
7570  *
7571  * parse ENTITY references declarations
7572  *
7573  * [68] EntityRef ::= '&' Name ';'
7574  *
7575  * [ WFC: Entity Declared ]
7576  * In a document without any DTD, a document with only an internal DTD
7577  * subset which contains no parameter entity references, or a document
7578  * with "standalone='yes'", the Name given in the entity reference
7579  * must match that in an entity declaration, except that well-formed
7580  * documents need not declare any of the following entities: amp, lt,
7581  * gt, apos, quot.  The declaration of a parameter entity must precede
7582  * any reference to it.  Similarly, the declaration of a general entity
7583  * must precede any reference to it which appears in a default value in an
7584  * attribute-list declaration. Note that if entities are declared in the
7585  * external subset or in external parameter entities, a non-validating
7586  * processor is not obligated to read and process their declarations;
7587  * for such documents, the rule that an entity must be declared is a
7588  * well-formedness constraint only if standalone='yes'.
7589  *
7590  * [ WFC: Parsed Entity ]
7591  * An entity reference must not contain the name of an unparsed entity
7592  *
7593  * Returns the xmlEntityPtr if found, or NULL otherwise.
7594  */
7595 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7596 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7597     const xmlChar *name;
7598     xmlEntityPtr ent = NULL;
7599 
7600     GROW;
7601     if (ctxt->instate == XML_PARSER_EOF)
7602         return(NULL);
7603 
7604     if (RAW != '&')
7605         return(NULL);
7606     NEXT;
7607     name = xmlParseName(ctxt);
7608     if (name == NULL) {
7609 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7610 		       "xmlParseEntityRef: no name\n");
7611         return(NULL);
7612     }
7613     if (RAW != ';') {
7614 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7615 	return(NULL);
7616     }
7617     NEXT;
7618 
7619     /*
7620      * Predefined entities override any extra definition
7621      */
7622     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7623         ent = xmlGetPredefinedEntity(name);
7624         if (ent != NULL)
7625             return(ent);
7626     }
7627 
7628     /*
7629      * Increase the number of entity references parsed
7630      */
7631     ctxt->nbentities++;
7632 
7633     /*
7634      * Ask first SAX for entity resolution, otherwise try the
7635      * entities which may have stored in the parser context.
7636      */
7637     if (ctxt->sax != NULL) {
7638 	if (ctxt->sax->getEntity != NULL)
7639 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7640 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7641 	    (ctxt->options & XML_PARSE_OLDSAX))
7642 	    ent = xmlGetPredefinedEntity(name);
7643 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7644 	    (ctxt->userData==ctxt)) {
7645 	    ent = xmlSAX2GetEntity(ctxt, name);
7646 	}
7647     }
7648     if (ctxt->instate == XML_PARSER_EOF)
7649 	return(NULL);
7650     /*
7651      * [ WFC: Entity Declared ]
7652      * In a document without any DTD, a document with only an
7653      * internal DTD subset which contains no parameter entity
7654      * references, or a document with "standalone='yes'", the
7655      * Name given in the entity reference must match that in an
7656      * entity declaration, except that well-formed documents
7657      * need not declare any of the following entities: amp, lt,
7658      * gt, apos, quot.
7659      * The declaration of a parameter entity must precede any
7660      * reference to it.
7661      * Similarly, the declaration of a general entity must
7662      * precede any reference to it which appears in a default
7663      * value in an attribute-list declaration. Note that if
7664      * entities are declared in the external subset or in
7665      * external parameter entities, a non-validating processor
7666      * is not obligated to read and process their declarations;
7667      * for such documents, the rule that an entity must be
7668      * declared is a well-formedness constraint only if
7669      * standalone='yes'.
7670      */
7671     if (ent == NULL) {
7672 	if ((ctxt->standalone == 1) ||
7673 	    ((ctxt->hasExternalSubset == 0) &&
7674 	     (ctxt->hasPErefs == 0))) {
7675 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7676 		     "Entity '%s' not defined\n", name);
7677 	} else {
7678 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7679 		     "Entity '%s' not defined\n", name);
7680 	    if ((ctxt->inSubset == 0) &&
7681 		(ctxt->sax != NULL) &&
7682 		(ctxt->sax->reference != NULL)) {
7683 		ctxt->sax->reference(ctxt->userData, name);
7684 	    }
7685 	}
7686 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7687 	ctxt->valid = 0;
7688     }
7689 
7690     /*
7691      * [ WFC: Parsed Entity ]
7692      * An entity reference must not contain the name of an
7693      * unparsed entity
7694      */
7695     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7696 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7697 		 "Entity reference to unparsed entity %s\n", name);
7698     }
7699 
7700     /*
7701      * [ WFC: No External Entity References ]
7702      * Attribute values cannot contain direct or indirect
7703      * entity references to external entities.
7704      */
7705     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7706 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7707 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7708 	     "Attribute references external entity '%s'\n", name);
7709     }
7710     /*
7711      * [ WFC: No < in Attribute Values ]
7712      * The replacement text of any entity referred to directly or
7713      * indirectly in an attribute value (other than "&lt;") must
7714      * not contain a <.
7715      */
7716     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7717 	     (ent != NULL) &&
7718 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7719 	if (((ent->checked & 1) || (ent->checked == 0)) &&
7720 	     (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7721 	    xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7722 	"'<' in entity '%s' is not allowed in attributes values\n", name);
7723         }
7724     }
7725 
7726     /*
7727      * Internal check, no parameter entities here ...
7728      */
7729     else {
7730 	switch (ent->etype) {
7731 	    case XML_INTERNAL_PARAMETER_ENTITY:
7732 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7733 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7734 	     "Attempt to reference the parameter entity '%s'\n",
7735 			      name);
7736 	    break;
7737 	    default:
7738 	    break;
7739 	}
7740     }
7741 
7742     /*
7743      * [ WFC: No Recursion ]
7744      * A parsed entity must not contain a recursive reference
7745      * to itself, either directly or indirectly.
7746      * Done somewhere else
7747      */
7748     return(ent);
7749 }
7750 
7751 /**
7752  * xmlParseStringEntityRef:
7753  * @ctxt:  an XML parser context
7754  * @str:  a pointer to an index in the string
7755  *
7756  * parse ENTITY references declarations, but this version parses it from
7757  * a string value.
7758  *
7759  * [68] EntityRef ::= '&' Name ';'
7760  *
7761  * [ WFC: Entity Declared ]
7762  * In a document without any DTD, a document with only an internal DTD
7763  * subset which contains no parameter entity references, or a document
7764  * with "standalone='yes'", the Name given in the entity reference
7765  * must match that in an entity declaration, except that well-formed
7766  * documents need not declare any of the following entities: amp, lt,
7767  * gt, apos, quot.  The declaration of a parameter entity must precede
7768  * any reference to it.  Similarly, the declaration of a general entity
7769  * must precede any reference to it which appears in a default value in an
7770  * attribute-list declaration. Note that if entities are declared in the
7771  * external subset or in external parameter entities, a non-validating
7772  * processor is not obligated to read and process their declarations;
7773  * for such documents, the rule that an entity must be declared is a
7774  * well-formedness constraint only if standalone='yes'.
7775  *
7776  * [ WFC: Parsed Entity ]
7777  * An entity reference must not contain the name of an unparsed entity
7778  *
7779  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7780  * is updated to the current location in the string.
7781  */
7782 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7783 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7784     xmlChar *name;
7785     const xmlChar *ptr;
7786     xmlChar cur;
7787     xmlEntityPtr ent = NULL;
7788 
7789     if ((str == NULL) || (*str == NULL))
7790         return(NULL);
7791     ptr = *str;
7792     cur = *ptr;
7793     if (cur != '&')
7794 	return(NULL);
7795 
7796     ptr++;
7797     name = xmlParseStringName(ctxt, &ptr);
7798     if (name == NULL) {
7799 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7800 		       "xmlParseStringEntityRef: no name\n");
7801 	*str = ptr;
7802 	return(NULL);
7803     }
7804     if (*ptr != ';') {
7805 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7806         xmlFree(name);
7807 	*str = ptr;
7808 	return(NULL);
7809     }
7810     ptr++;
7811 
7812 
7813     /*
7814      * Predefined entities override any extra definition
7815      */
7816     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7817         ent = xmlGetPredefinedEntity(name);
7818         if (ent != NULL) {
7819             xmlFree(name);
7820             *str = ptr;
7821             return(ent);
7822         }
7823     }
7824 
7825     /*
7826      * Increate the number of entity references parsed
7827      */
7828     ctxt->nbentities++;
7829 
7830     /*
7831      * Ask first SAX for entity resolution, otherwise try the
7832      * entities which may have stored in the parser context.
7833      */
7834     if (ctxt->sax != NULL) {
7835 	if (ctxt->sax->getEntity != NULL)
7836 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7837 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7838 	    ent = xmlGetPredefinedEntity(name);
7839 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7840 	    ent = xmlSAX2GetEntity(ctxt, name);
7841 	}
7842     }
7843     if (ctxt->instate == XML_PARSER_EOF) {
7844 	xmlFree(name);
7845 	return(NULL);
7846     }
7847 
7848     /*
7849      * [ WFC: Entity Declared ]
7850      * In a document without any DTD, a document with only an
7851      * internal DTD subset which contains no parameter entity
7852      * references, or a document with "standalone='yes'", the
7853      * Name given in the entity reference must match that in an
7854      * entity declaration, except that well-formed documents
7855      * need not declare any of the following entities: amp, lt,
7856      * gt, apos, quot.
7857      * The declaration of a parameter entity must precede any
7858      * reference to it.
7859      * Similarly, the declaration of a general entity must
7860      * precede any reference to it which appears in a default
7861      * value in an attribute-list declaration. Note that if
7862      * entities are declared in the external subset or in
7863      * external parameter entities, a non-validating processor
7864      * is not obligated to read and process their declarations;
7865      * for such documents, the rule that an entity must be
7866      * declared is a well-formedness constraint only if
7867      * standalone='yes'.
7868      */
7869     if (ent == NULL) {
7870 	if ((ctxt->standalone == 1) ||
7871 	    ((ctxt->hasExternalSubset == 0) &&
7872 	     (ctxt->hasPErefs == 0))) {
7873 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7874 		     "Entity '%s' not defined\n", name);
7875 	} else {
7876 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7877 			  "Entity '%s' not defined\n",
7878 			  name);
7879 	}
7880 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7881 	/* TODO ? check regressions ctxt->valid = 0; */
7882     }
7883 
7884     /*
7885      * [ WFC: Parsed Entity ]
7886      * An entity reference must not contain the name of an
7887      * unparsed entity
7888      */
7889     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7890 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7891 		 "Entity reference to unparsed entity %s\n", name);
7892     }
7893 
7894     /*
7895      * [ WFC: No External Entity References ]
7896      * Attribute values cannot contain direct or indirect
7897      * entity references to external entities.
7898      */
7899     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7900 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7901 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7902 	 "Attribute references external entity '%s'\n", name);
7903     }
7904     /*
7905      * [ WFC: No < in Attribute Values ]
7906      * The replacement text of any entity referred to directly or
7907      * indirectly in an attribute value (other than "&lt;") must
7908      * not contain a <.
7909      */
7910     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7911 	     (ent != NULL) && (ent->content != NULL) &&
7912 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7913 	     (xmlStrchr(ent->content, '<'))) {
7914 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7915      "'<' in entity '%s' is not allowed in attributes values\n",
7916 			  name);
7917     }
7918 
7919     /*
7920      * Internal check, no parameter entities here ...
7921      */
7922     else {
7923 	switch (ent->etype) {
7924 	    case XML_INTERNAL_PARAMETER_ENTITY:
7925 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7926 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7927 	     "Attempt to reference the parameter entity '%s'\n",
7928 				  name);
7929 	    break;
7930 	    default:
7931 	    break;
7932 	}
7933     }
7934 
7935     /*
7936      * [ WFC: No Recursion ]
7937      * A parsed entity must not contain a recursive reference
7938      * to itself, either directly or indirectly.
7939      * Done somewhere else
7940      */
7941 
7942     xmlFree(name);
7943     *str = ptr;
7944     return(ent);
7945 }
7946 
7947 /**
7948  * xmlParsePEReference:
7949  * @ctxt:  an XML parser context
7950  *
7951  * parse PEReference declarations
7952  * The entity content is handled directly by pushing it's content as
7953  * a new input stream.
7954  *
7955  * [69] PEReference ::= '%' Name ';'
7956  *
7957  * [ WFC: No Recursion ]
7958  * A parsed entity must not contain a recursive
7959  * reference to itself, either directly or indirectly.
7960  *
7961  * [ WFC: Entity Declared ]
7962  * In a document without any DTD, a document with only an internal DTD
7963  * subset which contains no parameter entity references, or a document
7964  * with "standalone='yes'", ...  ... The declaration of a parameter
7965  * entity must precede any reference to it...
7966  *
7967  * [ VC: Entity Declared ]
7968  * In a document with an external subset or external parameter entities
7969  * with "standalone='no'", ...  ... The declaration of a parameter entity
7970  * must precede any reference to it...
7971  *
7972  * [ WFC: In DTD ]
7973  * Parameter-entity references may only appear in the DTD.
7974  * NOTE: misleading but this is handled.
7975  */
7976 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7977 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7978 {
7979     const xmlChar *name;
7980     xmlEntityPtr entity = NULL;
7981     xmlParserInputPtr input;
7982 
7983     if (RAW != '%')
7984         return;
7985     NEXT;
7986     name = xmlParseName(ctxt);
7987     if (name == NULL) {
7988 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7989 		       "xmlParsePEReference: no name\n");
7990 	return;
7991     }
7992     if (RAW != ';') {
7993 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7994         return;
7995     }
7996 
7997     NEXT;
7998 
7999     /*
8000      * Increate the number of entity references parsed
8001      */
8002     ctxt->nbentities++;
8003 
8004     /*
8005      * Request the entity from SAX
8006      */
8007     if ((ctxt->sax != NULL) &&
8008 	(ctxt->sax->getParameterEntity != NULL))
8009 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8010     if (ctxt->instate == XML_PARSER_EOF)
8011 	return;
8012     if (entity == NULL) {
8013 	/*
8014 	 * [ WFC: Entity Declared ]
8015 	 * In a document without any DTD, a document with only an
8016 	 * internal DTD subset which contains no parameter entity
8017 	 * references, or a document with "standalone='yes'", ...
8018 	 * ... The declaration of a parameter entity must precede
8019 	 * any reference to it...
8020 	 */
8021 	if ((ctxt->standalone == 1) ||
8022 	    ((ctxt->hasExternalSubset == 0) &&
8023 	     (ctxt->hasPErefs == 0))) {
8024 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8025 			      "PEReference: %%%s; not found\n",
8026 			      name);
8027 	} else {
8028 	    /*
8029 	     * [ VC: Entity Declared ]
8030 	     * In a document with an external subset or external
8031 	     * parameter entities with "standalone='no'", ...
8032 	     * ... The declaration of a parameter entity must
8033 	     * precede any reference to it...
8034 	     */
8035 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8036 			  "PEReference: %%%s; not found\n",
8037 			  name, NULL);
8038 	    ctxt->valid = 0;
8039 	}
8040 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
8041     } else {
8042 	/*
8043 	 * Internal checking in case the entity quest barfed
8044 	 */
8045 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8046 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8047 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8048 		  "Internal: %%%s; is not a parameter entity\n",
8049 			  name, NULL);
8050 	} else if (ctxt->input->free != deallocblankswrapper) {
8051 	    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8052 	    if (xmlPushInput(ctxt, input) < 0)
8053 		return;
8054 	} else {
8055 	    /*
8056 	     * TODO !!!
8057 	     * handle the extra spaces added before and after
8058 	     * c.f. http://www.w3.org/TR/REC-xml#as-PE
8059 	     */
8060 	    input = xmlNewEntityInputStream(ctxt, entity);
8061 	    if (xmlPushInput(ctxt, input) < 0)
8062 		return;
8063 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8064 		(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8065 		(IS_BLANK_CH(NXT(5)))) {
8066 		xmlParseTextDecl(ctxt);
8067 		if (ctxt->errNo ==
8068 		    XML_ERR_UNSUPPORTED_ENCODING) {
8069 		    /*
8070 		     * The XML REC instructs us to stop parsing
8071 		     * right here
8072 		     */
8073 		    ctxt->instate = XML_PARSER_EOF;
8074 		    return;
8075 		}
8076 	    }
8077 	}
8078     }
8079     ctxt->hasPErefs = 1;
8080 }
8081 
8082 /**
8083  * xmlLoadEntityContent:
8084  * @ctxt:  an XML parser context
8085  * @entity: an unloaded system entity
8086  *
8087  * Load the original content of the given system entity from the
8088  * ExternalID/SystemID given. This is to be used for Included in Literal
8089  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8090  *
8091  * Returns 0 in case of success and -1 in case of failure
8092  */
8093 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8094 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8095     xmlParserInputPtr input;
8096     xmlBufferPtr buf;
8097     int l, c;
8098     int count = 0;
8099 
8100     if ((ctxt == NULL) || (entity == NULL) ||
8101         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8102 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8103 	(entity->content != NULL)) {
8104 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8105 	            "xmlLoadEntityContent parameter error");
8106         return(-1);
8107     }
8108 
8109     if (xmlParserDebugEntities)
8110 	xmlGenericError(xmlGenericErrorContext,
8111 		"Reading %s entity content input\n", entity->name);
8112 
8113     buf = xmlBufferCreate();
8114     if (buf == NULL) {
8115 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8116 	            "xmlLoadEntityContent parameter error");
8117         return(-1);
8118     }
8119 
8120     input = xmlNewEntityInputStream(ctxt, entity);
8121     if (input == NULL) {
8122 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8123 	            "xmlLoadEntityContent input error");
8124 	xmlBufferFree(buf);
8125         return(-1);
8126     }
8127 
8128     /*
8129      * Push the entity as the current input, read char by char
8130      * saving to the buffer until the end of the entity or an error
8131      */
8132     if (xmlPushInput(ctxt, input) < 0) {
8133         xmlBufferFree(buf);
8134 	return(-1);
8135     }
8136 
8137     GROW;
8138     c = CUR_CHAR(l);
8139     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8140            (IS_CHAR(c))) {
8141         xmlBufferAdd(buf, ctxt->input->cur, l);
8142 	if (count++ > XML_PARSER_CHUNK_SIZE) {
8143 	    count = 0;
8144 	    GROW;
8145             if (ctxt->instate == XML_PARSER_EOF) {
8146                 xmlBufferFree(buf);
8147                 return(-1);
8148             }
8149 	}
8150 	NEXTL(l);
8151 	c = CUR_CHAR(l);
8152 	if (c == 0) {
8153 	    count = 0;
8154 	    GROW;
8155             if (ctxt->instate == XML_PARSER_EOF) {
8156                 xmlBufferFree(buf);
8157                 return(-1);
8158             }
8159 	    c = CUR_CHAR(l);
8160 	}
8161     }
8162 
8163     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8164         xmlPopInput(ctxt);
8165     } else if (!IS_CHAR(c)) {
8166         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8167                           "xmlLoadEntityContent: invalid char value %d\n",
8168 	                  c);
8169 	xmlBufferFree(buf);
8170 	return(-1);
8171     }
8172     entity->content = buf->content;
8173     buf->content = NULL;
8174     xmlBufferFree(buf);
8175 
8176     return(0);
8177 }
8178 
8179 /**
8180  * xmlParseStringPEReference:
8181  * @ctxt:  an XML parser context
8182  * @str:  a pointer to an index in the string
8183  *
8184  * parse PEReference declarations
8185  *
8186  * [69] PEReference ::= '%' Name ';'
8187  *
8188  * [ WFC: No Recursion ]
8189  * A parsed entity must not contain a recursive
8190  * reference to itself, either directly or indirectly.
8191  *
8192  * [ WFC: Entity Declared ]
8193  * In a document without any DTD, a document with only an internal DTD
8194  * subset which contains no parameter entity references, or a document
8195  * with "standalone='yes'", ...  ... The declaration of a parameter
8196  * entity must precede any reference to it...
8197  *
8198  * [ VC: Entity Declared ]
8199  * In a document with an external subset or external parameter entities
8200  * with "standalone='no'", ...  ... The declaration of a parameter entity
8201  * must precede any reference to it...
8202  *
8203  * [ WFC: In DTD ]
8204  * Parameter-entity references may only appear in the DTD.
8205  * NOTE: misleading but this is handled.
8206  *
8207  * Returns the string of the entity content.
8208  *         str is updated to the current value of the index
8209  */
8210 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8211 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8212     const xmlChar *ptr;
8213     xmlChar cur;
8214     xmlChar *name;
8215     xmlEntityPtr entity = NULL;
8216 
8217     if ((str == NULL) || (*str == NULL)) return(NULL);
8218     ptr = *str;
8219     cur = *ptr;
8220     if (cur != '%')
8221         return(NULL);
8222     ptr++;
8223     name = xmlParseStringName(ctxt, &ptr);
8224     if (name == NULL) {
8225 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8226 		       "xmlParseStringPEReference: no name\n");
8227 	*str = ptr;
8228 	return(NULL);
8229     }
8230     cur = *ptr;
8231     if (cur != ';') {
8232 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8233 	xmlFree(name);
8234 	*str = ptr;
8235 	return(NULL);
8236     }
8237     ptr++;
8238 
8239     /*
8240      * Increate the number of entity references parsed
8241      */
8242     ctxt->nbentities++;
8243 
8244     /*
8245      * Request the entity from SAX
8246      */
8247     if ((ctxt->sax != NULL) &&
8248 	(ctxt->sax->getParameterEntity != NULL))
8249 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8250     if (ctxt->instate == XML_PARSER_EOF) {
8251 	xmlFree(name);
8252 	return(NULL);
8253     }
8254     if (entity == NULL) {
8255 	/*
8256 	 * [ WFC: Entity Declared ]
8257 	 * In a document without any DTD, a document with only an
8258 	 * internal DTD subset which contains no parameter entity
8259 	 * references, or a document with "standalone='yes'", ...
8260 	 * ... The declaration of a parameter entity must precede
8261 	 * any reference to it...
8262 	 */
8263 	if ((ctxt->standalone == 1) ||
8264 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8265 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8266 		 "PEReference: %%%s; not found\n", name);
8267 	} else {
8268 	    /*
8269 	     * [ VC: Entity Declared ]
8270 	     * In a document with an external subset or external
8271 	     * parameter entities with "standalone='no'", ...
8272 	     * ... The declaration of a parameter entity must
8273 	     * precede any reference to it...
8274 	     */
8275 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8276 			  "PEReference: %%%s; not found\n",
8277 			  name, NULL);
8278 	    ctxt->valid = 0;
8279 	}
8280 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
8281     } else {
8282 	/*
8283 	 * Internal checking in case the entity quest barfed
8284 	 */
8285 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8286 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8287 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8288 			  "%%%s; is not a parameter entity\n",
8289 			  name, NULL);
8290 	}
8291     }
8292     ctxt->hasPErefs = 1;
8293     xmlFree(name);
8294     *str = ptr;
8295     return(entity);
8296 }
8297 
8298 /**
8299  * xmlParseDocTypeDecl:
8300  * @ctxt:  an XML parser context
8301  *
8302  * parse a DOCTYPE declaration
8303  *
8304  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8305  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8306  *
8307  * [ VC: Root Element Type ]
8308  * The Name in the document type declaration must match the element
8309  * type of the root element.
8310  */
8311 
8312 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8313 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8314     const xmlChar *name = NULL;
8315     xmlChar *ExternalID = NULL;
8316     xmlChar *URI = NULL;
8317 
8318     /*
8319      * We know that '<!DOCTYPE' has been detected.
8320      */
8321     SKIP(9);
8322 
8323     SKIP_BLANKS;
8324 
8325     /*
8326      * Parse the DOCTYPE name.
8327      */
8328     name = xmlParseName(ctxt);
8329     if (name == NULL) {
8330 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8331 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8332     }
8333     ctxt->intSubName = name;
8334 
8335     SKIP_BLANKS;
8336 
8337     /*
8338      * Check for SystemID and ExternalID
8339      */
8340     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8341 
8342     if ((URI != NULL) || (ExternalID != NULL)) {
8343         ctxt->hasExternalSubset = 1;
8344     }
8345     ctxt->extSubURI = URI;
8346     ctxt->extSubSystem = ExternalID;
8347 
8348     SKIP_BLANKS;
8349 
8350     /*
8351      * Create and update the internal subset.
8352      */
8353     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8354 	(!ctxt->disableSAX))
8355 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8356     if (ctxt->instate == XML_PARSER_EOF)
8357 	return;
8358 
8359     /*
8360      * Is there any internal subset declarations ?
8361      * they are handled separately in xmlParseInternalSubset()
8362      */
8363     if (RAW == '[')
8364 	return;
8365 
8366     /*
8367      * We should be at the end of the DOCTYPE declaration.
8368      */
8369     if (RAW != '>') {
8370 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8371     }
8372     NEXT;
8373 }
8374 
8375 /**
8376  * xmlParseInternalSubset:
8377  * @ctxt:  an XML parser context
8378  *
8379  * parse the internal subset declaration
8380  *
8381  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8382  */
8383 
8384 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8385 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8386     /*
8387      * Is there any DTD definition ?
8388      */
8389     if (RAW == '[') {
8390         ctxt->instate = XML_PARSER_DTD;
8391         NEXT;
8392 	/*
8393 	 * Parse the succession of Markup declarations and
8394 	 * PEReferences.
8395 	 * Subsequence (markupdecl | PEReference | S)*
8396 	 */
8397 	while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8398 	    const xmlChar *check = CUR_PTR;
8399 	    unsigned int cons = ctxt->input->consumed;
8400 
8401 	    SKIP_BLANKS;
8402 	    xmlParseMarkupDecl(ctxt);
8403 	    xmlParsePEReference(ctxt);
8404 
8405 	    /*
8406 	     * Pop-up of finished entities.
8407 	     */
8408 	    while ((RAW == 0) && (ctxt->inputNr > 1))
8409 		xmlPopInput(ctxt);
8410 
8411 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8412 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8413 	     "xmlParseInternalSubset: error detected in Markup declaration\n");
8414 		break;
8415 	    }
8416 	}
8417 	if (RAW == ']') {
8418 	    NEXT;
8419 	    SKIP_BLANKS;
8420 	}
8421     }
8422 
8423     /*
8424      * We should be at the end of the DOCTYPE declaration.
8425      */
8426     if (RAW != '>') {
8427 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8428     }
8429     NEXT;
8430 }
8431 
8432 #ifdef LIBXML_SAX1_ENABLED
8433 /**
8434  * xmlParseAttribute:
8435  * @ctxt:  an XML parser context
8436  * @value:  a xmlChar ** used to store the value of the attribute
8437  *
8438  * parse an attribute
8439  *
8440  * [41] Attribute ::= Name Eq AttValue
8441  *
8442  * [ WFC: No External Entity References ]
8443  * Attribute values cannot contain direct or indirect entity references
8444  * to external entities.
8445  *
8446  * [ WFC: No < in Attribute Values ]
8447  * The replacement text of any entity referred to directly or indirectly in
8448  * an attribute value (other than "&lt;") must not contain a <.
8449  *
8450  * [ VC: Attribute Value Type ]
8451  * The attribute must have been declared; the value must be of the type
8452  * declared for it.
8453  *
8454  * [25] Eq ::= S? '=' S?
8455  *
8456  * With namespace:
8457  *
8458  * [NS 11] Attribute ::= QName Eq AttValue
8459  *
8460  * Also the case QName == xmlns:??? is handled independently as a namespace
8461  * definition.
8462  *
8463  * Returns the attribute name, and the value in *value.
8464  */
8465 
8466 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8467 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8468     const xmlChar *name;
8469     xmlChar *val;
8470 
8471     *value = NULL;
8472     GROW;
8473     name = xmlParseName(ctxt);
8474     if (name == NULL) {
8475 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8476 	               "error parsing attribute name\n");
8477         return(NULL);
8478     }
8479 
8480     /*
8481      * read the value
8482      */
8483     SKIP_BLANKS;
8484     if (RAW == '=') {
8485         NEXT;
8486 	SKIP_BLANKS;
8487 	val = xmlParseAttValue(ctxt);
8488 	ctxt->instate = XML_PARSER_CONTENT;
8489     } else {
8490 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8491 	       "Specification mandate value for attribute %s\n", name);
8492 	return(NULL);
8493     }
8494 
8495     /*
8496      * Check that xml:lang conforms to the specification
8497      * No more registered as an error, just generate a warning now
8498      * since this was deprecated in XML second edition
8499      */
8500     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8501 	if (!xmlCheckLanguageID(val)) {
8502 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8503 		          "Malformed value for xml:lang : %s\n",
8504 			  val, NULL);
8505 	}
8506     }
8507 
8508     /*
8509      * Check that xml:space conforms to the specification
8510      */
8511     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8512 	if (xmlStrEqual(val, BAD_CAST "default"))
8513 	    *(ctxt->space) = 0;
8514 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8515 	    *(ctxt->space) = 1;
8516 	else {
8517 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8518 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8519                                  val, NULL);
8520 	}
8521     }
8522 
8523     *value = val;
8524     return(name);
8525 }
8526 
8527 /**
8528  * xmlParseStartTag:
8529  * @ctxt:  an XML parser context
8530  *
8531  * parse a start of tag either for rule element or
8532  * EmptyElement. In both case we don't parse the tag closing chars.
8533  *
8534  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8535  *
8536  * [ WFC: Unique Att Spec ]
8537  * No attribute name may appear more than once in the same start-tag or
8538  * empty-element tag.
8539  *
8540  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8541  *
8542  * [ WFC: Unique Att Spec ]
8543  * No attribute name may appear more than once in the same start-tag or
8544  * empty-element tag.
8545  *
8546  * With namespace:
8547  *
8548  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8549  *
8550  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8551  *
8552  * Returns the element name parsed
8553  */
8554 
8555 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8556 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8557     const xmlChar *name;
8558     const xmlChar *attname;
8559     xmlChar *attvalue;
8560     const xmlChar **atts = ctxt->atts;
8561     int nbatts = 0;
8562     int maxatts = ctxt->maxatts;
8563     int i;
8564 
8565     if (RAW != '<') return(NULL);
8566     NEXT1;
8567 
8568     name = xmlParseName(ctxt);
8569     if (name == NULL) {
8570 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8571 	     "xmlParseStartTag: invalid element name\n");
8572         return(NULL);
8573     }
8574 
8575     /*
8576      * Now parse the attributes, it ends up with the ending
8577      *
8578      * (S Attribute)* S?
8579      */
8580     SKIP_BLANKS;
8581     GROW;
8582 
8583     while (((RAW != '>') &&
8584 	   ((RAW != '/') || (NXT(1) != '>')) &&
8585 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8586 	const xmlChar *q = CUR_PTR;
8587 	unsigned int cons = ctxt->input->consumed;
8588 
8589 	attname = xmlParseAttribute(ctxt, &attvalue);
8590         if ((attname != NULL) && (attvalue != NULL)) {
8591 	    /*
8592 	     * [ WFC: Unique Att Spec ]
8593 	     * No attribute name may appear more than once in the same
8594 	     * start-tag or empty-element tag.
8595 	     */
8596 	    for (i = 0; i < nbatts;i += 2) {
8597 	        if (xmlStrEqual(atts[i], attname)) {
8598 		    xmlErrAttributeDup(ctxt, NULL, attname);
8599 		    xmlFree(attvalue);
8600 		    goto failed;
8601 		}
8602 	    }
8603 	    /*
8604 	     * Add the pair to atts
8605 	     */
8606 	    if (atts == NULL) {
8607 	        maxatts = 22; /* allow for 10 attrs by default */
8608 	        atts = (const xmlChar **)
8609 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8610 		if (atts == NULL) {
8611 		    xmlErrMemory(ctxt, NULL);
8612 		    if (attvalue != NULL)
8613 			xmlFree(attvalue);
8614 		    goto failed;
8615 		}
8616 		ctxt->atts = atts;
8617 		ctxt->maxatts = maxatts;
8618 	    } else if (nbatts + 4 > maxatts) {
8619 	        const xmlChar **n;
8620 
8621 	        maxatts *= 2;
8622 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8623 					     maxatts * sizeof(const xmlChar *));
8624 		if (n == NULL) {
8625 		    xmlErrMemory(ctxt, NULL);
8626 		    if (attvalue != NULL)
8627 			xmlFree(attvalue);
8628 		    goto failed;
8629 		}
8630 		atts = n;
8631 		ctxt->atts = atts;
8632 		ctxt->maxatts = maxatts;
8633 	    }
8634 	    atts[nbatts++] = attname;
8635 	    atts[nbatts++] = attvalue;
8636 	    atts[nbatts] = NULL;
8637 	    atts[nbatts + 1] = NULL;
8638 	} else {
8639 	    if (attvalue != NULL)
8640 		xmlFree(attvalue);
8641 	}
8642 
8643 failed:
8644 
8645 	GROW
8646 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8647 	    break;
8648 	if (!IS_BLANK_CH(RAW)) {
8649 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8650 			   "attributes construct error\n");
8651 	}
8652 	SKIP_BLANKS;
8653         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8654             (attname == NULL) && (attvalue == NULL)) {
8655 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8656 			   "xmlParseStartTag: problem parsing attributes\n");
8657 	    break;
8658 	}
8659 	SHRINK;
8660         GROW;
8661     }
8662 
8663     /*
8664      * SAX: Start of Element !
8665      */
8666     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8667 	(!ctxt->disableSAX)) {
8668 	if (nbatts > 0)
8669 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8670 	else
8671 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8672     }
8673 
8674     if (atts != NULL) {
8675         /* Free only the content strings */
8676         for (i = 1;i < nbatts;i+=2)
8677 	    if (atts[i] != NULL)
8678 	       xmlFree((xmlChar *) atts[i]);
8679     }
8680     return(name);
8681 }
8682 
8683 /**
8684  * xmlParseEndTag1:
8685  * @ctxt:  an XML parser context
8686  * @line:  line of the start tag
8687  * @nsNr:  number of namespaces on the start tag
8688  *
8689  * parse an end of tag
8690  *
8691  * [42] ETag ::= '</' Name S? '>'
8692  *
8693  * With namespace
8694  *
8695  * [NS 9] ETag ::= '</' QName S? '>'
8696  */
8697 
8698 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8699 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8700     const xmlChar *name;
8701 
8702     GROW;
8703     if ((RAW != '<') || (NXT(1) != '/')) {
8704 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8705 		       "xmlParseEndTag: '</' not found\n");
8706 	return;
8707     }
8708     SKIP(2);
8709 
8710     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8711 
8712     /*
8713      * We should definitely be at the ending "S? '>'" part
8714      */
8715     GROW;
8716     SKIP_BLANKS;
8717     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8718 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8719     } else
8720 	NEXT1;
8721 
8722     /*
8723      * [ WFC: Element Type Match ]
8724      * The Name in an element's end-tag must match the element type in the
8725      * start-tag.
8726      *
8727      */
8728     if (name != (xmlChar*)1) {
8729         if (name == NULL) name = BAD_CAST "unparseable";
8730         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8731 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8732 		                ctxt->name, line, name);
8733     }
8734 
8735     /*
8736      * SAX: End of Tag
8737      */
8738     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8739 	(!ctxt->disableSAX))
8740         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8741 
8742     namePop(ctxt);
8743     spacePop(ctxt);
8744     return;
8745 }
8746 
8747 /**
8748  * xmlParseEndTag:
8749  * @ctxt:  an XML parser context
8750  *
8751  * parse an end of tag
8752  *
8753  * [42] ETag ::= '</' Name S? '>'
8754  *
8755  * With namespace
8756  *
8757  * [NS 9] ETag ::= '</' QName S? '>'
8758  */
8759 
8760 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8761 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8762     xmlParseEndTag1(ctxt, 0);
8763 }
8764 #endif /* LIBXML_SAX1_ENABLED */
8765 
8766 /************************************************************************
8767  *									*
8768  *		      SAX 2 specific operations				*
8769  *									*
8770  ************************************************************************/
8771 
8772 /*
8773  * xmlGetNamespace:
8774  * @ctxt:  an XML parser context
8775  * @prefix:  the prefix to lookup
8776  *
8777  * Lookup the namespace name for the @prefix (which ca be NULL)
8778  * The prefix must come from the @ctxt->dict dictionnary
8779  *
8780  * Returns the namespace name or NULL if not bound
8781  */
8782 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8783 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8784     int i;
8785 
8786     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8787     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8788         if (ctxt->nsTab[i] == prefix) {
8789 	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8790 	        return(NULL);
8791 	    return(ctxt->nsTab[i + 1]);
8792 	}
8793     return(NULL);
8794 }
8795 
8796 /**
8797  * xmlParseQName:
8798  * @ctxt:  an XML parser context
8799  * @prefix:  pointer to store the prefix part
8800  *
8801  * parse an XML Namespace QName
8802  *
8803  * [6]  QName  ::= (Prefix ':')? LocalPart
8804  * [7]  Prefix  ::= NCName
8805  * [8]  LocalPart  ::= NCName
8806  *
8807  * Returns the Name parsed or NULL
8808  */
8809 
8810 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8811 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8812     const xmlChar *l, *p;
8813 
8814     GROW;
8815 
8816     l = xmlParseNCName(ctxt);
8817     if (l == NULL) {
8818         if (CUR == ':') {
8819 	    l = xmlParseName(ctxt);
8820 	    if (l != NULL) {
8821 	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8822 		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8823 		*prefix = NULL;
8824 		return(l);
8825 	    }
8826 	}
8827         return(NULL);
8828     }
8829     if (CUR == ':') {
8830         NEXT;
8831 	p = l;
8832 	l = xmlParseNCName(ctxt);
8833 	if (l == NULL) {
8834 	    xmlChar *tmp;
8835 
8836             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8837 	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8838 	    l = xmlParseNmtoken(ctxt);
8839 	    if (l == NULL)
8840 		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8841 	    else {
8842 		tmp = xmlBuildQName(l, p, NULL, 0);
8843 		xmlFree((char *)l);
8844 	    }
8845 	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8846 	    if (tmp != NULL) xmlFree(tmp);
8847 	    *prefix = NULL;
8848 	    return(p);
8849 	}
8850 	if (CUR == ':') {
8851 	    xmlChar *tmp;
8852 
8853             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8854 	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8855 	    NEXT;
8856 	    tmp = (xmlChar *) xmlParseName(ctxt);
8857 	    if (tmp != NULL) {
8858 	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8859 		l = xmlDictLookup(ctxt->dict, tmp, -1);
8860 		if (tmp != NULL) xmlFree(tmp);
8861 		*prefix = p;
8862 		return(l);
8863 	    }
8864 	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8865 	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8866 	    if (tmp != NULL) xmlFree(tmp);
8867 	    *prefix = p;
8868 	    return(l);
8869 	}
8870 	*prefix = p;
8871     } else
8872         *prefix = NULL;
8873     return(l);
8874 }
8875 
8876 /**
8877  * xmlParseQNameAndCompare:
8878  * @ctxt:  an XML parser context
8879  * @name:  the localname
8880  * @prefix:  the prefix, if any.
8881  *
8882  * parse an XML name and compares for match
8883  * (specialized for endtag parsing)
8884  *
8885  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8886  * and the name for mismatch
8887  */
8888 
8889 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8890 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8891                         xmlChar const *prefix) {
8892     const xmlChar *cmp;
8893     const xmlChar *in;
8894     const xmlChar *ret;
8895     const xmlChar *prefix2;
8896 
8897     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8898 
8899     GROW;
8900     in = ctxt->input->cur;
8901 
8902     cmp = prefix;
8903     while (*in != 0 && *in == *cmp) {
8904 	++in;
8905 	++cmp;
8906     }
8907     if ((*cmp == 0) && (*in == ':')) {
8908         in++;
8909 	cmp = name;
8910 	while (*in != 0 && *in == *cmp) {
8911 	    ++in;
8912 	    ++cmp;
8913 	}
8914 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8915 	    /* success */
8916 	    ctxt->input->cur = in;
8917 	    return((const xmlChar*) 1);
8918 	}
8919     }
8920     /*
8921      * all strings coms from the dictionary, equality can be done directly
8922      */
8923     ret = xmlParseQName (ctxt, &prefix2);
8924     if ((ret == name) && (prefix == prefix2))
8925 	return((const xmlChar*) 1);
8926     return ret;
8927 }
8928 
8929 /**
8930  * xmlParseAttValueInternal:
8931  * @ctxt:  an XML parser context
8932  * @len:  attribute len result
8933  * @alloc:  whether the attribute was reallocated as a new string
8934  * @normalize:  if 1 then further non-CDATA normalization must be done
8935  *
8936  * parse a value for an attribute.
8937  * NOTE: if no normalization is needed, the routine will return pointers
8938  *       directly from the data buffer.
8939  *
8940  * 3.3.3 Attribute-Value Normalization:
8941  * Before the value of an attribute is passed to the application or
8942  * checked for validity, the XML processor must normalize it as follows:
8943  * - a character reference is processed by appending the referenced
8944  *   character to the attribute value
8945  * - an entity reference is processed by recursively processing the
8946  *   replacement text of the entity
8947  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8948  *   appending #x20 to the normalized value, except that only a single
8949  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8950  *   parsed entity or the literal entity value of an internal parsed entity
8951  * - other characters are processed by appending them to the normalized value
8952  * If the declared value is not CDATA, then the XML processor must further
8953  * process the normalized attribute value by discarding any leading and
8954  * trailing space (#x20) characters, and by replacing sequences of space
8955  * (#x20) characters by a single space (#x20) character.
8956  * All attributes for which no declaration has been read should be treated
8957  * by a non-validating parser as if declared CDATA.
8958  *
8959  * Returns the AttValue parsed or NULL. The value has to be freed by the
8960  *     caller if it was copied, this can be detected by val[*len] == 0.
8961  */
8962 
8963 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8964 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8965                          int normalize)
8966 {
8967     xmlChar limit = 0;
8968     const xmlChar *in = NULL, *start, *end, *last;
8969     xmlChar *ret = NULL;
8970     int line, col;
8971 
8972     GROW;
8973     in = (xmlChar *) CUR_PTR;
8974     line = ctxt->input->line;
8975     col = ctxt->input->col;
8976     if (*in != '"' && *in != '\'') {
8977         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8978         return (NULL);
8979     }
8980     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8981 
8982     /*
8983      * try to handle in this routine the most common case where no
8984      * allocation of a new string is required and where content is
8985      * pure ASCII.
8986      */
8987     limit = *in++;
8988     col++;
8989     end = ctxt->input->end;
8990     start = in;
8991     if (in >= end) {
8992         const xmlChar *oldbase = ctxt->input->base;
8993 	GROW;
8994 	if (oldbase != ctxt->input->base) {
8995 	    long delta = ctxt->input->base - oldbase;
8996 	    start = start + delta;
8997 	    in = in + delta;
8998 	}
8999 	end = ctxt->input->end;
9000     }
9001     if (normalize) {
9002         /*
9003 	 * Skip any leading spaces
9004 	 */
9005 	while ((in < end) && (*in != limit) &&
9006 	       ((*in == 0x20) || (*in == 0x9) ||
9007 	        (*in == 0xA) || (*in == 0xD))) {
9008 	    if (*in == 0xA) {
9009 	        line++; col = 1;
9010 	    } else {
9011 	        col++;
9012 	    }
9013 	    in++;
9014 	    start = in;
9015 	    if (in >= end) {
9016 		const xmlChar *oldbase = ctxt->input->base;
9017 		GROW;
9018                 if (ctxt->instate == XML_PARSER_EOF)
9019                     return(NULL);
9020 		if (oldbase != ctxt->input->base) {
9021 		    long delta = ctxt->input->base - oldbase;
9022 		    start = start + delta;
9023 		    in = in + delta;
9024 		}
9025 		end = ctxt->input->end;
9026                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9027                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9028                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9029                                    "AttValue length too long\n");
9030                     return(NULL);
9031                 }
9032 	    }
9033 	}
9034 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9035 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9036 	    col++;
9037 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
9038 	    if (in >= end) {
9039 		const xmlChar *oldbase = ctxt->input->base;
9040 		GROW;
9041                 if (ctxt->instate == XML_PARSER_EOF)
9042                     return(NULL);
9043 		if (oldbase != ctxt->input->base) {
9044 		    long delta = ctxt->input->base - oldbase;
9045 		    start = start + delta;
9046 		    in = in + delta;
9047 		}
9048 		end = ctxt->input->end;
9049                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9050                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9051                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9052                                    "AttValue length too long\n");
9053                     return(NULL);
9054                 }
9055 	    }
9056 	}
9057 	last = in;
9058 	/*
9059 	 * skip the trailing blanks
9060 	 */
9061 	while ((last[-1] == 0x20) && (last > start)) last--;
9062 	while ((in < end) && (*in != limit) &&
9063 	       ((*in == 0x20) || (*in == 0x9) ||
9064 	        (*in == 0xA) || (*in == 0xD))) {
9065 	    if (*in == 0xA) {
9066 	        line++, col = 1;
9067 	    } else {
9068 	        col++;
9069 	    }
9070 	    in++;
9071 	    if (in >= end) {
9072 		const xmlChar *oldbase = ctxt->input->base;
9073 		GROW;
9074                 if (ctxt->instate == XML_PARSER_EOF)
9075                     return(NULL);
9076 		if (oldbase != ctxt->input->base) {
9077 		    long delta = ctxt->input->base - oldbase;
9078 		    start = start + delta;
9079 		    in = in + delta;
9080 		    last = last + delta;
9081 		}
9082 		end = ctxt->input->end;
9083                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9084                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9085                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9086                                    "AttValue length too long\n");
9087                     return(NULL);
9088                 }
9089 	    }
9090 	}
9091         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9092             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9093             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9094                            "AttValue length too long\n");
9095             return(NULL);
9096         }
9097 	if (*in != limit) goto need_complex;
9098     } else {
9099 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9100 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9101 	    in++;
9102 	    col++;
9103 	    if (in >= end) {
9104 		const xmlChar *oldbase = ctxt->input->base;
9105 		GROW;
9106                 if (ctxt->instate == XML_PARSER_EOF)
9107                     return(NULL);
9108 		if (oldbase != ctxt->input->base) {
9109 		    long delta = ctxt->input->base - oldbase;
9110 		    start = start + delta;
9111 		    in = in + delta;
9112 		}
9113 		end = ctxt->input->end;
9114                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9115                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9116                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9117                                    "AttValue length too long\n");
9118                     return(NULL);
9119                 }
9120 	    }
9121 	}
9122 	last = in;
9123         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9124             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9125             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9126                            "AttValue length too long\n");
9127             return(NULL);
9128         }
9129 	if (*in != limit) goto need_complex;
9130     }
9131     in++;
9132     col++;
9133     if (len != NULL) {
9134         *len = last - start;
9135         ret = (xmlChar *) start;
9136     } else {
9137         if (alloc) *alloc = 1;
9138         ret = xmlStrndup(start, last - start);
9139     }
9140     CUR_PTR = in;
9141     ctxt->input->line = line;
9142     ctxt->input->col = col;
9143     if (alloc) *alloc = 0;
9144     return ret;
9145 need_complex:
9146     if (alloc) *alloc = 1;
9147     return xmlParseAttValueComplex(ctxt, len, normalize);
9148 }
9149 
9150 /**
9151  * xmlParseAttribute2:
9152  * @ctxt:  an XML parser context
9153  * @pref:  the element prefix
9154  * @elem:  the element name
9155  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9156  * @value:  a xmlChar ** used to store the value of the attribute
9157  * @len:  an int * to save the length of the attribute
9158  * @alloc:  an int * to indicate if the attribute was allocated
9159  *
9160  * parse an attribute in the new SAX2 framework.
9161  *
9162  * Returns the attribute name, and the value in *value, .
9163  */
9164 
9165 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9166 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9167                    const xmlChar * pref, const xmlChar * elem,
9168                    const xmlChar ** prefix, xmlChar ** value,
9169                    int *len, int *alloc)
9170 {
9171     const xmlChar *name;
9172     xmlChar *val, *internal_val = NULL;
9173     int normalize = 0;
9174 
9175     *value = NULL;
9176     GROW;
9177     name = xmlParseQName(ctxt, prefix);
9178     if (name == NULL) {
9179         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9180                        "error parsing attribute name\n");
9181         return (NULL);
9182     }
9183 
9184     /*
9185      * get the type if needed
9186      */
9187     if (ctxt->attsSpecial != NULL) {
9188         int type;
9189 
9190         type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9191                                             pref, elem, *prefix, name);
9192         if (type != 0)
9193             normalize = 1;
9194     }
9195 
9196     /*
9197      * read the value
9198      */
9199     SKIP_BLANKS;
9200     if (RAW == '=') {
9201         NEXT;
9202         SKIP_BLANKS;
9203         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9204 	if (normalize) {
9205 	    /*
9206 	     * Sometimes a second normalisation pass for spaces is needed
9207 	     * but that only happens if charrefs or entities refernces
9208 	     * have been used in the attribute value, i.e. the attribute
9209 	     * value have been extracted in an allocated string already.
9210 	     */
9211 	    if (*alloc) {
9212 	        const xmlChar *val2;
9213 
9214 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9215 		if ((val2 != NULL) && (val2 != val)) {
9216 		    xmlFree(val);
9217 		    val = (xmlChar *) val2;
9218 		}
9219 	    }
9220 	}
9221         ctxt->instate = XML_PARSER_CONTENT;
9222     } else {
9223         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9224                           "Specification mandate value for attribute %s\n",
9225                           name);
9226         return (NULL);
9227     }
9228 
9229     if (*prefix == ctxt->str_xml) {
9230         /*
9231          * Check that xml:lang conforms to the specification
9232          * No more registered as an error, just generate a warning now
9233          * since this was deprecated in XML second edition
9234          */
9235         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9236             internal_val = xmlStrndup(val, *len);
9237             if (!xmlCheckLanguageID(internal_val)) {
9238                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9239                               "Malformed value for xml:lang : %s\n",
9240                               internal_val, NULL);
9241             }
9242         }
9243 
9244         /*
9245          * Check that xml:space conforms to the specification
9246          */
9247         if (xmlStrEqual(name, BAD_CAST "space")) {
9248             internal_val = xmlStrndup(val, *len);
9249             if (xmlStrEqual(internal_val, BAD_CAST "default"))
9250                 *(ctxt->space) = 0;
9251             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9252                 *(ctxt->space) = 1;
9253             else {
9254                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9255                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9256                               internal_val, NULL);
9257             }
9258         }
9259         if (internal_val) {
9260             xmlFree(internal_val);
9261         }
9262     }
9263 
9264     *value = val;
9265     return (name);
9266 }
9267 /**
9268  * xmlParseStartTag2:
9269  * @ctxt:  an XML parser context
9270  *
9271  * parse a start of tag either for rule element or
9272  * EmptyElement. In both case we don't parse the tag closing chars.
9273  * This routine is called when running SAX2 parsing
9274  *
9275  * [40] STag ::= '<' Name (S Attribute)* S? '>'
9276  *
9277  * [ WFC: Unique Att Spec ]
9278  * No attribute name may appear more than once in the same start-tag or
9279  * empty-element tag.
9280  *
9281  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9282  *
9283  * [ WFC: Unique Att Spec ]
9284  * No attribute name may appear more than once in the same start-tag or
9285  * empty-element tag.
9286  *
9287  * With namespace:
9288  *
9289  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9290  *
9291  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9292  *
9293  * Returns the element name parsed
9294  */
9295 
9296 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9297 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9298                   const xmlChar **URI, int *tlen) {
9299     const xmlChar *localname;
9300     const xmlChar *prefix;
9301     const xmlChar *attname;
9302     const xmlChar *aprefix;
9303     const xmlChar *nsname;
9304     xmlChar *attvalue;
9305     const xmlChar **atts = ctxt->atts;
9306     int maxatts = ctxt->maxatts;
9307     int nratts, nbatts, nbdef;
9308     int i, j, nbNs, attval, oldline, oldcol;
9309     const xmlChar *base;
9310     unsigned long cur;
9311     int nsNr = ctxt->nsNr;
9312 
9313     if (RAW != '<') return(NULL);
9314     NEXT1;
9315 
9316     /*
9317      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9318      *       point since the attribute values may be stored as pointers to
9319      *       the buffer and calling SHRINK would destroy them !
9320      *       The Shrinking is only possible once the full set of attribute
9321      *       callbacks have been done.
9322      */
9323 reparse:
9324     SHRINK;
9325     base = ctxt->input->base;
9326     cur = ctxt->input->cur - ctxt->input->base;
9327     oldline = ctxt->input->line;
9328     oldcol = ctxt->input->col;
9329     nbatts = 0;
9330     nratts = 0;
9331     nbdef = 0;
9332     nbNs = 0;
9333     attval = 0;
9334     /* Forget any namespaces added during an earlier parse of this element. */
9335     ctxt->nsNr = nsNr;
9336 
9337     localname = xmlParseQName(ctxt, &prefix);
9338     if (localname == NULL) {
9339 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9340 		       "StartTag: invalid element name\n");
9341         return(NULL);
9342     }
9343     *tlen = ctxt->input->cur - ctxt->input->base - cur;
9344 
9345     /*
9346      * Now parse the attributes, it ends up with the ending
9347      *
9348      * (S Attribute)* S?
9349      */
9350     SKIP_BLANKS;
9351     GROW;
9352     if (ctxt->input->base != base) goto base_changed;
9353 
9354     while (((RAW != '>') &&
9355 	   ((RAW != '/') || (NXT(1) != '>')) &&
9356 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9357 	const xmlChar *q = CUR_PTR;
9358 	unsigned int cons = ctxt->input->consumed;
9359 	int len = -1, alloc = 0;
9360 
9361 	attname = xmlParseAttribute2(ctxt, prefix, localname,
9362 	                             &aprefix, &attvalue, &len, &alloc);
9363 	if (ctxt->input->base != base) {
9364 	    if ((attvalue != NULL) && (alloc != 0))
9365 	        xmlFree(attvalue);
9366 	    attvalue = NULL;
9367 	    goto base_changed;
9368 	}
9369         if ((attname != NULL) && (attvalue != NULL)) {
9370 	    if (len < 0) len = xmlStrlen(attvalue);
9371             if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9372 	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9373 		xmlURIPtr uri;
9374 
9375                 if (URL == NULL) {
9376 		    xmlErrMemory(ctxt, "dictionary allocation failure");
9377 		    if ((attvalue != NULL) && (alloc != 0))
9378 			xmlFree(attvalue);
9379 		    return(NULL);
9380 		}
9381                 if (*URL != 0) {
9382 		    uri = xmlParseURI((const char *) URL);
9383 		    if (uri == NULL) {
9384 			xmlNsErr(ctxt, XML_WAR_NS_URI,
9385 			         "xmlns: '%s' is not a valid URI\n",
9386 					   URL, NULL, NULL);
9387 		    } else {
9388 			if (uri->scheme == NULL) {
9389 			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9390 				      "xmlns: URI %s is not absolute\n",
9391 				      URL, NULL, NULL);
9392 			}
9393 			xmlFreeURI(uri);
9394 		    }
9395 		    if (URL == ctxt->str_xml_ns) {
9396 			if (attname != ctxt->str_xml) {
9397 			    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9398 			 "xml namespace URI cannot be the default namespace\n",
9399 				     NULL, NULL, NULL);
9400 			}
9401 			goto skip_default_ns;
9402 		    }
9403 		    if ((len == 29) &&
9404 			(xmlStrEqual(URL,
9405 				 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9406 			xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9407 			     "reuse of the xmlns namespace name is forbidden\n",
9408 				 NULL, NULL, NULL);
9409 			goto skip_default_ns;
9410 		    }
9411 		}
9412 		/*
9413 		 * check that it's not a defined namespace
9414 		 */
9415 		for (j = 1;j <= nbNs;j++)
9416 		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9417 			break;
9418 		if (j <= nbNs)
9419 		    xmlErrAttributeDup(ctxt, NULL, attname);
9420 		else
9421 		    if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9422 skip_default_ns:
9423 		if (alloc != 0) xmlFree(attvalue);
9424 		if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9425 		    break;
9426 		if (!IS_BLANK_CH(RAW)) {
9427 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9428 				   "attributes construct error\n");
9429 		    break;
9430 		}
9431 		SKIP_BLANKS;
9432 		continue;
9433 	    }
9434             if (aprefix == ctxt->str_xmlns) {
9435 	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9436 		xmlURIPtr uri;
9437 
9438                 if (attname == ctxt->str_xml) {
9439 		    if (URL != ctxt->str_xml_ns) {
9440 		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9441 			         "xml namespace prefix mapped to wrong URI\n",
9442 			         NULL, NULL, NULL);
9443 		    }
9444 		    /*
9445 		     * Do not keep a namespace definition node
9446 		     */
9447 		    goto skip_ns;
9448 		}
9449                 if (URL == ctxt->str_xml_ns) {
9450 		    if (attname != ctxt->str_xml) {
9451 		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9452 			         "xml namespace URI mapped to wrong prefix\n",
9453 			         NULL, NULL, NULL);
9454 		    }
9455 		    goto skip_ns;
9456 		}
9457                 if (attname == ctxt->str_xmlns) {
9458 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459 			     "redefinition of the xmlns prefix is forbidden\n",
9460 			     NULL, NULL, NULL);
9461 		    goto skip_ns;
9462 		}
9463 		if ((len == 29) &&
9464 		    (xmlStrEqual(URL,
9465 		                 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9466 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9467 			     "reuse of the xmlns namespace name is forbidden\n",
9468 			     NULL, NULL, NULL);
9469 		    goto skip_ns;
9470 		}
9471 		if ((URL == NULL) || (URL[0] == 0)) {
9472 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9473 		             "xmlns:%s: Empty XML namespace is not allowed\n",
9474 			          attname, NULL, NULL);
9475 		    goto skip_ns;
9476 		} else {
9477 		    uri = xmlParseURI((const char *) URL);
9478 		    if (uri == NULL) {
9479 			xmlNsErr(ctxt, XML_WAR_NS_URI,
9480 			     "xmlns:%s: '%s' is not a valid URI\n",
9481 					   attname, URL, NULL);
9482 		    } else {
9483 			if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9484 			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9485 				      "xmlns:%s: URI %s is not absolute\n",
9486 				      attname, URL, NULL);
9487 			}
9488 			xmlFreeURI(uri);
9489 		    }
9490 		}
9491 
9492 		/*
9493 		 * check that it's not a defined namespace
9494 		 */
9495 		for (j = 1;j <= nbNs;j++)
9496 		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9497 			break;
9498 		if (j <= nbNs)
9499 		    xmlErrAttributeDup(ctxt, aprefix, attname);
9500 		else
9501 		    if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9502 skip_ns:
9503 		if (alloc != 0) xmlFree(attvalue);
9504 		if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9505 		    break;
9506 		if (!IS_BLANK_CH(RAW)) {
9507 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9508 				   "attributes construct error\n");
9509 		    break;
9510 		}
9511 		SKIP_BLANKS;
9512 		if (ctxt->input->base != base) goto base_changed;
9513 		continue;
9514 	    }
9515 
9516 	    /*
9517 	     * Add the pair to atts
9518 	     */
9519 	    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9520 	        if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9521 		    if (attvalue[len] == 0)
9522 			xmlFree(attvalue);
9523 		    goto failed;
9524 		}
9525 	        maxatts = ctxt->maxatts;
9526 		atts = ctxt->atts;
9527 	    }
9528 	    ctxt->attallocs[nratts++] = alloc;
9529 	    atts[nbatts++] = attname;
9530 	    atts[nbatts++] = aprefix;
9531 	    atts[nbatts++] = NULL; /* the URI will be fetched later */
9532 	    atts[nbatts++] = attvalue;
9533 	    attvalue += len;
9534 	    atts[nbatts++] = attvalue;
9535 	    /*
9536 	     * tag if some deallocation is needed
9537 	     */
9538 	    if (alloc != 0) attval = 1;
9539 	} else {
9540 	    if ((attvalue != NULL) && (attvalue[len] == 0))
9541 		xmlFree(attvalue);
9542 	}
9543 
9544 failed:
9545 
9546 	GROW
9547         if (ctxt->instate == XML_PARSER_EOF)
9548             break;
9549 	if (ctxt->input->base != base) goto base_changed;
9550 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9551 	    break;
9552 	if (!IS_BLANK_CH(RAW)) {
9553 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9554 			   "attributes construct error\n");
9555 	    break;
9556 	}
9557 	SKIP_BLANKS;
9558         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9559             (attname == NULL) && (attvalue == NULL)) {
9560 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9561 	         "xmlParseStartTag: problem parsing attributes\n");
9562 	    break;
9563 	}
9564         GROW;
9565 	if (ctxt->input->base != base) goto base_changed;
9566     }
9567 
9568     /*
9569      * The attributes defaulting
9570      */
9571     if (ctxt->attsDefault != NULL) {
9572         xmlDefAttrsPtr defaults;
9573 
9574 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9575 	if (defaults != NULL) {
9576 	    for (i = 0;i < defaults->nbAttrs;i++) {
9577 	        attname = defaults->values[5 * i];
9578 		aprefix = defaults->values[5 * i + 1];
9579 
9580                 /*
9581 		 * special work for namespaces defaulted defs
9582 		 */
9583 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9584 		    /*
9585 		     * check that it's not a defined namespace
9586 		     */
9587 		    for (j = 1;j <= nbNs;j++)
9588 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9589 			    break;
9590 	            if (j <= nbNs) continue;
9591 
9592 		    nsname = xmlGetNamespace(ctxt, NULL);
9593 		    if (nsname != defaults->values[5 * i + 2]) {
9594 			if (nsPush(ctxt, NULL,
9595 			           defaults->values[5 * i + 2]) > 0)
9596 			    nbNs++;
9597 		    }
9598 		} else if (aprefix == ctxt->str_xmlns) {
9599 		    /*
9600 		     * check that it's not a defined namespace
9601 		     */
9602 		    for (j = 1;j <= nbNs;j++)
9603 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9604 			    break;
9605 	            if (j <= nbNs) continue;
9606 
9607 		    nsname = xmlGetNamespace(ctxt, attname);
9608 		    if (nsname != defaults->values[2]) {
9609 			if (nsPush(ctxt, attname,
9610 			           defaults->values[5 * i + 2]) > 0)
9611 			    nbNs++;
9612 		    }
9613 		} else {
9614 		    /*
9615 		     * check that it's not a defined attribute
9616 		     */
9617 		    for (j = 0;j < nbatts;j+=5) {
9618 			if ((attname == atts[j]) && (aprefix == atts[j+1]))
9619 			    break;
9620 		    }
9621 		    if (j < nbatts) continue;
9622 
9623 		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9624 			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9625 			    return(NULL);
9626 			}
9627 			maxatts = ctxt->maxatts;
9628 			atts = ctxt->atts;
9629 		    }
9630 		    atts[nbatts++] = attname;
9631 		    atts[nbatts++] = aprefix;
9632 		    if (aprefix == NULL)
9633 			atts[nbatts++] = NULL;
9634 		    else
9635 		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9636 		    atts[nbatts++] = defaults->values[5 * i + 2];
9637 		    atts[nbatts++] = defaults->values[5 * i + 3];
9638 		    if ((ctxt->standalone == 1) &&
9639 		        (defaults->values[5 * i + 4] != NULL)) {
9640 			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9641 	  "standalone: attribute %s on %s defaulted from external subset\n",
9642 	                                 attname, localname);
9643 		    }
9644 		    nbdef++;
9645 		}
9646 	    }
9647 	}
9648     }
9649 
9650     /*
9651      * The attributes checkings
9652      */
9653     for (i = 0; i < nbatts;i += 5) {
9654         /*
9655 	* The default namespace does not apply to attribute names.
9656 	*/
9657 	if (atts[i + 1] != NULL) {
9658 	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9659 	    if (nsname == NULL) {
9660 		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9661 		    "Namespace prefix %s for %s on %s is not defined\n",
9662 		    atts[i + 1], atts[i], localname);
9663 	    }
9664 	    atts[i + 2] = nsname;
9665 	} else
9666 	    nsname = NULL;
9667 	/*
9668 	 * [ WFC: Unique Att Spec ]
9669 	 * No attribute name may appear more than once in the same
9670 	 * start-tag or empty-element tag.
9671 	 * As extended by the Namespace in XML REC.
9672 	 */
9673         for (j = 0; j < i;j += 5) {
9674 	    if (atts[i] == atts[j]) {
9675 	        if (atts[i+1] == atts[j+1]) {
9676 		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9677 		    break;
9678 		}
9679 		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9680 		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9681 			     "Namespaced Attribute %s in '%s' redefined\n",
9682 			     atts[i], nsname, NULL);
9683 		    break;
9684 		}
9685 	    }
9686 	}
9687     }
9688 
9689     nsname = xmlGetNamespace(ctxt, prefix);
9690     if ((prefix != NULL) && (nsname == NULL)) {
9691 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9692 	         "Namespace prefix %s on %s is not defined\n",
9693 		 prefix, localname, NULL);
9694     }
9695     *pref = prefix;
9696     *URI = nsname;
9697 
9698     /*
9699      * SAX: Start of Element !
9700      */
9701     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9702 	(!ctxt->disableSAX)) {
9703 	if (nbNs > 0)
9704 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9705 			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9706 			  nbatts / 5, nbdef, atts);
9707 	else
9708 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9709 	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9710     }
9711 
9712     /*
9713      * Free up attribute allocated strings if needed
9714      */
9715     if (attval != 0) {
9716 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9717 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9718 	        xmlFree((xmlChar *) atts[i]);
9719     }
9720 
9721     return(localname);
9722 
9723 base_changed:
9724     /*
9725      * the attribute strings are valid iif the base didn't changed
9726      */
9727     if (attval != 0) {
9728 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9729 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9730 	        xmlFree((xmlChar *) atts[i]);
9731     }
9732     ctxt->input->cur = ctxt->input->base + cur;
9733     ctxt->input->line = oldline;
9734     ctxt->input->col = oldcol;
9735     if (ctxt->wellFormed == 1) {
9736 	goto reparse;
9737     }
9738     return(NULL);
9739 }
9740 
9741 /**
9742  * xmlParseEndTag2:
9743  * @ctxt:  an XML parser context
9744  * @line:  line of the start tag
9745  * @nsNr:  number of namespaces on the start tag
9746  *
9747  * parse an end of tag
9748  *
9749  * [42] ETag ::= '</' Name S? '>'
9750  *
9751  * With namespace
9752  *
9753  * [NS 9] ETag ::= '</' QName S? '>'
9754  */
9755 
9756 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9757 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9758                 const xmlChar *URI, int line, int nsNr, int tlen) {
9759     const xmlChar *name;
9760 
9761     GROW;
9762     if ((RAW != '<') || (NXT(1) != '/')) {
9763 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9764 	return;
9765     }
9766     SKIP(2);
9767 
9768     if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9769         if (ctxt->input->cur[tlen] == '>') {
9770 	    ctxt->input->cur += tlen + 1;
9771 	    ctxt->input->col += tlen + 1;
9772 	    goto done;
9773 	}
9774 	ctxt->input->cur += tlen;
9775 	ctxt->input->col += tlen;
9776 	name = (xmlChar*)1;
9777     } else {
9778 	if (prefix == NULL)
9779 	    name = xmlParseNameAndCompare(ctxt, ctxt->name);
9780 	else
9781 	    name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9782     }
9783 
9784     /*
9785      * We should definitely be at the ending "S? '>'" part
9786      */
9787     GROW;
9788     if (ctxt->instate == XML_PARSER_EOF)
9789         return;
9790     SKIP_BLANKS;
9791     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9792 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9793     } else
9794 	NEXT1;
9795 
9796     /*
9797      * [ WFC: Element Type Match ]
9798      * The Name in an element's end-tag must match the element type in the
9799      * start-tag.
9800      *
9801      */
9802     if (name != (xmlChar*)1) {
9803         if (name == NULL) name = BAD_CAST "unparseable";
9804         if ((line == 0) && (ctxt->node != NULL))
9805             line = ctxt->node->line;
9806         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9807 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9808 		                ctxt->name, line, name);
9809     }
9810 
9811     /*
9812      * SAX: End of Tag
9813      */
9814 done:
9815     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9816 	(!ctxt->disableSAX))
9817 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9818 
9819     spacePop(ctxt);
9820     if (nsNr != 0)
9821 	nsPop(ctxt, nsNr);
9822     return;
9823 }
9824 
9825 /**
9826  * xmlParseCDSect:
9827  * @ctxt:  an XML parser context
9828  *
9829  * Parse escaped pure raw content.
9830  *
9831  * [18] CDSect ::= CDStart CData CDEnd
9832  *
9833  * [19] CDStart ::= '<![CDATA['
9834  *
9835  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9836  *
9837  * [21] CDEnd ::= ']]>'
9838  */
9839 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9840 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9841     xmlChar *buf = NULL;
9842     int len = 0;
9843     int size = XML_PARSER_BUFFER_SIZE;
9844     int r, rl;
9845     int	s, sl;
9846     int cur, l;
9847     int count = 0;
9848 
9849     /* Check 2.6.0 was NXT(0) not RAW */
9850     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9851 	SKIP(9);
9852     } else
9853         return;
9854 
9855     ctxt->instate = XML_PARSER_CDATA_SECTION;
9856     r = CUR_CHAR(rl);
9857     if (!IS_CHAR(r)) {
9858 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9859 	ctxt->instate = XML_PARSER_CONTENT;
9860         return;
9861     }
9862     NEXTL(rl);
9863     s = CUR_CHAR(sl);
9864     if (!IS_CHAR(s)) {
9865 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9866 	ctxt->instate = XML_PARSER_CONTENT;
9867         return;
9868     }
9869     NEXTL(sl);
9870     cur = CUR_CHAR(l);
9871     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9872     if (buf == NULL) {
9873 	xmlErrMemory(ctxt, NULL);
9874 	return;
9875     }
9876     while (IS_CHAR(cur) &&
9877            ((r != ']') || (s != ']') || (cur != '>'))) {
9878 	if (len + 5 >= size) {
9879 	    xmlChar *tmp;
9880 
9881             if ((size > XML_MAX_TEXT_LENGTH) &&
9882                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9883                 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9884                              "CData section too big found", NULL);
9885                 xmlFree (buf);
9886                 return;
9887             }
9888 	    tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9889 	    if (tmp == NULL) {
9890 	        xmlFree(buf);
9891 		xmlErrMemory(ctxt, NULL);
9892 		return;
9893 	    }
9894 	    buf = tmp;
9895 	    size *= 2;
9896 	}
9897 	COPY_BUF(rl,buf,len,r);
9898 	r = s;
9899 	rl = sl;
9900 	s = cur;
9901 	sl = l;
9902 	count++;
9903 	if (count > 50) {
9904 	    GROW;
9905             if (ctxt->instate == XML_PARSER_EOF) {
9906 		xmlFree(buf);
9907 		return;
9908             }
9909 	    count = 0;
9910 	}
9911 	NEXTL(l);
9912 	cur = CUR_CHAR(l);
9913     }
9914     buf[len] = 0;
9915     ctxt->instate = XML_PARSER_CONTENT;
9916     if (cur != '>') {
9917 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9918 	                     "CData section not finished\n%.50s\n", buf);
9919 	xmlFree(buf);
9920         return;
9921     }
9922     NEXTL(l);
9923 
9924     /*
9925      * OK the buffer is to be consumed as cdata.
9926      */
9927     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9928 	if (ctxt->sax->cdataBlock != NULL)
9929 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9930 	else if (ctxt->sax->characters != NULL)
9931 	    ctxt->sax->characters(ctxt->userData, buf, len);
9932     }
9933     xmlFree(buf);
9934 }
9935 
9936 /**
9937  * xmlParseContent:
9938  * @ctxt:  an XML parser context
9939  *
9940  * Parse a content:
9941  *
9942  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9943  */
9944 
9945 void
xmlParseContent(xmlParserCtxtPtr ctxt)9946 xmlParseContent(xmlParserCtxtPtr ctxt) {
9947     GROW;
9948     while ((RAW != 0) &&
9949 	   ((RAW != '<') || (NXT(1) != '/')) &&
9950 	   (ctxt->instate != XML_PARSER_EOF)) {
9951 	const xmlChar *test = CUR_PTR;
9952 	unsigned int cons = ctxt->input->consumed;
9953 	const xmlChar *cur = ctxt->input->cur;
9954 
9955 	/*
9956 	 * First case : a Processing Instruction.
9957 	 */
9958 	if ((*cur == '<') && (cur[1] == '?')) {
9959 	    xmlParsePI(ctxt);
9960 	}
9961 
9962 	/*
9963 	 * Second case : a CDSection
9964 	 */
9965 	/* 2.6.0 test was *cur not RAW */
9966 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9967 	    xmlParseCDSect(ctxt);
9968 	}
9969 
9970 	/*
9971 	 * Third case :  a comment
9972 	 */
9973 	else if ((*cur == '<') && (NXT(1) == '!') &&
9974 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9975 	    xmlParseComment(ctxt);
9976 	    ctxt->instate = XML_PARSER_CONTENT;
9977 	}
9978 
9979 	/*
9980 	 * Fourth case :  a sub-element.
9981 	 */
9982 	else if (*cur == '<') {
9983 	    xmlParseElement(ctxt);
9984 	}
9985 
9986 	/*
9987 	 * Fifth case : a reference. If if has not been resolved,
9988 	 *    parsing returns it's Name, create the node
9989 	 */
9990 
9991 	else if (*cur == '&') {
9992 	    xmlParseReference(ctxt);
9993 	}
9994 
9995 	/*
9996 	 * Last case, text. Note that References are handled directly.
9997 	 */
9998 	else {
9999 	    xmlParseCharData(ctxt, 0);
10000 	}
10001 
10002 	GROW;
10003 	/*
10004 	 * Pop-up of finished entities.
10005 	 */
10006 	while ((RAW == 0) && (ctxt->inputNr > 1))
10007 	    xmlPopInput(ctxt);
10008 	SHRINK;
10009 
10010 	if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10011 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10012 	                "detected an error in element content\n");
10013 	    ctxt->instate = XML_PARSER_EOF;
10014             break;
10015 	}
10016     }
10017 }
10018 
10019 /**
10020  * xmlParseElement:
10021  * @ctxt:  an XML parser context
10022  *
10023  * parse an XML element, this is highly recursive
10024  *
10025  * [39] element ::= EmptyElemTag | STag content ETag
10026  *
10027  * [ WFC: Element Type Match ]
10028  * The Name in an element's end-tag must match the element type in the
10029  * start-tag.
10030  *
10031  */
10032 
10033 void
xmlParseElement(xmlParserCtxtPtr ctxt)10034 xmlParseElement(xmlParserCtxtPtr ctxt) {
10035     const xmlChar *name;
10036     const xmlChar *prefix = NULL;
10037     const xmlChar *URI = NULL;
10038     xmlParserNodeInfo node_info;
10039     int line, tlen = 0;
10040     xmlNodePtr ret;
10041     int nsNr = ctxt->nsNr;
10042 
10043     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10044         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10045 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10046 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10047 			  xmlParserMaxDepth);
10048 	ctxt->instate = XML_PARSER_EOF;
10049 	return;
10050     }
10051 
10052     /* Capture start position */
10053     if (ctxt->record_info) {
10054         node_info.begin_pos = ctxt->input->consumed +
10055                           (CUR_PTR - ctxt->input->base);
10056 	node_info.begin_line = ctxt->input->line;
10057     }
10058 
10059     if (ctxt->spaceNr == 0)
10060 	spacePush(ctxt, -1);
10061     else if (*ctxt->space == -2)
10062 	spacePush(ctxt, -1);
10063     else
10064 	spacePush(ctxt, *ctxt->space);
10065 
10066     line = ctxt->input->line;
10067 #ifdef LIBXML_SAX1_ENABLED
10068     if (ctxt->sax2)
10069 #endif /* LIBXML_SAX1_ENABLED */
10070         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10071 #ifdef LIBXML_SAX1_ENABLED
10072     else
10073 	name = xmlParseStartTag(ctxt);
10074 #endif /* LIBXML_SAX1_ENABLED */
10075     if (ctxt->instate == XML_PARSER_EOF)
10076 	return;
10077     if (name == NULL) {
10078 	spacePop(ctxt);
10079         return;
10080     }
10081     namePush(ctxt, name);
10082     ret = ctxt->node;
10083 
10084 #ifdef LIBXML_VALID_ENABLED
10085     /*
10086      * [ VC: Root Element Type ]
10087      * The Name in the document type declaration must match the element
10088      * type of the root element.
10089      */
10090     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10091         ctxt->node && (ctxt->node == ctxt->myDoc->children))
10092         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10093 #endif /* LIBXML_VALID_ENABLED */
10094 
10095     /*
10096      * Check for an Empty Element.
10097      */
10098     if ((RAW == '/') && (NXT(1) == '>')) {
10099         SKIP(2);
10100 	if (ctxt->sax2) {
10101 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10102 		(!ctxt->disableSAX))
10103 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10104 #ifdef LIBXML_SAX1_ENABLED
10105 	} else {
10106 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10107 		(!ctxt->disableSAX))
10108 		ctxt->sax->endElement(ctxt->userData, name);
10109 #endif /* LIBXML_SAX1_ENABLED */
10110 	}
10111 	namePop(ctxt);
10112 	spacePop(ctxt);
10113 	if (nsNr != ctxt->nsNr)
10114 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10115 	if ( ret != NULL && ctxt->record_info ) {
10116 	   node_info.end_pos = ctxt->input->consumed +
10117 			      (CUR_PTR - ctxt->input->base);
10118 	   node_info.end_line = ctxt->input->line;
10119 	   node_info.node = ret;
10120 	   xmlParserAddNodeInfo(ctxt, &node_info);
10121 	}
10122 	return;
10123     }
10124     if (RAW == '>') {
10125         NEXT1;
10126     } else {
10127         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10128 		     "Couldn't find end of Start Tag %s line %d\n",
10129 		                name, line, NULL);
10130 
10131 	/*
10132 	 * end of parsing of this node.
10133 	 */
10134 	nodePop(ctxt);
10135 	namePop(ctxt);
10136 	spacePop(ctxt);
10137 	if (nsNr != ctxt->nsNr)
10138 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10139 
10140 	/*
10141 	 * Capture end position and add node
10142 	 */
10143 	if ( ret != NULL && ctxt->record_info ) {
10144 	   node_info.end_pos = ctxt->input->consumed +
10145 			      (CUR_PTR - ctxt->input->base);
10146 	   node_info.end_line = ctxt->input->line;
10147 	   node_info.node = ret;
10148 	   xmlParserAddNodeInfo(ctxt, &node_info);
10149 	}
10150 	return;
10151     }
10152 
10153     /*
10154      * Parse the content of the element:
10155      */
10156     xmlParseContent(ctxt);
10157     if (ctxt->instate == XML_PARSER_EOF)
10158 	return;
10159     if (!IS_BYTE_CHAR(RAW)) {
10160         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10161 	 "Premature end of data in tag %s line %d\n",
10162 		                name, line, NULL);
10163 
10164 	/*
10165 	 * end of parsing of this node.
10166 	 */
10167 	nodePop(ctxt);
10168 	namePop(ctxt);
10169 	spacePop(ctxt);
10170 	if (nsNr != ctxt->nsNr)
10171 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10172 	return;
10173     }
10174 
10175     /*
10176      * parse the end of tag: '</' should be here.
10177      */
10178     if (ctxt->sax2) {
10179 	xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10180 	namePop(ctxt);
10181     }
10182 #ifdef LIBXML_SAX1_ENABLED
10183       else
10184 	xmlParseEndTag1(ctxt, line);
10185 #endif /* LIBXML_SAX1_ENABLED */
10186 
10187     /*
10188      * Capture end position and add node
10189      */
10190     if ( ret != NULL && ctxt->record_info ) {
10191        node_info.end_pos = ctxt->input->consumed +
10192                           (CUR_PTR - ctxt->input->base);
10193        node_info.end_line = ctxt->input->line;
10194        node_info.node = ret;
10195        xmlParserAddNodeInfo(ctxt, &node_info);
10196     }
10197 }
10198 
10199 /**
10200  * xmlParseVersionNum:
10201  * @ctxt:  an XML parser context
10202  *
10203  * parse the XML version value.
10204  *
10205  * [26] VersionNum ::= '1.' [0-9]+
10206  *
10207  * In practice allow [0-9].[0-9]+ at that level
10208  *
10209  * Returns the string giving the XML version number, or NULL
10210  */
10211 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10212 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10213     xmlChar *buf = NULL;
10214     int len = 0;
10215     int size = 10;
10216     xmlChar cur;
10217 
10218     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10219     if (buf == NULL) {
10220 	xmlErrMemory(ctxt, NULL);
10221 	return(NULL);
10222     }
10223     cur = CUR;
10224     if (!((cur >= '0') && (cur <= '9'))) {
10225 	xmlFree(buf);
10226 	return(NULL);
10227     }
10228     buf[len++] = cur;
10229     NEXT;
10230     cur=CUR;
10231     if (cur != '.') {
10232 	xmlFree(buf);
10233 	return(NULL);
10234     }
10235     buf[len++] = cur;
10236     NEXT;
10237     cur=CUR;
10238     while ((cur >= '0') && (cur <= '9')) {
10239 	if (len + 1 >= size) {
10240 	    xmlChar *tmp;
10241 
10242 	    size *= 2;
10243 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10244 	    if (tmp == NULL) {
10245 	        xmlFree(buf);
10246 		xmlErrMemory(ctxt, NULL);
10247 		return(NULL);
10248 	    }
10249 	    buf = tmp;
10250 	}
10251 	buf[len++] = cur;
10252 	NEXT;
10253 	cur=CUR;
10254     }
10255     buf[len] = 0;
10256     return(buf);
10257 }
10258 
10259 /**
10260  * xmlParseVersionInfo:
10261  * @ctxt:  an XML parser context
10262  *
10263  * parse the XML version.
10264  *
10265  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10266  *
10267  * [25] Eq ::= S? '=' S?
10268  *
10269  * Returns the version string, e.g. "1.0"
10270  */
10271 
10272 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10273 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10274     xmlChar *version = NULL;
10275 
10276     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10277 	SKIP(7);
10278 	SKIP_BLANKS;
10279 	if (RAW != '=') {
10280 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10281 	    return(NULL);
10282         }
10283 	NEXT;
10284 	SKIP_BLANKS;
10285 	if (RAW == '"') {
10286 	    NEXT;
10287 	    version = xmlParseVersionNum(ctxt);
10288 	    if (RAW != '"') {
10289 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10290 	    } else
10291 	        NEXT;
10292 	} else if (RAW == '\''){
10293 	    NEXT;
10294 	    version = xmlParseVersionNum(ctxt);
10295 	    if (RAW != '\'') {
10296 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10297 	    } else
10298 	        NEXT;
10299 	} else {
10300 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10301 	}
10302     }
10303     return(version);
10304 }
10305 
10306 /**
10307  * xmlParseEncName:
10308  * @ctxt:  an XML parser context
10309  *
10310  * parse the XML encoding name
10311  *
10312  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10313  *
10314  * Returns the encoding name value or NULL
10315  */
10316 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10317 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10318     xmlChar *buf = NULL;
10319     int len = 0;
10320     int size = 10;
10321     xmlChar cur;
10322 
10323     cur = CUR;
10324     if (((cur >= 'a') && (cur <= 'z')) ||
10325         ((cur >= 'A') && (cur <= 'Z'))) {
10326 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10327 	if (buf == NULL) {
10328 	    xmlErrMemory(ctxt, NULL);
10329 	    return(NULL);
10330 	}
10331 
10332 	buf[len++] = cur;
10333 	NEXT;
10334 	cur = CUR;
10335 	while (((cur >= 'a') && (cur <= 'z')) ||
10336 	       ((cur >= 'A') && (cur <= 'Z')) ||
10337 	       ((cur >= '0') && (cur <= '9')) ||
10338 	       (cur == '.') || (cur == '_') ||
10339 	       (cur == '-')) {
10340 	    if (len + 1 >= size) {
10341 	        xmlChar *tmp;
10342 
10343 		size *= 2;
10344 		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10345 		if (tmp == NULL) {
10346 		    xmlErrMemory(ctxt, NULL);
10347 		    xmlFree(buf);
10348 		    return(NULL);
10349 		}
10350 		buf = tmp;
10351 	    }
10352 	    buf[len++] = cur;
10353 	    NEXT;
10354 	    cur = CUR;
10355 	    if (cur == 0) {
10356 	        SHRINK;
10357 		GROW;
10358 		cur = CUR;
10359 	    }
10360         }
10361 	buf[len] = 0;
10362     } else {
10363 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10364     }
10365     return(buf);
10366 }
10367 
10368 /**
10369  * xmlParseEncodingDecl:
10370  * @ctxt:  an XML parser context
10371  *
10372  * parse the XML encoding declaration
10373  *
10374  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10375  *
10376  * this setups the conversion filters.
10377  *
10378  * Returns the encoding value or NULL
10379  */
10380 
10381 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10382 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10383     xmlChar *encoding = NULL;
10384 
10385     SKIP_BLANKS;
10386     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10387 	SKIP(8);
10388 	SKIP_BLANKS;
10389 	if (RAW != '=') {
10390 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10391 	    return(NULL);
10392         }
10393 	NEXT;
10394 	SKIP_BLANKS;
10395 	if (RAW == '"') {
10396 	    NEXT;
10397 	    encoding = xmlParseEncName(ctxt);
10398 	    if (RAW != '"') {
10399 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10400 	    } else
10401 	        NEXT;
10402 	} else if (RAW == '\''){
10403 	    NEXT;
10404 	    encoding = xmlParseEncName(ctxt);
10405 	    if (RAW != '\'') {
10406 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10407 	    } else
10408 	        NEXT;
10409 	} else {
10410 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10411 	}
10412 
10413         /*
10414          * Non standard parsing, allowing the user to ignore encoding
10415          */
10416         if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10417 	    xmlFree((xmlChar *) encoding);
10418             return(NULL);
10419 	}
10420 
10421 	/*
10422 	 * UTF-16 encoding stwich has already taken place at this stage,
10423 	 * more over the little-endian/big-endian selection is already done
10424 	 */
10425         if ((encoding != NULL) &&
10426 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10427 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10428 	    /*
10429 	     * If no encoding was passed to the parser, that we are
10430 	     * using UTF-16 and no decoder is present i.e. the
10431 	     * document is apparently UTF-8 compatible, then raise an
10432 	     * encoding mismatch fatal error
10433 	     */
10434 	    if ((ctxt->encoding == NULL) &&
10435 	        (ctxt->input->buf != NULL) &&
10436 	        (ctxt->input->buf->encoder == NULL)) {
10437 		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10438 		  "Document labelled UTF-16 but has UTF-8 content\n");
10439 	    }
10440 	    if (ctxt->encoding != NULL)
10441 		xmlFree((xmlChar *) ctxt->encoding);
10442 	    ctxt->encoding = encoding;
10443 	}
10444 	/*
10445 	 * UTF-8 encoding is handled natively
10446 	 */
10447         else if ((encoding != NULL) &&
10448 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10449 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10450 	    if (ctxt->encoding != NULL)
10451 		xmlFree((xmlChar *) ctxt->encoding);
10452 	    ctxt->encoding = encoding;
10453 	}
10454 	else if (encoding != NULL) {
10455 	    xmlCharEncodingHandlerPtr handler;
10456 
10457 	    if (ctxt->input->encoding != NULL)
10458 		xmlFree((xmlChar *) ctxt->input->encoding);
10459 	    ctxt->input->encoding = encoding;
10460 
10461             handler = xmlFindCharEncodingHandler((const char *) encoding);
10462 	    if (handler != NULL) {
10463 		xmlSwitchToEncoding(ctxt, handler);
10464 	    } else {
10465 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10466 			"Unsupported encoding %s\n", encoding);
10467 		return(NULL);
10468 	    }
10469 	}
10470     }
10471     return(encoding);
10472 }
10473 
10474 /**
10475  * xmlParseSDDecl:
10476  * @ctxt:  an XML parser context
10477  *
10478  * parse the XML standalone declaration
10479  *
10480  * [32] SDDecl ::= S 'standalone' Eq
10481  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10482  *
10483  * [ VC: Standalone Document Declaration ]
10484  * TODO The standalone document declaration must have the value "no"
10485  * if any external markup declarations contain declarations of:
10486  *  - attributes with default values, if elements to which these
10487  *    attributes apply appear in the document without specifications
10488  *    of values for these attributes, or
10489  *  - entities (other than amp, lt, gt, apos, quot), if references
10490  *    to those entities appear in the document, or
10491  *  - attributes with values subject to normalization, where the
10492  *    attribute appears in the document with a value which will change
10493  *    as a result of normalization, or
10494  *  - element types with element content, if white space occurs directly
10495  *    within any instance of those types.
10496  *
10497  * Returns:
10498  *   1 if standalone="yes"
10499  *   0 if standalone="no"
10500  *  -2 if standalone attribute is missing or invalid
10501  *	  (A standalone value of -2 means that the XML declaration was found,
10502  *	   but no value was specified for the standalone attribute).
10503  */
10504 
10505 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10506 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10507     int standalone = -2;
10508 
10509     SKIP_BLANKS;
10510     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10511 	SKIP(10);
10512         SKIP_BLANKS;
10513 	if (RAW != '=') {
10514 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10515 	    return(standalone);
10516         }
10517 	NEXT;
10518 	SKIP_BLANKS;
10519         if (RAW == '\''){
10520 	    NEXT;
10521 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10522 	        standalone = 0;
10523                 SKIP(2);
10524 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10525 	               (NXT(2) == 's')) {
10526 	        standalone = 1;
10527 		SKIP(3);
10528             } else {
10529 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10530 	    }
10531 	    if (RAW != '\'') {
10532 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10533 	    } else
10534 	        NEXT;
10535 	} else if (RAW == '"'){
10536 	    NEXT;
10537 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10538 	        standalone = 0;
10539 		SKIP(2);
10540 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10541 	               (NXT(2) == 's')) {
10542 	        standalone = 1;
10543                 SKIP(3);
10544             } else {
10545 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10546 	    }
10547 	    if (RAW != '"') {
10548 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10549 	    } else
10550 	        NEXT;
10551 	} else {
10552 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10553         }
10554     }
10555     return(standalone);
10556 }
10557 
10558 /**
10559  * xmlParseXMLDecl:
10560  * @ctxt:  an XML parser context
10561  *
10562  * parse an XML declaration header
10563  *
10564  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10565  */
10566 
10567 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10568 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10569     xmlChar *version;
10570 
10571     /*
10572      * This value for standalone indicates that the document has an
10573      * XML declaration but it does not have a standalone attribute.
10574      * It will be overwritten later if a standalone attribute is found.
10575      */
10576     ctxt->input->standalone = -2;
10577 
10578     /*
10579      * We know that '<?xml' is here.
10580      */
10581     SKIP(5);
10582 
10583     if (!IS_BLANK_CH(RAW)) {
10584 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10585 	               "Blank needed after '<?xml'\n");
10586     }
10587     SKIP_BLANKS;
10588 
10589     /*
10590      * We must have the VersionInfo here.
10591      */
10592     version = xmlParseVersionInfo(ctxt);
10593     if (version == NULL) {
10594 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10595     } else {
10596 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10597 	    /*
10598 	     * Changed here for XML-1.0 5th edition
10599 	     */
10600 	    if (ctxt->options & XML_PARSE_OLD10) {
10601 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10602 			          "Unsupported version '%s'\n",
10603 			          version);
10604 	    } else {
10605 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10606 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10607 		                  "Unsupported version '%s'\n",
10608 				  version, NULL);
10609 		} else {
10610 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10611 				      "Unsupported version '%s'\n",
10612 				      version);
10613 		}
10614 	    }
10615 	}
10616 	if (ctxt->version != NULL)
10617 	    xmlFree((void *) ctxt->version);
10618 	ctxt->version = version;
10619     }
10620 
10621     /*
10622      * We may have the encoding declaration
10623      */
10624     if (!IS_BLANK_CH(RAW)) {
10625         if ((RAW == '?') && (NXT(1) == '>')) {
10626 	    SKIP(2);
10627 	    return;
10628 	}
10629 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10630     }
10631     xmlParseEncodingDecl(ctxt);
10632     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10633 	/*
10634 	 * The XML REC instructs us to stop parsing right here
10635 	 */
10636         return;
10637     }
10638 
10639     /*
10640      * We may have the standalone status.
10641      */
10642     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10643         if ((RAW == '?') && (NXT(1) == '>')) {
10644 	    SKIP(2);
10645 	    return;
10646 	}
10647 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10648     }
10649 
10650     /*
10651      * We can grow the input buffer freely at that point
10652      */
10653     GROW;
10654 
10655     SKIP_BLANKS;
10656     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10657 
10658     SKIP_BLANKS;
10659     if ((RAW == '?') && (NXT(1) == '>')) {
10660         SKIP(2);
10661     } else if (RAW == '>') {
10662         /* Deprecated old WD ... */
10663 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10664 	NEXT;
10665     } else {
10666 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10667 	MOVETO_ENDTAG(CUR_PTR);
10668 	NEXT;
10669     }
10670 }
10671 
10672 /**
10673  * xmlParseMisc:
10674  * @ctxt:  an XML parser context
10675  *
10676  * parse an XML Misc* optional field.
10677  *
10678  * [27] Misc ::= Comment | PI |  S
10679  */
10680 
10681 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10682 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10683     while ((ctxt->instate != XML_PARSER_EOF) &&
10684            (((RAW == '<') && (NXT(1) == '?')) ||
10685             (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10686             IS_BLANK_CH(CUR))) {
10687         if ((RAW == '<') && (NXT(1) == '?')) {
10688 	    xmlParsePI(ctxt);
10689 	} else if (IS_BLANK_CH(CUR)) {
10690 	    NEXT;
10691 	} else
10692 	    xmlParseComment(ctxt);
10693     }
10694 }
10695 
10696 /**
10697  * xmlParseDocument:
10698  * @ctxt:  an XML parser context
10699  *
10700  * parse an XML document (and build a tree if using the standard SAX
10701  * interface).
10702  *
10703  * [1] document ::= prolog element Misc*
10704  *
10705  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10706  *
10707  * Returns 0, -1 in case of error. the parser context is augmented
10708  *                as a result of the parsing.
10709  */
10710 
10711 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10712 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10713     xmlChar start[4];
10714     xmlCharEncoding enc;
10715 
10716     xmlInitParser();
10717 
10718     if ((ctxt == NULL) || (ctxt->input == NULL))
10719         return(-1);
10720 
10721     GROW;
10722 
10723     /*
10724      * SAX: detecting the level.
10725      */
10726     xmlDetectSAX2(ctxt);
10727 
10728     /*
10729      * SAX: beginning of the document processing.
10730      */
10731     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10732         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10733     if (ctxt->instate == XML_PARSER_EOF)
10734 	return(-1);
10735 
10736     if ((ctxt->encoding == NULL) &&
10737         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10738 	/*
10739 	 * Get the 4 first bytes and decode the charset
10740 	 * if enc != XML_CHAR_ENCODING_NONE
10741 	 * plug some encoding conversion routines.
10742 	 */
10743 	start[0] = RAW;
10744 	start[1] = NXT(1);
10745 	start[2] = NXT(2);
10746 	start[3] = NXT(3);
10747 	enc = xmlDetectCharEncoding(&start[0], 4);
10748 	if (enc != XML_CHAR_ENCODING_NONE) {
10749 	    xmlSwitchEncoding(ctxt, enc);
10750 	}
10751     }
10752 
10753 
10754     if (CUR == 0) {
10755 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10756     }
10757 
10758     /*
10759      * Check for the XMLDecl in the Prolog.
10760      * do not GROW here to avoid the detected encoder to decode more
10761      * than just the first line, unless the amount of data is really
10762      * too small to hold "<?xml version="1.0" encoding="foo"
10763      */
10764     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10765        GROW;
10766     }
10767     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10768 
10769 	/*
10770 	 * Note that we will switch encoding on the fly.
10771 	 */
10772 	xmlParseXMLDecl(ctxt);
10773 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10774 	    /*
10775 	     * The XML REC instructs us to stop parsing right here
10776 	     */
10777 	    return(-1);
10778 	}
10779 	ctxt->standalone = ctxt->input->standalone;
10780 	SKIP_BLANKS;
10781     } else {
10782 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10783     }
10784     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10785         ctxt->sax->startDocument(ctxt->userData);
10786     if (ctxt->instate == XML_PARSER_EOF)
10787 	return(-1);
10788     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10789         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10790 	ctxt->myDoc->compression = ctxt->input->buf->compressed;
10791     }
10792 
10793     /*
10794      * The Misc part of the Prolog
10795      */
10796     GROW;
10797     xmlParseMisc(ctxt);
10798 
10799     /*
10800      * Then possibly doc type declaration(s) and more Misc
10801      * (doctypedecl Misc*)?
10802      */
10803     GROW;
10804     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10805 
10806 	ctxt->inSubset = 1;
10807 	xmlParseDocTypeDecl(ctxt);
10808 	if (RAW == '[') {
10809 	    ctxt->instate = XML_PARSER_DTD;
10810 	    xmlParseInternalSubset(ctxt);
10811 	    if (ctxt->instate == XML_PARSER_EOF)
10812 		return(-1);
10813 	}
10814 
10815 	/*
10816 	 * Create and update the external subset.
10817 	 */
10818 	ctxt->inSubset = 2;
10819 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10820 	    (!ctxt->disableSAX))
10821 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10822 	                              ctxt->extSubSystem, ctxt->extSubURI);
10823 	if (ctxt->instate == XML_PARSER_EOF)
10824 	    return(-1);
10825 	ctxt->inSubset = 0;
10826 
10827         xmlCleanSpecialAttr(ctxt);
10828 
10829 	ctxt->instate = XML_PARSER_PROLOG;
10830 	xmlParseMisc(ctxt);
10831     }
10832 
10833     /*
10834      * Time to start parsing the tree itself
10835      */
10836     GROW;
10837     if (RAW != '<') {
10838 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10839 		       "Start tag expected, '<' not found\n");
10840     } else {
10841 	ctxt->instate = XML_PARSER_CONTENT;
10842 	xmlParseElement(ctxt);
10843 	ctxt->instate = XML_PARSER_EPILOG;
10844 
10845 
10846 	/*
10847 	 * The Misc part at the end
10848 	 */
10849 	xmlParseMisc(ctxt);
10850 
10851 	if (RAW != 0) {
10852 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10853 	}
10854 	ctxt->instate = XML_PARSER_EOF;
10855     }
10856 
10857     /*
10858      * SAX: end of the document processing.
10859      */
10860     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10861         ctxt->sax->endDocument(ctxt->userData);
10862 
10863     /*
10864      * Remove locally kept entity definitions if the tree was not built
10865      */
10866     if ((ctxt->myDoc != NULL) &&
10867 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10868 	xmlFreeDoc(ctxt->myDoc);
10869 	ctxt->myDoc = NULL;
10870     }
10871 
10872     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10873         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10874 	if (ctxt->valid)
10875 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10876 	if (ctxt->nsWellFormed)
10877 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
10878 	if (ctxt->options & XML_PARSE_OLD10)
10879 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
10880     }
10881     if (! ctxt->wellFormed) {
10882 	ctxt->valid = 0;
10883 	return(-1);
10884     }
10885     return(0);
10886 }
10887 
10888 /**
10889  * xmlParseExtParsedEnt:
10890  * @ctxt:  an XML parser context
10891  *
10892  * parse a general parsed entity
10893  * An external general parsed entity is well-formed if it matches the
10894  * production labeled extParsedEnt.
10895  *
10896  * [78] extParsedEnt ::= TextDecl? content
10897  *
10898  * Returns 0, -1 in case of error. the parser context is augmented
10899  *                as a result of the parsing.
10900  */
10901 
10902 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10903 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10904     xmlChar start[4];
10905     xmlCharEncoding enc;
10906 
10907     if ((ctxt == NULL) || (ctxt->input == NULL))
10908         return(-1);
10909 
10910     xmlDefaultSAXHandlerInit();
10911 
10912     xmlDetectSAX2(ctxt);
10913 
10914     GROW;
10915 
10916     /*
10917      * SAX: beginning of the document processing.
10918      */
10919     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10920         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10921 
10922     /*
10923      * Get the 4 first bytes and decode the charset
10924      * if enc != XML_CHAR_ENCODING_NONE
10925      * plug some encoding conversion routines.
10926      */
10927     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10928 	start[0] = RAW;
10929 	start[1] = NXT(1);
10930 	start[2] = NXT(2);
10931 	start[3] = NXT(3);
10932 	enc = xmlDetectCharEncoding(start, 4);
10933 	if (enc != XML_CHAR_ENCODING_NONE) {
10934 	    xmlSwitchEncoding(ctxt, enc);
10935 	}
10936     }
10937 
10938 
10939     if (CUR == 0) {
10940 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10941     }
10942 
10943     /*
10944      * Check for the XMLDecl in the Prolog.
10945      */
10946     GROW;
10947     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10948 
10949 	/*
10950 	 * Note that we will switch encoding on the fly.
10951 	 */
10952 	xmlParseXMLDecl(ctxt);
10953 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10954 	    /*
10955 	     * The XML REC instructs us to stop parsing right here
10956 	     */
10957 	    return(-1);
10958 	}
10959 	SKIP_BLANKS;
10960     } else {
10961 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10962     }
10963     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10964         ctxt->sax->startDocument(ctxt->userData);
10965     if (ctxt->instate == XML_PARSER_EOF)
10966 	return(-1);
10967 
10968     /*
10969      * Doing validity checking on chunk doesn't make sense
10970      */
10971     ctxt->instate = XML_PARSER_CONTENT;
10972     ctxt->validate = 0;
10973     ctxt->loadsubset = 0;
10974     ctxt->depth = 0;
10975 
10976     xmlParseContent(ctxt);
10977     if (ctxt->instate == XML_PARSER_EOF)
10978 	return(-1);
10979 
10980     if ((RAW == '<') && (NXT(1) == '/')) {
10981 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10982     } else if (RAW != 0) {
10983 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10984     }
10985 
10986     /*
10987      * SAX: end of the document processing.
10988      */
10989     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10990         ctxt->sax->endDocument(ctxt->userData);
10991 
10992     if (! ctxt->wellFormed) return(-1);
10993     return(0);
10994 }
10995 
10996 #ifdef LIBXML_PUSH_ENABLED
10997 /************************************************************************
10998  *									*
10999  *		Progressive parsing interfaces				*
11000  *									*
11001  ************************************************************************/
11002 
11003 /**
11004  * xmlParseLookupSequence:
11005  * @ctxt:  an XML parser context
11006  * @first:  the first char to lookup
11007  * @next:  the next char to lookup or zero
11008  * @third:  the next char to lookup or zero
11009  *
11010  * Try to find if a sequence (first, next, third) or  just (first next) or
11011  * (first) is available in the input stream.
11012  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11013  * to avoid rescanning sequences of bytes, it DOES change the state of the
11014  * parser, do not use liberally.
11015  *
11016  * Returns the index to the current parsing point if the full sequence
11017  *      is available, -1 otherwise.
11018  */
11019 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)11020 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11021                        xmlChar next, xmlChar third) {
11022     int base, len;
11023     xmlParserInputPtr in;
11024     const xmlChar *buf;
11025 
11026     in = ctxt->input;
11027     if (in == NULL) return(-1);
11028     base = in->cur - in->base;
11029     if (base < 0) return(-1);
11030     if (ctxt->checkIndex > base)
11031         base = ctxt->checkIndex;
11032     if (in->buf == NULL) {
11033 	buf = in->base;
11034 	len = in->length;
11035     } else {
11036 	buf = xmlBufContent(in->buf->buffer);
11037 	len = xmlBufUse(in->buf->buffer);
11038     }
11039     /* take into account the sequence length */
11040     if (third) len -= 2;
11041     else if (next) len --;
11042     for (;base < len;base++) {
11043         if (buf[base] == first) {
11044 	    if (third != 0) {
11045 		if ((buf[base + 1] != next) ||
11046 		    (buf[base + 2] != third)) continue;
11047 	    } else if (next != 0) {
11048 		if (buf[base + 1] != next) continue;
11049 	    }
11050 	    ctxt->checkIndex = 0;
11051 #ifdef DEBUG_PUSH
11052 	    if (next == 0)
11053 		xmlGenericError(xmlGenericErrorContext,
11054 			"PP: lookup '%c' found at %d\n",
11055 			first, base);
11056 	    else if (third == 0)
11057 		xmlGenericError(xmlGenericErrorContext,
11058 			"PP: lookup '%c%c' found at %d\n",
11059 			first, next, base);
11060 	    else
11061 		xmlGenericError(xmlGenericErrorContext,
11062 			"PP: lookup '%c%c%c' found at %d\n",
11063 			first, next, third, base);
11064 #endif
11065 	    return(base - (in->cur - in->base));
11066 	}
11067     }
11068     ctxt->checkIndex = base;
11069 #ifdef DEBUG_PUSH
11070     if (next == 0)
11071 	xmlGenericError(xmlGenericErrorContext,
11072 		"PP: lookup '%c' failed\n", first);
11073     else if (third == 0)
11074 	xmlGenericError(xmlGenericErrorContext,
11075 		"PP: lookup '%c%c' failed\n", first, next);
11076     else
11077 	xmlGenericError(xmlGenericErrorContext,
11078 		"PP: lookup '%c%c%c' failed\n", first, next, third);
11079 #endif
11080     return(-1);
11081 }
11082 
11083 /**
11084  * xmlParseGetLasts:
11085  * @ctxt:  an XML parser context
11086  * @lastlt:  pointer to store the last '<' from the input
11087  * @lastgt:  pointer to store the last '>' from the input
11088  *
11089  * Lookup the last < and > in the current chunk
11090  */
11091 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)11092 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11093                  const xmlChar **lastgt) {
11094     const xmlChar *tmp;
11095 
11096     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11097 	xmlGenericError(xmlGenericErrorContext,
11098 		    "Internal error: xmlParseGetLasts\n");
11099 	return;
11100     }
11101     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11102         tmp = ctxt->input->end;
11103 	tmp--;
11104 	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11105 	if (tmp < ctxt->input->base) {
11106 	    *lastlt = NULL;
11107 	    *lastgt = NULL;
11108 	} else {
11109 	    *lastlt = tmp;
11110 	    tmp++;
11111 	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11112 	        if (*tmp == '\'') {
11113 		    tmp++;
11114 		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11115 		    if (tmp < ctxt->input->end) tmp++;
11116 		} else if (*tmp == '"') {
11117 		    tmp++;
11118 		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11119 		    if (tmp < ctxt->input->end) tmp++;
11120 		} else
11121 		    tmp++;
11122 	    }
11123 	    if (tmp < ctxt->input->end)
11124 	        *lastgt = tmp;
11125 	    else {
11126 	        tmp = *lastlt;
11127 		tmp--;
11128 		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11129 		if (tmp >= ctxt->input->base)
11130 		    *lastgt = tmp;
11131 		else
11132 		    *lastgt = NULL;
11133 	    }
11134 	}
11135     } else {
11136         *lastlt = NULL;
11137 	*lastgt = NULL;
11138     }
11139 }
11140 /**
11141  * xmlCheckCdataPush:
11142  * @cur: pointer to the bock of characters
11143  * @len: length of the block in bytes
11144  *
11145  * Check that the block of characters is okay as SCdata content [20]
11146  *
11147  * Returns the number of bytes to pass if okay, a negative index where an
11148  *         UTF-8 error occured otherwise
11149  */
11150 static int
xmlCheckCdataPush(const xmlChar * utf,int len)11151 xmlCheckCdataPush(const xmlChar *utf, int len) {
11152     int ix;
11153     unsigned char c;
11154     int codepoint;
11155 
11156     if ((utf == NULL) || (len <= 0))
11157         return(0);
11158 
11159     for (ix = 0; ix < len;) {      /* string is 0-terminated */
11160         c = utf[ix];
11161         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
11162 	    if (c >= 0x20)
11163 		ix++;
11164 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11165 	        ix++;
11166 	    else
11167 	        return(-ix);
11168 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11169 	    if (ix + 2 > len) return(ix);
11170 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
11171 	        return(-ix);
11172 	    codepoint = (utf[ix] & 0x1f) << 6;
11173 	    codepoint |= utf[ix+1] & 0x3f;
11174 	    if (!xmlIsCharQ(codepoint))
11175 	        return(-ix);
11176 	    ix += 2;
11177 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11178 	    if (ix + 3 > len) return(ix);
11179 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11180 	        ((utf[ix+2] & 0xc0) != 0x80))
11181 		    return(-ix);
11182 	    codepoint = (utf[ix] & 0xf) << 12;
11183 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
11184 	    codepoint |= utf[ix+2] & 0x3f;
11185 	    if (!xmlIsCharQ(codepoint))
11186 	        return(-ix);
11187 	    ix += 3;
11188 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11189 	    if (ix + 4 > len) return(ix);
11190 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11191 	        ((utf[ix+2] & 0xc0) != 0x80) ||
11192 		((utf[ix+3] & 0xc0) != 0x80))
11193 		    return(-ix);
11194 	    codepoint = (utf[ix] & 0x7) << 18;
11195 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
11196 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
11197 	    codepoint |= utf[ix+3] & 0x3f;
11198 	    if (!xmlIsCharQ(codepoint))
11199 	        return(-ix);
11200 	    ix += 4;
11201 	} else				/* unknown encoding */
11202 	    return(-ix);
11203       }
11204       return(ix);
11205 }
11206 
11207 /**
11208  * xmlParseTryOrFinish:
11209  * @ctxt:  an XML parser context
11210  * @terminate:  last chunk indicator
11211  *
11212  * Try to progress on parsing
11213  *
11214  * Returns zero if no parsing was possible
11215  */
11216 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11217 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11218     int ret = 0;
11219     int avail, tlen;
11220     xmlChar cur, next;
11221     const xmlChar *lastlt, *lastgt;
11222 
11223     if (ctxt->input == NULL)
11224         return(0);
11225 
11226 #ifdef DEBUG_PUSH
11227     switch (ctxt->instate) {
11228 	case XML_PARSER_EOF:
11229 	    xmlGenericError(xmlGenericErrorContext,
11230 		    "PP: try EOF\n"); break;
11231 	case XML_PARSER_START:
11232 	    xmlGenericError(xmlGenericErrorContext,
11233 		    "PP: try START\n"); break;
11234 	case XML_PARSER_MISC:
11235 	    xmlGenericError(xmlGenericErrorContext,
11236 		    "PP: try MISC\n");break;
11237 	case XML_PARSER_COMMENT:
11238 	    xmlGenericError(xmlGenericErrorContext,
11239 		    "PP: try COMMENT\n");break;
11240 	case XML_PARSER_PROLOG:
11241 	    xmlGenericError(xmlGenericErrorContext,
11242 		    "PP: try PROLOG\n");break;
11243 	case XML_PARSER_START_TAG:
11244 	    xmlGenericError(xmlGenericErrorContext,
11245 		    "PP: try START_TAG\n");break;
11246 	case XML_PARSER_CONTENT:
11247 	    xmlGenericError(xmlGenericErrorContext,
11248 		    "PP: try CONTENT\n");break;
11249 	case XML_PARSER_CDATA_SECTION:
11250 	    xmlGenericError(xmlGenericErrorContext,
11251 		    "PP: try CDATA_SECTION\n");break;
11252 	case XML_PARSER_END_TAG:
11253 	    xmlGenericError(xmlGenericErrorContext,
11254 		    "PP: try END_TAG\n");break;
11255 	case XML_PARSER_ENTITY_DECL:
11256 	    xmlGenericError(xmlGenericErrorContext,
11257 		    "PP: try ENTITY_DECL\n");break;
11258 	case XML_PARSER_ENTITY_VALUE:
11259 	    xmlGenericError(xmlGenericErrorContext,
11260 		    "PP: try ENTITY_VALUE\n");break;
11261 	case XML_PARSER_ATTRIBUTE_VALUE:
11262 	    xmlGenericError(xmlGenericErrorContext,
11263 		    "PP: try ATTRIBUTE_VALUE\n");break;
11264 	case XML_PARSER_DTD:
11265 	    xmlGenericError(xmlGenericErrorContext,
11266 		    "PP: try DTD\n");break;
11267 	case XML_PARSER_EPILOG:
11268 	    xmlGenericError(xmlGenericErrorContext,
11269 		    "PP: try EPILOG\n");break;
11270 	case XML_PARSER_PI:
11271 	    xmlGenericError(xmlGenericErrorContext,
11272 		    "PP: try PI\n");break;
11273         case XML_PARSER_IGNORE:
11274             xmlGenericError(xmlGenericErrorContext,
11275 		    "PP: try IGNORE\n");break;
11276     }
11277 #endif
11278 
11279     if ((ctxt->input != NULL) &&
11280         (ctxt->input->cur - ctxt->input->base > 4096)) {
11281 	xmlSHRINK(ctxt);
11282 	ctxt->checkIndex = 0;
11283     }
11284     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11285 
11286     while (ctxt->instate != XML_PARSER_EOF) {
11287 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11288 	    return(0);
11289 
11290 
11291 	/*
11292 	 * Pop-up of finished entities.
11293 	 */
11294 	while ((RAW == 0) && (ctxt->inputNr > 1))
11295 	    xmlPopInput(ctxt);
11296 
11297 	if (ctxt->input == NULL) break;
11298 	if (ctxt->input->buf == NULL)
11299 	    avail = ctxt->input->length -
11300 	            (ctxt->input->cur - ctxt->input->base);
11301 	else {
11302 	    /*
11303 	     * If we are operating on converted input, try to flush
11304 	     * remainng chars to avoid them stalling in the non-converted
11305 	     * buffer. But do not do this in document start where
11306 	     * encoding="..." may not have been read and we work on a
11307 	     * guessed encoding.
11308 	     */
11309 	    if ((ctxt->instate != XML_PARSER_START) &&
11310 	        (ctxt->input->buf->raw != NULL) &&
11311 		(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11312                 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11313                                                  ctxt->input);
11314 		size_t current = ctxt->input->cur - ctxt->input->base;
11315 
11316 		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11317                 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11318                                       base, current);
11319 	    }
11320 	    avail = xmlBufUse(ctxt->input->buf->buffer) -
11321 		    (ctxt->input->cur - ctxt->input->base);
11322 	}
11323         if (avail < 1)
11324 	    goto done;
11325         switch (ctxt->instate) {
11326             case XML_PARSER_EOF:
11327 	        /*
11328 		 * Document parsing is done !
11329 		 */
11330 	        goto done;
11331             case XML_PARSER_START:
11332 		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11333 		    xmlChar start[4];
11334 		    xmlCharEncoding enc;
11335 
11336 		    /*
11337 		     * Very first chars read from the document flow.
11338 		     */
11339 		    if (avail < 4)
11340 			goto done;
11341 
11342 		    /*
11343 		     * Get the 4 first bytes and decode the charset
11344 		     * if enc != XML_CHAR_ENCODING_NONE
11345 		     * plug some encoding conversion routines,
11346 		     * else xmlSwitchEncoding will set to (default)
11347 		     * UTF8.
11348 		     */
11349 		    start[0] = RAW;
11350 		    start[1] = NXT(1);
11351 		    start[2] = NXT(2);
11352 		    start[3] = NXT(3);
11353 		    enc = xmlDetectCharEncoding(start, 4);
11354 		    xmlSwitchEncoding(ctxt, enc);
11355 		    break;
11356 		}
11357 
11358 		if (avail < 2)
11359 		    goto done;
11360 		cur = ctxt->input->cur[0];
11361 		next = ctxt->input->cur[1];
11362 		if (cur == 0) {
11363 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11364 			ctxt->sax->setDocumentLocator(ctxt->userData,
11365 						      &xmlDefaultSAXLocator);
11366 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11367 		    ctxt->instate = XML_PARSER_EOF;
11368 #ifdef DEBUG_PUSH
11369 		    xmlGenericError(xmlGenericErrorContext,
11370 			    "PP: entering EOF\n");
11371 #endif
11372 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11373 			ctxt->sax->endDocument(ctxt->userData);
11374 		    goto done;
11375 		}
11376 	        if ((cur == '<') && (next == '?')) {
11377 		    /* PI or XML decl */
11378 		    if (avail < 5) return(ret);
11379 		    if ((!terminate) &&
11380 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11381 			return(ret);
11382 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11383 			ctxt->sax->setDocumentLocator(ctxt->userData,
11384 						      &xmlDefaultSAXLocator);
11385 		    if ((ctxt->input->cur[2] == 'x') &&
11386 			(ctxt->input->cur[3] == 'm') &&
11387 			(ctxt->input->cur[4] == 'l') &&
11388 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11389 			ret += 5;
11390 #ifdef DEBUG_PUSH
11391 			xmlGenericError(xmlGenericErrorContext,
11392 				"PP: Parsing XML Decl\n");
11393 #endif
11394 			xmlParseXMLDecl(ctxt);
11395 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11396 			    /*
11397 			     * The XML REC instructs us to stop parsing right
11398 			     * here
11399 			     */
11400 			    ctxt->instate = XML_PARSER_EOF;
11401 			    return(0);
11402 			}
11403 			ctxt->standalone = ctxt->input->standalone;
11404 			if ((ctxt->encoding == NULL) &&
11405 			    (ctxt->input->encoding != NULL))
11406 			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11407 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11408 			    (!ctxt->disableSAX))
11409 			    ctxt->sax->startDocument(ctxt->userData);
11410 			ctxt->instate = XML_PARSER_MISC;
11411 #ifdef DEBUG_PUSH
11412 			xmlGenericError(xmlGenericErrorContext,
11413 				"PP: entering MISC\n");
11414 #endif
11415 		    } else {
11416 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11417 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11418 			    (!ctxt->disableSAX))
11419 			    ctxt->sax->startDocument(ctxt->userData);
11420 			ctxt->instate = XML_PARSER_MISC;
11421 #ifdef DEBUG_PUSH
11422 			xmlGenericError(xmlGenericErrorContext,
11423 				"PP: entering MISC\n");
11424 #endif
11425 		    }
11426 		} else {
11427 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11428 			ctxt->sax->setDocumentLocator(ctxt->userData,
11429 						      &xmlDefaultSAXLocator);
11430 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11431 		    if (ctxt->version == NULL) {
11432 		        xmlErrMemory(ctxt, NULL);
11433 			break;
11434 		    }
11435 		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11436 		        (!ctxt->disableSAX))
11437 			ctxt->sax->startDocument(ctxt->userData);
11438 		    ctxt->instate = XML_PARSER_MISC;
11439 #ifdef DEBUG_PUSH
11440 		    xmlGenericError(xmlGenericErrorContext,
11441 			    "PP: entering MISC\n");
11442 #endif
11443 		}
11444 		break;
11445             case XML_PARSER_START_TAG: {
11446 	        const xmlChar *name;
11447 		const xmlChar *prefix = NULL;
11448 		const xmlChar *URI = NULL;
11449 		int nsNr = ctxt->nsNr;
11450 
11451 		if ((avail < 2) && (ctxt->inputNr == 1))
11452 		    goto done;
11453 		cur = ctxt->input->cur[0];
11454 	        if (cur != '<') {
11455 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11456 		    ctxt->instate = XML_PARSER_EOF;
11457 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11458 			ctxt->sax->endDocument(ctxt->userData);
11459 		    goto done;
11460 		}
11461 		if (!terminate) {
11462 		    if (ctxt->progressive) {
11463 		        /* > can be found unescaped in attribute values */
11464 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11465 			    goto done;
11466 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11467 			goto done;
11468 		    }
11469 		}
11470 		if (ctxt->spaceNr == 0)
11471 		    spacePush(ctxt, -1);
11472 		else if (*ctxt->space == -2)
11473 		    spacePush(ctxt, -1);
11474 		else
11475 		    spacePush(ctxt, *ctxt->space);
11476 #ifdef LIBXML_SAX1_ENABLED
11477 		if (ctxt->sax2)
11478 #endif /* LIBXML_SAX1_ENABLED */
11479 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11480 #ifdef LIBXML_SAX1_ENABLED
11481 		else
11482 		    name = xmlParseStartTag(ctxt);
11483 #endif /* LIBXML_SAX1_ENABLED */
11484 		if (ctxt->instate == XML_PARSER_EOF)
11485 		    goto done;
11486 		if (name == NULL) {
11487 		    spacePop(ctxt);
11488 		    ctxt->instate = XML_PARSER_EOF;
11489 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11490 			ctxt->sax->endDocument(ctxt->userData);
11491 		    goto done;
11492 		}
11493 #ifdef LIBXML_VALID_ENABLED
11494 		/*
11495 		 * [ VC: Root Element Type ]
11496 		 * The Name in the document type declaration must match
11497 		 * the element type of the root element.
11498 		 */
11499 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11500 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11501 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11502 #endif /* LIBXML_VALID_ENABLED */
11503 
11504 		/*
11505 		 * Check for an Empty Element.
11506 		 */
11507 		if ((RAW == '/') && (NXT(1) == '>')) {
11508 		    SKIP(2);
11509 
11510 		    if (ctxt->sax2) {
11511 			if ((ctxt->sax != NULL) &&
11512 			    (ctxt->sax->endElementNs != NULL) &&
11513 			    (!ctxt->disableSAX))
11514 			    ctxt->sax->endElementNs(ctxt->userData, name,
11515 			                            prefix, URI);
11516 			if (ctxt->nsNr - nsNr > 0)
11517 			    nsPop(ctxt, ctxt->nsNr - nsNr);
11518 #ifdef LIBXML_SAX1_ENABLED
11519 		    } else {
11520 			if ((ctxt->sax != NULL) &&
11521 			    (ctxt->sax->endElement != NULL) &&
11522 			    (!ctxt->disableSAX))
11523 			    ctxt->sax->endElement(ctxt->userData, name);
11524 #endif /* LIBXML_SAX1_ENABLED */
11525 		    }
11526 		    if (ctxt->instate == XML_PARSER_EOF)
11527 			goto done;
11528 		    spacePop(ctxt);
11529 		    if (ctxt->nameNr == 0) {
11530 			ctxt->instate = XML_PARSER_EPILOG;
11531 		    } else {
11532 			ctxt->instate = XML_PARSER_CONTENT;
11533 		    }
11534                     ctxt->progressive = 1;
11535 		    break;
11536 		}
11537 		if (RAW == '>') {
11538 		    NEXT;
11539 		} else {
11540 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11541 					 "Couldn't find end of Start Tag %s\n",
11542 					 name);
11543 		    nodePop(ctxt);
11544 		    spacePop(ctxt);
11545 		}
11546 		if (ctxt->sax2)
11547 		    nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11548 #ifdef LIBXML_SAX1_ENABLED
11549 		else
11550 		    namePush(ctxt, name);
11551 #endif /* LIBXML_SAX1_ENABLED */
11552 
11553 		ctxt->instate = XML_PARSER_CONTENT;
11554                 ctxt->progressive = 1;
11555                 break;
11556 	    }
11557             case XML_PARSER_CONTENT: {
11558 		const xmlChar *test;
11559 		unsigned int cons;
11560 		if ((avail < 2) && (ctxt->inputNr == 1))
11561 		    goto done;
11562 		cur = ctxt->input->cur[0];
11563 		next = ctxt->input->cur[1];
11564 
11565 		test = CUR_PTR;
11566 	        cons = ctxt->input->consumed;
11567 		if ((cur == '<') && (next == '/')) {
11568 		    ctxt->instate = XML_PARSER_END_TAG;
11569 		    break;
11570 	        } else if ((cur == '<') && (next == '?')) {
11571 		    if ((!terminate) &&
11572 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11573                         ctxt->progressive = XML_PARSER_PI;
11574 			goto done;
11575                     }
11576 		    xmlParsePI(ctxt);
11577 		    ctxt->instate = XML_PARSER_CONTENT;
11578                     ctxt->progressive = 1;
11579 		} else if ((cur == '<') && (next != '!')) {
11580 		    ctxt->instate = XML_PARSER_START_TAG;
11581 		    break;
11582 		} else if ((cur == '<') && (next == '!') &&
11583 		           (ctxt->input->cur[2] == '-') &&
11584 			   (ctxt->input->cur[3] == '-')) {
11585 		    int term;
11586 
11587 	            if (avail < 4)
11588 		        goto done;
11589 		    ctxt->input->cur += 4;
11590 		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11591 		    ctxt->input->cur -= 4;
11592 		    if ((!terminate) && (term < 0)) {
11593                         ctxt->progressive = XML_PARSER_COMMENT;
11594 			goto done;
11595                     }
11596 		    xmlParseComment(ctxt);
11597 		    ctxt->instate = XML_PARSER_CONTENT;
11598                     ctxt->progressive = 1;
11599 		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11600 		    (ctxt->input->cur[2] == '[') &&
11601 		    (ctxt->input->cur[3] == 'C') &&
11602 		    (ctxt->input->cur[4] == 'D') &&
11603 		    (ctxt->input->cur[5] == 'A') &&
11604 		    (ctxt->input->cur[6] == 'T') &&
11605 		    (ctxt->input->cur[7] == 'A') &&
11606 		    (ctxt->input->cur[8] == '[')) {
11607 		    SKIP(9);
11608 		    ctxt->instate = XML_PARSER_CDATA_SECTION;
11609 		    break;
11610 		} else if ((cur == '<') && (next == '!') &&
11611 		           (avail < 9)) {
11612 		    goto done;
11613 		} else if (cur == '&') {
11614 		    if ((!terminate) &&
11615 		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11616 			goto done;
11617 		    xmlParseReference(ctxt);
11618 		} else {
11619 		    /* TODO Avoid the extra copy, handle directly !!! */
11620 		    /*
11621 		     * Goal of the following test is:
11622 		     *  - minimize calls to the SAX 'character' callback
11623 		     *    when they are mergeable
11624 		     *  - handle an problem for isBlank when we only parse
11625 		     *    a sequence of blank chars and the next one is
11626 		     *    not available to check against '<' presence.
11627 		     *  - tries to homogenize the differences in SAX
11628 		     *    callbacks between the push and pull versions
11629 		     *    of the parser.
11630 		     */
11631 		    if ((ctxt->inputNr == 1) &&
11632 		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11633 			if (!terminate) {
11634 			    if (ctxt->progressive) {
11635 				if ((lastlt == NULL) ||
11636 				    (ctxt->input->cur > lastlt))
11637 				    goto done;
11638 			    } else if (xmlParseLookupSequence(ctxt,
11639 			                                      '<', 0, 0) < 0) {
11640 				goto done;
11641 			    }
11642 			}
11643                     }
11644 		    ctxt->checkIndex = 0;
11645 		    xmlParseCharData(ctxt, 0);
11646 		}
11647 		/*
11648 		 * Pop-up of finished entities.
11649 		 */
11650 		while ((RAW == 0) && (ctxt->inputNr > 1))
11651 		    xmlPopInput(ctxt);
11652 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11653 		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11654 		                "detected an error in element content\n");
11655 		    ctxt->instate = XML_PARSER_EOF;
11656 		    break;
11657 		}
11658 		break;
11659 	    }
11660             case XML_PARSER_END_TAG:
11661 		if (avail < 2)
11662 		    goto done;
11663 		if (!terminate) {
11664 		    if (ctxt->progressive) {
11665 		        /* > can be found unescaped in attribute values */
11666 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11667 			    goto done;
11668 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11669 			goto done;
11670 		    }
11671 		}
11672 		if (ctxt->sax2) {
11673 		    xmlParseEndTag2(ctxt,
11674 		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11675 		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11676 		       (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11677 		    nameNsPop(ctxt);
11678 		}
11679 #ifdef LIBXML_SAX1_ENABLED
11680 		  else
11681 		    xmlParseEndTag1(ctxt, 0);
11682 #endif /* LIBXML_SAX1_ENABLED */
11683 		if (ctxt->instate == XML_PARSER_EOF) {
11684 		    /* Nothing */
11685 		} else if (ctxt->nameNr == 0) {
11686 		    ctxt->instate = XML_PARSER_EPILOG;
11687 		} else {
11688 		    ctxt->instate = XML_PARSER_CONTENT;
11689 		}
11690 		break;
11691             case XML_PARSER_CDATA_SECTION: {
11692 	        /*
11693 		 * The Push mode need to have the SAX callback for
11694 		 * cdataBlock merge back contiguous callbacks.
11695 		 */
11696 		int base;
11697 
11698 		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11699 		if (base < 0) {
11700 		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11701 		        int tmp;
11702 
11703 			tmp = xmlCheckCdataPush(ctxt->input->cur,
11704 			                        XML_PARSER_BIG_BUFFER_SIZE);
11705 			if (tmp < 0) {
11706 			    tmp = -tmp;
11707 			    ctxt->input->cur += tmp;
11708 			    goto encoding_error;
11709 			}
11710 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11711 			    if (ctxt->sax->cdataBlock != NULL)
11712 				ctxt->sax->cdataBlock(ctxt->userData,
11713 				                      ctxt->input->cur, tmp);
11714 			    else if (ctxt->sax->characters != NULL)
11715 				ctxt->sax->characters(ctxt->userData,
11716 				                      ctxt->input->cur, tmp);
11717 			}
11718 			if (ctxt->instate == XML_PARSER_EOF)
11719 			    goto done;
11720 			SKIPL(tmp);
11721 			ctxt->checkIndex = 0;
11722 		    }
11723 		    goto done;
11724 		} else {
11725 		    int tmp;
11726 
11727 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11728 		    if ((tmp < 0) || (tmp != base)) {
11729 			tmp = -tmp;
11730 			ctxt->input->cur += tmp;
11731 			goto encoding_error;
11732 		    }
11733 		    if ((ctxt->sax != NULL) && (base == 0) &&
11734 		        (ctxt->sax->cdataBlock != NULL) &&
11735 		        (!ctxt->disableSAX)) {
11736 			/*
11737 			 * Special case to provide identical behaviour
11738 			 * between pull and push parsers on enpty CDATA
11739 			 * sections
11740 			 */
11741 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11742 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11743 			               "<![CDATA[", 9)))
11744 			     ctxt->sax->cdataBlock(ctxt->userData,
11745 			                           BAD_CAST "", 0);
11746 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11747 			(!ctxt->disableSAX)) {
11748 			if (ctxt->sax->cdataBlock != NULL)
11749 			    ctxt->sax->cdataBlock(ctxt->userData,
11750 						  ctxt->input->cur, base);
11751 			else if (ctxt->sax->characters != NULL)
11752 			    ctxt->sax->characters(ctxt->userData,
11753 						  ctxt->input->cur, base);
11754 		    }
11755 		    if (ctxt->instate == XML_PARSER_EOF)
11756 			goto done;
11757 		    SKIPL(base + 3);
11758 		    ctxt->checkIndex = 0;
11759 		    ctxt->instate = XML_PARSER_CONTENT;
11760 #ifdef DEBUG_PUSH
11761 		    xmlGenericError(xmlGenericErrorContext,
11762 			    "PP: entering CONTENT\n");
11763 #endif
11764 		}
11765 		break;
11766 	    }
11767             case XML_PARSER_MISC:
11768 		SKIP_BLANKS;
11769 		if (ctxt->input->buf == NULL)
11770 		    avail = ctxt->input->length -
11771 		            (ctxt->input->cur - ctxt->input->base);
11772 		else
11773 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11774 		            (ctxt->input->cur - ctxt->input->base);
11775 		if (avail < 2)
11776 		    goto done;
11777 		cur = ctxt->input->cur[0];
11778 		next = ctxt->input->cur[1];
11779 	        if ((cur == '<') && (next == '?')) {
11780 		    if ((!terminate) &&
11781 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11782                         ctxt->progressive = XML_PARSER_PI;
11783 			goto done;
11784                     }
11785 #ifdef DEBUG_PUSH
11786 		    xmlGenericError(xmlGenericErrorContext,
11787 			    "PP: Parsing PI\n");
11788 #endif
11789 		    xmlParsePI(ctxt);
11790 		    if (ctxt->instate == XML_PARSER_EOF)
11791 			goto done;
11792 		    ctxt->instate = XML_PARSER_MISC;
11793                     ctxt->progressive = 1;
11794 		    ctxt->checkIndex = 0;
11795 		} else if ((cur == '<') && (next == '!') &&
11796 		    (ctxt->input->cur[2] == '-') &&
11797 		    (ctxt->input->cur[3] == '-')) {
11798 		    if ((!terminate) &&
11799 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11800                         ctxt->progressive = XML_PARSER_COMMENT;
11801 			goto done;
11802                     }
11803 #ifdef DEBUG_PUSH
11804 		    xmlGenericError(xmlGenericErrorContext,
11805 			    "PP: Parsing Comment\n");
11806 #endif
11807 		    xmlParseComment(ctxt);
11808 		    if (ctxt->instate == XML_PARSER_EOF)
11809 			goto done;
11810 		    ctxt->instate = XML_PARSER_MISC;
11811                     ctxt->progressive = 1;
11812 		    ctxt->checkIndex = 0;
11813 		} else if ((cur == '<') && (next == '!') &&
11814 		    (ctxt->input->cur[2] == 'D') &&
11815 		    (ctxt->input->cur[3] == 'O') &&
11816 		    (ctxt->input->cur[4] == 'C') &&
11817 		    (ctxt->input->cur[5] == 'T') &&
11818 		    (ctxt->input->cur[6] == 'Y') &&
11819 		    (ctxt->input->cur[7] == 'P') &&
11820 		    (ctxt->input->cur[8] == 'E')) {
11821 		    if ((!terminate) &&
11822 		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11823                         ctxt->progressive = XML_PARSER_DTD;
11824 			goto done;
11825                     }
11826 #ifdef DEBUG_PUSH
11827 		    xmlGenericError(xmlGenericErrorContext,
11828 			    "PP: Parsing internal subset\n");
11829 #endif
11830 		    ctxt->inSubset = 1;
11831                     ctxt->progressive = 0;
11832 		    ctxt->checkIndex = 0;
11833 		    xmlParseDocTypeDecl(ctxt);
11834 		    if (ctxt->instate == XML_PARSER_EOF)
11835 			goto done;
11836 		    if (RAW == '[') {
11837 			ctxt->instate = XML_PARSER_DTD;
11838 #ifdef DEBUG_PUSH
11839 			xmlGenericError(xmlGenericErrorContext,
11840 				"PP: entering DTD\n");
11841 #endif
11842 		    } else {
11843 			/*
11844 			 * Create and update the external subset.
11845 			 */
11846 			ctxt->inSubset = 2;
11847 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11848 			    (ctxt->sax->externalSubset != NULL))
11849 			    ctxt->sax->externalSubset(ctxt->userData,
11850 				    ctxt->intSubName, ctxt->extSubSystem,
11851 				    ctxt->extSubURI);
11852 			ctxt->inSubset = 0;
11853 			xmlCleanSpecialAttr(ctxt);
11854 			ctxt->instate = XML_PARSER_PROLOG;
11855 #ifdef DEBUG_PUSH
11856 			xmlGenericError(xmlGenericErrorContext,
11857 				"PP: entering PROLOG\n");
11858 #endif
11859 		    }
11860 		} else if ((cur == '<') && (next == '!') &&
11861 		           (avail < 9)) {
11862 		    goto done;
11863 		} else {
11864 		    ctxt->instate = XML_PARSER_START_TAG;
11865 		    ctxt->progressive = XML_PARSER_START_TAG;
11866 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11867 #ifdef DEBUG_PUSH
11868 		    xmlGenericError(xmlGenericErrorContext,
11869 			    "PP: entering START_TAG\n");
11870 #endif
11871 		}
11872 		break;
11873             case XML_PARSER_PROLOG:
11874 		SKIP_BLANKS;
11875 		if (ctxt->input->buf == NULL)
11876 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11877 		else
11878 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11879                             (ctxt->input->cur - ctxt->input->base);
11880 		if (avail < 2)
11881 		    goto done;
11882 		cur = ctxt->input->cur[0];
11883 		next = ctxt->input->cur[1];
11884 	        if ((cur == '<') && (next == '?')) {
11885 		    if ((!terminate) &&
11886 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11887                         ctxt->progressive = XML_PARSER_PI;
11888 			goto done;
11889                     }
11890 #ifdef DEBUG_PUSH
11891 		    xmlGenericError(xmlGenericErrorContext,
11892 			    "PP: Parsing PI\n");
11893 #endif
11894 		    xmlParsePI(ctxt);
11895 		    if (ctxt->instate == XML_PARSER_EOF)
11896 			goto done;
11897 		    ctxt->instate = XML_PARSER_PROLOG;
11898                     ctxt->progressive = 1;
11899 		} else if ((cur == '<') && (next == '!') &&
11900 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11901 		    if ((!terminate) &&
11902 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11903                         ctxt->progressive = XML_PARSER_COMMENT;
11904 			goto done;
11905                     }
11906 #ifdef DEBUG_PUSH
11907 		    xmlGenericError(xmlGenericErrorContext,
11908 			    "PP: Parsing Comment\n");
11909 #endif
11910 		    xmlParseComment(ctxt);
11911 		    if (ctxt->instate == XML_PARSER_EOF)
11912 			goto done;
11913 		    ctxt->instate = XML_PARSER_PROLOG;
11914                     ctxt->progressive = 1;
11915 		} else if ((cur == '<') && (next == '!') &&
11916 		           (avail < 4)) {
11917 		    goto done;
11918 		} else {
11919 		    ctxt->instate = XML_PARSER_START_TAG;
11920 		    if (ctxt->progressive == 0)
11921 			ctxt->progressive = XML_PARSER_START_TAG;
11922 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11923 #ifdef DEBUG_PUSH
11924 		    xmlGenericError(xmlGenericErrorContext,
11925 			    "PP: entering START_TAG\n");
11926 #endif
11927 		}
11928 		break;
11929             case XML_PARSER_EPILOG:
11930 		SKIP_BLANKS;
11931 		if (ctxt->input->buf == NULL)
11932 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11933 		else
11934 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11935                             (ctxt->input->cur - ctxt->input->base);
11936 		if (avail < 2)
11937 		    goto done;
11938 		cur = ctxt->input->cur[0];
11939 		next = ctxt->input->cur[1];
11940 	        if ((cur == '<') && (next == '?')) {
11941 		    if ((!terminate) &&
11942 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11943                         ctxt->progressive = XML_PARSER_PI;
11944 			goto done;
11945                     }
11946 #ifdef DEBUG_PUSH
11947 		    xmlGenericError(xmlGenericErrorContext,
11948 			    "PP: Parsing PI\n");
11949 #endif
11950 		    xmlParsePI(ctxt);
11951 		    if (ctxt->instate == XML_PARSER_EOF)
11952 			goto done;
11953 		    ctxt->instate = XML_PARSER_EPILOG;
11954                     ctxt->progressive = 1;
11955 		} else if ((cur == '<') && (next == '!') &&
11956 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11957 		    if ((!terminate) &&
11958 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11959                         ctxt->progressive = XML_PARSER_COMMENT;
11960 			goto done;
11961                     }
11962 #ifdef DEBUG_PUSH
11963 		    xmlGenericError(xmlGenericErrorContext,
11964 			    "PP: Parsing Comment\n");
11965 #endif
11966 		    xmlParseComment(ctxt);
11967 		    if (ctxt->instate == XML_PARSER_EOF)
11968 			goto done;
11969 		    ctxt->instate = XML_PARSER_EPILOG;
11970                     ctxt->progressive = 1;
11971 		} else if ((cur == '<') && (next == '!') &&
11972 		           (avail < 4)) {
11973 		    goto done;
11974 		} else {
11975 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11976 		    ctxt->instate = XML_PARSER_EOF;
11977 #ifdef DEBUG_PUSH
11978 		    xmlGenericError(xmlGenericErrorContext,
11979 			    "PP: entering EOF\n");
11980 #endif
11981 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11982 			ctxt->sax->endDocument(ctxt->userData);
11983 		    goto done;
11984 		}
11985 		break;
11986             case XML_PARSER_DTD: {
11987 	        /*
11988 		 * Sorry but progressive parsing of the internal subset
11989 		 * is not expected to be supported. We first check that
11990 		 * the full content of the internal subset is available and
11991 		 * the parsing is launched only at that point.
11992 		 * Internal subset ends up with "']' S? '>'" in an unescaped
11993 		 * section and not in a ']]>' sequence which are conditional
11994 		 * sections (whoever argued to keep that crap in XML deserve
11995 		 * a place in hell !).
11996 		 */
11997 		int base, i;
11998 		xmlChar *buf;
11999 	        xmlChar quote = 0;
12000                 size_t use;
12001 
12002 		base = ctxt->input->cur - ctxt->input->base;
12003 		if (base < 0) return(0);
12004 		if (ctxt->checkIndex > base)
12005 		    base = ctxt->checkIndex;
12006 		buf = xmlBufContent(ctxt->input->buf->buffer);
12007                 use = xmlBufUse(ctxt->input->buf->buffer);
12008 		for (;(unsigned int) base < use; base++) {
12009 		    if (quote != 0) {
12010 		        if (buf[base] == quote)
12011 			    quote = 0;
12012 			continue;
12013 		    }
12014 		    if ((quote == 0) && (buf[base] == '<')) {
12015 		        int found  = 0;
12016 			/* special handling of comments */
12017 		        if (((unsigned int) base + 4 < use) &&
12018 			    (buf[base + 1] == '!') &&
12019 			    (buf[base + 2] == '-') &&
12020 			    (buf[base + 3] == '-')) {
12021 			    for (;(unsigned int) base + 3 < use; base++) {
12022 				if ((buf[base] == '-') &&
12023 				    (buf[base + 1] == '-') &&
12024 				    (buf[base + 2] == '>')) {
12025 				    found = 1;
12026 				    base += 2;
12027 				    break;
12028 				}
12029 		            }
12030 			    if (!found) {
12031 #if 0
12032 			        fprintf(stderr, "unfinished comment\n");
12033 #endif
12034 			        break; /* for */
12035 		            }
12036 		            continue;
12037 			}
12038 		    }
12039 		    if (buf[base] == '"') {
12040 		        quote = '"';
12041 			continue;
12042 		    }
12043 		    if (buf[base] == '\'') {
12044 		        quote = '\'';
12045 			continue;
12046 		    }
12047 		    if (buf[base] == ']') {
12048 #if 0
12049 		        fprintf(stderr, "%c%c%c%c: ", buf[base],
12050 			        buf[base + 1], buf[base + 2], buf[base + 3]);
12051 #endif
12052 		        if ((unsigned int) base +1 >= use)
12053 			    break;
12054 			if (buf[base + 1] == ']') {
12055 			    /* conditional crap, skip both ']' ! */
12056 			    base++;
12057 			    continue;
12058 			}
12059 		        for (i = 1; (unsigned int) base + i < use; i++) {
12060 			    if (buf[base + i] == '>') {
12061 #if 0
12062 			        fprintf(stderr, "found\n");
12063 #endif
12064 			        goto found_end_int_subset;
12065 			    }
12066 			    if (!IS_BLANK_CH(buf[base + i])) {
12067 #if 0
12068 			        fprintf(stderr, "not found\n");
12069 #endif
12070 			        goto not_end_of_int_subset;
12071 			    }
12072 			}
12073 #if 0
12074 			fprintf(stderr, "end of stream\n");
12075 #endif
12076 		        break;
12077 
12078 		    }
12079 not_end_of_int_subset:
12080                     continue; /* for */
12081 		}
12082 		/*
12083 		 * We didn't found the end of the Internal subset
12084 		 */
12085                 if (quote == 0)
12086                     ctxt->checkIndex = base;
12087                 else
12088                     ctxt->checkIndex = 0;
12089 #ifdef DEBUG_PUSH
12090 		if (next == 0)
12091 		    xmlGenericError(xmlGenericErrorContext,
12092 			    "PP: lookup of int subset end filed\n");
12093 #endif
12094 	        goto done;
12095 
12096 found_end_int_subset:
12097                 ctxt->checkIndex = 0;
12098 		xmlParseInternalSubset(ctxt);
12099 		if (ctxt->instate == XML_PARSER_EOF)
12100 		    goto done;
12101 		ctxt->inSubset = 2;
12102 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12103 		    (ctxt->sax->externalSubset != NULL))
12104 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12105 			    ctxt->extSubSystem, ctxt->extSubURI);
12106 		ctxt->inSubset = 0;
12107 		xmlCleanSpecialAttr(ctxt);
12108 		if (ctxt->instate == XML_PARSER_EOF)
12109 		    goto done;
12110 		ctxt->instate = XML_PARSER_PROLOG;
12111 		ctxt->checkIndex = 0;
12112 #ifdef DEBUG_PUSH
12113 		xmlGenericError(xmlGenericErrorContext,
12114 			"PP: entering PROLOG\n");
12115 #endif
12116                 break;
12117 	    }
12118             case XML_PARSER_COMMENT:
12119 		xmlGenericError(xmlGenericErrorContext,
12120 			"PP: internal error, state == COMMENT\n");
12121 		ctxt->instate = XML_PARSER_CONTENT;
12122 #ifdef DEBUG_PUSH
12123 		xmlGenericError(xmlGenericErrorContext,
12124 			"PP: entering CONTENT\n");
12125 #endif
12126 		break;
12127             case XML_PARSER_IGNORE:
12128 		xmlGenericError(xmlGenericErrorContext,
12129 			"PP: internal error, state == IGNORE");
12130 	        ctxt->instate = XML_PARSER_DTD;
12131 #ifdef DEBUG_PUSH
12132 		xmlGenericError(xmlGenericErrorContext,
12133 			"PP: entering DTD\n");
12134 #endif
12135 	        break;
12136             case XML_PARSER_PI:
12137 		xmlGenericError(xmlGenericErrorContext,
12138 			"PP: internal error, state == PI\n");
12139 		ctxt->instate = XML_PARSER_CONTENT;
12140 #ifdef DEBUG_PUSH
12141 		xmlGenericError(xmlGenericErrorContext,
12142 			"PP: entering CONTENT\n");
12143 #endif
12144 		break;
12145             case XML_PARSER_ENTITY_DECL:
12146 		xmlGenericError(xmlGenericErrorContext,
12147 			"PP: internal error, state == ENTITY_DECL\n");
12148 		ctxt->instate = XML_PARSER_DTD;
12149 #ifdef DEBUG_PUSH
12150 		xmlGenericError(xmlGenericErrorContext,
12151 			"PP: entering DTD\n");
12152 #endif
12153 		break;
12154             case XML_PARSER_ENTITY_VALUE:
12155 		xmlGenericError(xmlGenericErrorContext,
12156 			"PP: internal error, state == ENTITY_VALUE\n");
12157 		ctxt->instate = XML_PARSER_CONTENT;
12158 #ifdef DEBUG_PUSH
12159 		xmlGenericError(xmlGenericErrorContext,
12160 			"PP: entering DTD\n");
12161 #endif
12162 		break;
12163             case XML_PARSER_ATTRIBUTE_VALUE:
12164 		xmlGenericError(xmlGenericErrorContext,
12165 			"PP: internal error, state == ATTRIBUTE_VALUE\n");
12166 		ctxt->instate = XML_PARSER_START_TAG;
12167 #ifdef DEBUG_PUSH
12168 		xmlGenericError(xmlGenericErrorContext,
12169 			"PP: entering START_TAG\n");
12170 #endif
12171 		break;
12172             case XML_PARSER_SYSTEM_LITERAL:
12173 		xmlGenericError(xmlGenericErrorContext,
12174 			"PP: internal error, state == SYSTEM_LITERAL\n");
12175 		ctxt->instate = XML_PARSER_START_TAG;
12176 #ifdef DEBUG_PUSH
12177 		xmlGenericError(xmlGenericErrorContext,
12178 			"PP: entering START_TAG\n");
12179 #endif
12180 		break;
12181             case XML_PARSER_PUBLIC_LITERAL:
12182 		xmlGenericError(xmlGenericErrorContext,
12183 			"PP: internal error, state == PUBLIC_LITERAL\n");
12184 		ctxt->instate = XML_PARSER_START_TAG;
12185 #ifdef DEBUG_PUSH
12186 		xmlGenericError(xmlGenericErrorContext,
12187 			"PP: entering START_TAG\n");
12188 #endif
12189 		break;
12190 	}
12191     }
12192 done:
12193 #ifdef DEBUG_PUSH
12194     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12195 #endif
12196     return(ret);
12197 encoding_error:
12198     {
12199         char buffer[150];
12200 
12201 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12202 			ctxt->input->cur[0], ctxt->input->cur[1],
12203 			ctxt->input->cur[2], ctxt->input->cur[3]);
12204 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12205 		     "Input is not proper UTF-8, indicate encoding !\n%s",
12206 		     BAD_CAST buffer, NULL);
12207     }
12208     return(0);
12209 }
12210 
12211 /**
12212  * xmlParseCheckTransition:
12213  * @ctxt:  an XML parser context
12214  * @chunk:  a char array
12215  * @size:  the size in byte of the chunk
12216  *
12217  * Check depending on the current parser state if the chunk given must be
12218  * processed immediately or one need more data to advance on parsing.
12219  *
12220  * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12221  */
12222 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12223 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12224     if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12225         return(-1);
12226     if (ctxt->instate == XML_PARSER_START_TAG) {
12227         if (memchr(chunk, '>', size) != NULL)
12228             return(1);
12229         return(0);
12230     }
12231     if (ctxt->progressive == XML_PARSER_COMMENT) {
12232         if (memchr(chunk, '>', size) != NULL)
12233             return(1);
12234         return(0);
12235     }
12236     if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12237         if (memchr(chunk, '>', size) != NULL)
12238             return(1);
12239         return(0);
12240     }
12241     if (ctxt->progressive == XML_PARSER_PI) {
12242         if (memchr(chunk, '>', size) != NULL)
12243             return(1);
12244         return(0);
12245     }
12246     if (ctxt->instate == XML_PARSER_END_TAG) {
12247         if (memchr(chunk, '>', size) != NULL)
12248             return(1);
12249         return(0);
12250     }
12251     if ((ctxt->progressive == XML_PARSER_DTD) ||
12252         (ctxt->instate == XML_PARSER_DTD)) {
12253         if (memchr(chunk, '>', size) != NULL)
12254             return(1);
12255         return(0);
12256     }
12257     return(1);
12258 }
12259 
12260 /**
12261  * xmlParseChunk:
12262  * @ctxt:  an XML parser context
12263  * @chunk:  a char array
12264  * @size:  the size in bytes of the chunk
12265  * @terminate:  last chunk indicator
12266  *
12267  * Parse a Chunk of memory
12268  *
12269  * Returns zero if no error, the xmlParserErrors otherwise.
12270  */
12271 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12272 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12273               int terminate) {
12274     int end_in_lf = 0;
12275     int remain = 0;
12276     size_t old_avail = 0;
12277     size_t avail = 0;
12278 
12279     if (ctxt == NULL)
12280         return(XML_ERR_INTERNAL_ERROR);
12281     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12282         return(ctxt->errNo);
12283     if (ctxt->instate == XML_PARSER_EOF)
12284         return(-1);
12285     if (ctxt->instate == XML_PARSER_START)
12286         xmlDetectSAX2(ctxt);
12287     if ((size > 0) && (chunk != NULL) && (!terminate) &&
12288         (chunk[size - 1] == '\r')) {
12289 	end_in_lf = 1;
12290 	size--;
12291     }
12292 
12293 xmldecl_done:
12294 
12295     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12296         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12297 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12298 	size_t cur = ctxt->input->cur - ctxt->input->base;
12299 	int res;
12300 
12301         old_avail = xmlBufUse(ctxt->input->buf->buffer);
12302         /*
12303          * Specific handling if we autodetected an encoding, we should not
12304          * push more than the first line ... which depend on the encoding
12305          * And only push the rest once the final encoding was detected
12306          */
12307         if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12308             (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12309             unsigned int len = 45;
12310 
12311             if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12312                                BAD_CAST "UTF-16")) ||
12313                 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12314                                BAD_CAST "UTF16")))
12315                 len = 90;
12316             else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12317                                     BAD_CAST "UCS-4")) ||
12318                      (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12319                                     BAD_CAST "UCS4")))
12320                 len = 180;
12321 
12322             if (ctxt->input->buf->rawconsumed < len)
12323                 len -= ctxt->input->buf->rawconsumed;
12324 
12325             /*
12326              * Change size for reading the initial declaration only
12327              * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12328              * will blindly copy extra bytes from memory.
12329              */
12330             if ((unsigned int) size > len) {
12331                 remain = size - len;
12332                 size = len;
12333             } else {
12334                 remain = 0;
12335             }
12336         }
12337 	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12338 	if (res < 0) {
12339 	    ctxt->errNo = XML_PARSER_EOF;
12340 	    ctxt->disableSAX = 1;
12341 	    return (XML_PARSER_EOF);
12342 	}
12343         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12344 #ifdef DEBUG_PUSH
12345 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12346 #endif
12347 
12348     } else if (ctxt->instate != XML_PARSER_EOF) {
12349 	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12350 	    xmlParserInputBufferPtr in = ctxt->input->buf;
12351 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
12352 		    (in->raw != NULL)) {
12353 		int nbchars;
12354 		size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12355 		size_t current = ctxt->input->cur - ctxt->input->base;
12356 
12357 		nbchars = xmlCharEncInput(in, terminate);
12358 		if (nbchars < 0) {
12359 		    /* TODO 2.6.0 */
12360 		    xmlGenericError(xmlGenericErrorContext,
12361 				    "xmlParseChunk: encoder error\n");
12362 		    return(XML_ERR_INVALID_ENCODING);
12363 		}
12364 		xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12365 	    }
12366 	}
12367     }
12368     if (remain != 0) {
12369         xmlParseTryOrFinish(ctxt, 0);
12370     } else {
12371         if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12372             avail = xmlBufUse(ctxt->input->buf->buffer);
12373         /*
12374          * Depending on the current state it may not be such
12375          * a good idea to try parsing if there is nothing in the chunk
12376          * which would be worth doing a parser state transition and we
12377          * need to wait for more data
12378          */
12379         if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12380             (old_avail == 0) || (avail == 0) ||
12381             (xmlParseCheckTransition(ctxt,
12382                        (const char *)&ctxt->input->base[old_avail],
12383                                      avail - old_avail)))
12384             xmlParseTryOrFinish(ctxt, terminate);
12385     }
12386     if (ctxt->instate == XML_PARSER_EOF)
12387         return(ctxt->errNo);
12388 
12389     if ((ctxt->input != NULL) &&
12390          (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12391          ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12392         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12393         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12394         ctxt->instate = XML_PARSER_EOF;
12395     }
12396     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12397         return(ctxt->errNo);
12398 
12399     if (remain != 0) {
12400         chunk += size;
12401         size = remain;
12402         remain = 0;
12403         goto xmldecl_done;
12404     }
12405     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12406         (ctxt->input->buf != NULL)) {
12407 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12408 					 ctxt->input);
12409 	size_t current = ctxt->input->cur - ctxt->input->base;
12410 
12411 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12412 
12413 	xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12414 			      base, current);
12415     }
12416     if (terminate) {
12417 	/*
12418 	 * Check for termination
12419 	 */
12420 	int cur_avail = 0;
12421 
12422 	if (ctxt->input != NULL) {
12423 	    if (ctxt->input->buf == NULL)
12424 		cur_avail = ctxt->input->length -
12425 			    (ctxt->input->cur - ctxt->input->base);
12426 	    else
12427 		cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12428 			              (ctxt->input->cur - ctxt->input->base);
12429 	}
12430 
12431 	if ((ctxt->instate != XML_PARSER_EOF) &&
12432 	    (ctxt->instate != XML_PARSER_EPILOG)) {
12433 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12434 	}
12435 	if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12436 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12437 	}
12438 	if (ctxt->instate != XML_PARSER_EOF) {
12439 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12440 		ctxt->sax->endDocument(ctxt->userData);
12441 	}
12442 	ctxt->instate = XML_PARSER_EOF;
12443     }
12444     if (ctxt->wellFormed == 0)
12445 	return((xmlParserErrors) ctxt->errNo);
12446     else
12447         return(0);
12448 }
12449 
12450 /************************************************************************
12451  *									*
12452  *		I/O front end functions to the parser			*
12453  *									*
12454  ************************************************************************/
12455 
12456 /**
12457  * xmlCreatePushParserCtxt:
12458  * @sax:  a SAX handler
12459  * @user_data:  The user data returned on SAX callbacks
12460  * @chunk:  a pointer to an array of chars
12461  * @size:  number of chars in the array
12462  * @filename:  an optional file name or URI
12463  *
12464  * Create a parser context for using the XML parser in push mode.
12465  * If @buffer and @size are non-NULL, the data is used to detect
12466  * the encoding.  The remaining characters will be parsed so they
12467  * don't need to be fed in again through xmlParseChunk.
12468  * To allow content encoding detection, @size should be >= 4
12469  * The value of @filename is used for fetching external entities
12470  * and error/warning reports.
12471  *
12472  * Returns the new parser context or NULL
12473  */
12474 
12475 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12476 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12477                         const char *chunk, int size, const char *filename) {
12478     xmlParserCtxtPtr ctxt;
12479     xmlParserInputPtr inputStream;
12480     xmlParserInputBufferPtr buf;
12481     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12482 
12483     /*
12484      * plug some encoding conversion routines
12485      */
12486     if ((chunk != NULL) && (size >= 4))
12487 	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12488 
12489     buf = xmlAllocParserInputBuffer(enc);
12490     if (buf == NULL) return(NULL);
12491 
12492     ctxt = xmlNewParserCtxt();
12493     if (ctxt == NULL) {
12494         xmlErrMemory(NULL, "creating parser: out of memory\n");
12495 	xmlFreeParserInputBuffer(buf);
12496 	return(NULL);
12497     }
12498     ctxt->dictNames = 1;
12499     ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12500     if (ctxt->pushTab == NULL) {
12501         xmlErrMemory(ctxt, NULL);
12502 	xmlFreeParserInputBuffer(buf);
12503 	xmlFreeParserCtxt(ctxt);
12504 	return(NULL);
12505     }
12506     if (sax != NULL) {
12507 #ifdef LIBXML_SAX1_ENABLED
12508 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12509 #endif /* LIBXML_SAX1_ENABLED */
12510 	    xmlFree(ctxt->sax);
12511 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12512 	if (ctxt->sax == NULL) {
12513 	    xmlErrMemory(ctxt, NULL);
12514 	    xmlFreeParserInputBuffer(buf);
12515 	    xmlFreeParserCtxt(ctxt);
12516 	    return(NULL);
12517 	}
12518 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12519 	if (sax->initialized == XML_SAX2_MAGIC)
12520 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12521 	else
12522 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12523 	if (user_data != NULL)
12524 	    ctxt->userData = user_data;
12525     }
12526     if (filename == NULL) {
12527 	ctxt->directory = NULL;
12528     } else {
12529         ctxt->directory = xmlParserGetDirectory(filename);
12530     }
12531 
12532     inputStream = xmlNewInputStream(ctxt);
12533     if (inputStream == NULL) {
12534 	xmlFreeParserCtxt(ctxt);
12535 	xmlFreeParserInputBuffer(buf);
12536 	return(NULL);
12537     }
12538 
12539     if (filename == NULL)
12540 	inputStream->filename = NULL;
12541     else {
12542 	inputStream->filename = (char *)
12543 	    xmlCanonicPath((const xmlChar *) filename);
12544 	if (inputStream->filename == NULL) {
12545 	    xmlFreeParserCtxt(ctxt);
12546 	    xmlFreeParserInputBuffer(buf);
12547 	    return(NULL);
12548 	}
12549     }
12550     inputStream->buf = buf;
12551     xmlBufResetInput(inputStream->buf->buffer, inputStream);
12552     inputPush(ctxt, inputStream);
12553 
12554     /*
12555      * If the caller didn't provide an initial 'chunk' for determining
12556      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12557      * that it can be automatically determined later
12558      */
12559     if ((size == 0) || (chunk == NULL)) {
12560 	ctxt->charset = XML_CHAR_ENCODING_NONE;
12561     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12562 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12563 	size_t cur = ctxt->input->cur - ctxt->input->base;
12564 
12565 	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12566 
12567         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12568 #ifdef DEBUG_PUSH
12569 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12570 #endif
12571     }
12572 
12573     if (enc != XML_CHAR_ENCODING_NONE) {
12574         xmlSwitchEncoding(ctxt, enc);
12575     }
12576 
12577     return(ctxt);
12578 }
12579 #endif /* LIBXML_PUSH_ENABLED */
12580 
12581 /**
12582  * xmlStopParser:
12583  * @ctxt:  an XML parser context
12584  *
12585  * Blocks further parser processing
12586  */
12587 void
xmlStopParser(xmlParserCtxtPtr ctxt)12588 xmlStopParser(xmlParserCtxtPtr ctxt) {
12589     if (ctxt == NULL)
12590         return;
12591     ctxt->instate = XML_PARSER_EOF;
12592     ctxt->errNo = XML_ERR_USER_STOP;
12593     ctxt->disableSAX = 1;
12594     if (ctxt->input != NULL) {
12595 	ctxt->input->cur = BAD_CAST"";
12596 	ctxt->input->base = ctxt->input->cur;
12597     }
12598 }
12599 
12600 /**
12601  * xmlCreateIOParserCtxt:
12602  * @sax:  a SAX handler
12603  * @user_data:  The user data returned on SAX callbacks
12604  * @ioread:  an I/O read function
12605  * @ioclose:  an I/O close function
12606  * @ioctx:  an I/O handler
12607  * @enc:  the charset encoding if known
12608  *
12609  * Create a parser context for using the XML parser with an existing
12610  * I/O stream
12611  *
12612  * Returns the new parser context or NULL
12613  */
12614 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12615 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12616 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12617 	void *ioctx, xmlCharEncoding enc) {
12618     xmlParserCtxtPtr ctxt;
12619     xmlParserInputPtr inputStream;
12620     xmlParserInputBufferPtr buf;
12621 
12622     if (ioread == NULL) return(NULL);
12623 
12624     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12625     if (buf == NULL) {
12626         if (ioclose != NULL)
12627             ioclose(ioctx);
12628         return (NULL);
12629     }
12630 
12631     ctxt = xmlNewParserCtxt();
12632     if (ctxt == NULL) {
12633 	xmlFreeParserInputBuffer(buf);
12634 	return(NULL);
12635     }
12636     if (sax != NULL) {
12637 #ifdef LIBXML_SAX1_ENABLED
12638 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12639 #endif /* LIBXML_SAX1_ENABLED */
12640 	    xmlFree(ctxt->sax);
12641 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12642 	if (ctxt->sax == NULL) {
12643 	    xmlErrMemory(ctxt, NULL);
12644 	    xmlFreeParserCtxt(ctxt);
12645 	    return(NULL);
12646 	}
12647 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12648 	if (sax->initialized == XML_SAX2_MAGIC)
12649 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12650 	else
12651 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12652 	if (user_data != NULL)
12653 	    ctxt->userData = user_data;
12654     }
12655 
12656     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12657     if (inputStream == NULL) {
12658 	xmlFreeParserCtxt(ctxt);
12659 	return(NULL);
12660     }
12661     inputPush(ctxt, inputStream);
12662 
12663     return(ctxt);
12664 }
12665 
12666 #ifdef LIBXML_VALID_ENABLED
12667 /************************************************************************
12668  *									*
12669  *		Front ends when parsing a DTD				*
12670  *									*
12671  ************************************************************************/
12672 
12673 /**
12674  * xmlIOParseDTD:
12675  * @sax:  the SAX handler block or NULL
12676  * @input:  an Input Buffer
12677  * @enc:  the charset encoding if known
12678  *
12679  * Load and parse a DTD
12680  *
12681  * Returns the resulting xmlDtdPtr or NULL in case of error.
12682  * @input will be freed by the function in any case.
12683  */
12684 
12685 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12686 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12687 	      xmlCharEncoding enc) {
12688     xmlDtdPtr ret = NULL;
12689     xmlParserCtxtPtr ctxt;
12690     xmlParserInputPtr pinput = NULL;
12691     xmlChar start[4];
12692 
12693     if (input == NULL)
12694 	return(NULL);
12695 
12696     ctxt = xmlNewParserCtxt();
12697     if (ctxt == NULL) {
12698         xmlFreeParserInputBuffer(input);
12699 	return(NULL);
12700     }
12701 
12702     /* We are loading a DTD */
12703     ctxt->options |= XML_PARSE_DTDLOAD;
12704 
12705     /*
12706      * Set-up the SAX context
12707      */
12708     if (sax != NULL) {
12709 	if (ctxt->sax != NULL)
12710 	    xmlFree(ctxt->sax);
12711         ctxt->sax = sax;
12712         ctxt->userData = ctxt;
12713     }
12714     xmlDetectSAX2(ctxt);
12715 
12716     /*
12717      * generate a parser input from the I/O handler
12718      */
12719 
12720     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12721     if (pinput == NULL) {
12722         if (sax != NULL) ctxt->sax = NULL;
12723         xmlFreeParserInputBuffer(input);
12724 	xmlFreeParserCtxt(ctxt);
12725 	return(NULL);
12726     }
12727 
12728     /*
12729      * plug some encoding conversion routines here.
12730      */
12731     if (xmlPushInput(ctxt, pinput) < 0) {
12732         if (sax != NULL) ctxt->sax = NULL;
12733 	xmlFreeParserCtxt(ctxt);
12734 	return(NULL);
12735     }
12736     if (enc != XML_CHAR_ENCODING_NONE) {
12737         xmlSwitchEncoding(ctxt, enc);
12738     }
12739 
12740     pinput->filename = NULL;
12741     pinput->line = 1;
12742     pinput->col = 1;
12743     pinput->base = ctxt->input->cur;
12744     pinput->cur = ctxt->input->cur;
12745     pinput->free = NULL;
12746 
12747     /*
12748      * let's parse that entity knowing it's an external subset.
12749      */
12750     ctxt->inSubset = 2;
12751     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12752     if (ctxt->myDoc == NULL) {
12753 	xmlErrMemory(ctxt, "New Doc failed");
12754 	return(NULL);
12755     }
12756     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12757     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12758 	                               BAD_CAST "none", BAD_CAST "none");
12759 
12760     if ((enc == XML_CHAR_ENCODING_NONE) &&
12761         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12762 	/*
12763 	 * Get the 4 first bytes and decode the charset
12764 	 * if enc != XML_CHAR_ENCODING_NONE
12765 	 * plug some encoding conversion routines.
12766 	 */
12767 	start[0] = RAW;
12768 	start[1] = NXT(1);
12769 	start[2] = NXT(2);
12770 	start[3] = NXT(3);
12771 	enc = xmlDetectCharEncoding(start, 4);
12772 	if (enc != XML_CHAR_ENCODING_NONE) {
12773 	    xmlSwitchEncoding(ctxt, enc);
12774 	}
12775     }
12776 
12777     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12778 
12779     if (ctxt->myDoc != NULL) {
12780 	if (ctxt->wellFormed) {
12781 	    ret = ctxt->myDoc->extSubset;
12782 	    ctxt->myDoc->extSubset = NULL;
12783 	    if (ret != NULL) {
12784 		xmlNodePtr tmp;
12785 
12786 		ret->doc = NULL;
12787 		tmp = ret->children;
12788 		while (tmp != NULL) {
12789 		    tmp->doc = NULL;
12790 		    tmp = tmp->next;
12791 		}
12792 	    }
12793 	} else {
12794 	    ret = NULL;
12795 	}
12796         xmlFreeDoc(ctxt->myDoc);
12797         ctxt->myDoc = NULL;
12798     }
12799     if (sax != NULL) ctxt->sax = NULL;
12800     xmlFreeParserCtxt(ctxt);
12801 
12802     return(ret);
12803 }
12804 
12805 /**
12806  * xmlSAXParseDTD:
12807  * @sax:  the SAX handler block
12808  * @ExternalID:  a NAME* containing the External ID of the DTD
12809  * @SystemID:  a NAME* containing the URL to the DTD
12810  *
12811  * Load and parse an external subset.
12812  *
12813  * Returns the resulting xmlDtdPtr or NULL in case of error.
12814  */
12815 
12816 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12817 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12818                           const xmlChar *SystemID) {
12819     xmlDtdPtr ret = NULL;
12820     xmlParserCtxtPtr ctxt;
12821     xmlParserInputPtr input = NULL;
12822     xmlCharEncoding enc;
12823     xmlChar* systemIdCanonic;
12824 
12825     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12826 
12827     ctxt = xmlNewParserCtxt();
12828     if (ctxt == NULL) {
12829 	return(NULL);
12830     }
12831 
12832     /* We are loading a DTD */
12833     ctxt->options |= XML_PARSE_DTDLOAD;
12834 
12835     /*
12836      * Set-up the SAX context
12837      */
12838     if (sax != NULL) {
12839 	if (ctxt->sax != NULL)
12840 	    xmlFree(ctxt->sax);
12841         ctxt->sax = sax;
12842         ctxt->userData = ctxt;
12843     }
12844 
12845     /*
12846      * Canonicalise the system ID
12847      */
12848     systemIdCanonic = xmlCanonicPath(SystemID);
12849     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12850 	xmlFreeParserCtxt(ctxt);
12851 	return(NULL);
12852     }
12853 
12854     /*
12855      * Ask the Entity resolver to load the damn thing
12856      */
12857 
12858     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12859 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12860 	                                 systemIdCanonic);
12861     if (input == NULL) {
12862         if (sax != NULL) ctxt->sax = NULL;
12863 	xmlFreeParserCtxt(ctxt);
12864 	if (systemIdCanonic != NULL)
12865 	    xmlFree(systemIdCanonic);
12866 	return(NULL);
12867     }
12868 
12869     /*
12870      * plug some encoding conversion routines here.
12871      */
12872     if (xmlPushInput(ctxt, input) < 0) {
12873         if (sax != NULL) ctxt->sax = NULL;
12874 	xmlFreeParserCtxt(ctxt);
12875 	if (systemIdCanonic != NULL)
12876 	    xmlFree(systemIdCanonic);
12877 	return(NULL);
12878     }
12879     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12880 	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12881 	xmlSwitchEncoding(ctxt, enc);
12882     }
12883 
12884     if (input->filename == NULL)
12885 	input->filename = (char *) systemIdCanonic;
12886     else
12887 	xmlFree(systemIdCanonic);
12888     input->line = 1;
12889     input->col = 1;
12890     input->base = ctxt->input->cur;
12891     input->cur = ctxt->input->cur;
12892     input->free = NULL;
12893 
12894     /*
12895      * let's parse that entity knowing it's an external subset.
12896      */
12897     ctxt->inSubset = 2;
12898     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12899     if (ctxt->myDoc == NULL) {
12900 	xmlErrMemory(ctxt, "New Doc failed");
12901         if (sax != NULL) ctxt->sax = NULL;
12902 	xmlFreeParserCtxt(ctxt);
12903 	return(NULL);
12904     }
12905     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12906     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12907 	                               ExternalID, SystemID);
12908     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12909 
12910     if (ctxt->myDoc != NULL) {
12911 	if (ctxt->wellFormed) {
12912 	    ret = ctxt->myDoc->extSubset;
12913 	    ctxt->myDoc->extSubset = NULL;
12914 	    if (ret != NULL) {
12915 		xmlNodePtr tmp;
12916 
12917 		ret->doc = NULL;
12918 		tmp = ret->children;
12919 		while (tmp != NULL) {
12920 		    tmp->doc = NULL;
12921 		    tmp = tmp->next;
12922 		}
12923 	    }
12924 	} else {
12925 	    ret = NULL;
12926 	}
12927         xmlFreeDoc(ctxt->myDoc);
12928         ctxt->myDoc = NULL;
12929     }
12930     if (sax != NULL) ctxt->sax = NULL;
12931     xmlFreeParserCtxt(ctxt);
12932 
12933     return(ret);
12934 }
12935 
12936 
12937 /**
12938  * xmlParseDTD:
12939  * @ExternalID:  a NAME* containing the External ID of the DTD
12940  * @SystemID:  a NAME* containing the URL to the DTD
12941  *
12942  * Load and parse an external subset.
12943  *
12944  * Returns the resulting xmlDtdPtr or NULL in case of error.
12945  */
12946 
12947 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12948 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12949     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12950 }
12951 #endif /* LIBXML_VALID_ENABLED */
12952 
12953 /************************************************************************
12954  *									*
12955  *		Front ends when parsing an Entity			*
12956  *									*
12957  ************************************************************************/
12958 
12959 /**
12960  * xmlParseCtxtExternalEntity:
12961  * @ctx:  the existing parsing context
12962  * @URL:  the URL for the entity to load
12963  * @ID:  the System ID for the entity to load
12964  * @lst:  the return value for the set of parsed nodes
12965  *
12966  * Parse an external general entity within an existing parsing context
12967  * An external general parsed entity is well-formed if it matches the
12968  * production labeled extParsedEnt.
12969  *
12970  * [78] extParsedEnt ::= TextDecl? content
12971  *
12972  * Returns 0 if the entity is well formed, -1 in case of args problem and
12973  *    the parser error code otherwise
12974  */
12975 
12976 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12977 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12978 	               const xmlChar *ID, xmlNodePtr *lst) {
12979     xmlParserCtxtPtr ctxt;
12980     xmlDocPtr newDoc;
12981     xmlNodePtr newRoot;
12982     xmlSAXHandlerPtr oldsax = NULL;
12983     int ret = 0;
12984     xmlChar start[4];
12985     xmlCharEncoding enc;
12986 
12987     if (ctx == NULL) return(-1);
12988 
12989     if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12990         (ctx->depth > 1024)) {
12991 	return(XML_ERR_ENTITY_LOOP);
12992     }
12993 
12994     if (lst != NULL)
12995         *lst = NULL;
12996     if ((URL == NULL) && (ID == NULL))
12997 	return(-1);
12998     if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12999 	return(-1);
13000 
13001     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
13002     if (ctxt == NULL) {
13003 	return(-1);
13004     }
13005 
13006     oldsax = ctxt->sax;
13007     ctxt->sax = ctx->sax;
13008     xmlDetectSAX2(ctxt);
13009     newDoc = xmlNewDoc(BAD_CAST "1.0");
13010     if (newDoc == NULL) {
13011 	xmlFreeParserCtxt(ctxt);
13012 	return(-1);
13013     }
13014     newDoc->properties = XML_DOC_INTERNAL;
13015     if (ctx->myDoc->dict) {
13016 	newDoc->dict = ctx->myDoc->dict;
13017 	xmlDictReference(newDoc->dict);
13018     }
13019     if (ctx->myDoc != NULL) {
13020 	newDoc->intSubset = ctx->myDoc->intSubset;
13021 	newDoc->extSubset = ctx->myDoc->extSubset;
13022     }
13023     if (ctx->myDoc->URL != NULL) {
13024 	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13025     }
13026     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13027     if (newRoot == NULL) {
13028 	ctxt->sax = oldsax;
13029 	xmlFreeParserCtxt(ctxt);
13030 	newDoc->intSubset = NULL;
13031 	newDoc->extSubset = NULL;
13032         xmlFreeDoc(newDoc);
13033 	return(-1);
13034     }
13035     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13036     nodePush(ctxt, newDoc->children);
13037     if (ctx->myDoc == NULL) {
13038 	ctxt->myDoc = newDoc;
13039     } else {
13040 	ctxt->myDoc = ctx->myDoc;
13041 	newDoc->children->doc = ctx->myDoc;
13042     }
13043 
13044     /*
13045      * Get the 4 first bytes and decode the charset
13046      * if enc != XML_CHAR_ENCODING_NONE
13047      * plug some encoding conversion routines.
13048      */
13049     GROW
13050     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13051 	start[0] = RAW;
13052 	start[1] = NXT(1);
13053 	start[2] = NXT(2);
13054 	start[3] = NXT(3);
13055 	enc = xmlDetectCharEncoding(start, 4);
13056 	if (enc != XML_CHAR_ENCODING_NONE) {
13057 	    xmlSwitchEncoding(ctxt, enc);
13058 	}
13059     }
13060 
13061     /*
13062      * Parse a possible text declaration first
13063      */
13064     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13065 	xmlParseTextDecl(ctxt);
13066 	/*
13067 	 * An XML-1.0 document can't reference an entity not XML-1.0
13068 	 */
13069 	if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13070 	    (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13071 	    xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13072 	                   "Version mismatch between document and entity\n");
13073 	}
13074     }
13075 
13076     /*
13077      * If the user provided its own SAX callbacks then reuse the
13078      * useData callback field, otherwise the expected setup in a
13079      * DOM builder is to have userData == ctxt
13080      */
13081     if (ctx->userData == ctx)
13082         ctxt->userData = ctxt;
13083     else
13084         ctxt->userData = ctx->userData;
13085 
13086     /*
13087      * Doing validity checking on chunk doesn't make sense
13088      */
13089     ctxt->instate = XML_PARSER_CONTENT;
13090     ctxt->validate = ctx->validate;
13091     ctxt->valid = ctx->valid;
13092     ctxt->loadsubset = ctx->loadsubset;
13093     ctxt->depth = ctx->depth + 1;
13094     ctxt->replaceEntities = ctx->replaceEntities;
13095     if (ctxt->validate) {
13096 	ctxt->vctxt.error = ctx->vctxt.error;
13097 	ctxt->vctxt.warning = ctx->vctxt.warning;
13098     } else {
13099 	ctxt->vctxt.error = NULL;
13100 	ctxt->vctxt.warning = NULL;
13101     }
13102     ctxt->vctxt.nodeTab = NULL;
13103     ctxt->vctxt.nodeNr = 0;
13104     ctxt->vctxt.nodeMax = 0;
13105     ctxt->vctxt.node = NULL;
13106     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13107     ctxt->dict = ctx->dict;
13108     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13109     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13110     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13111     ctxt->dictNames = ctx->dictNames;
13112     ctxt->attsDefault = ctx->attsDefault;
13113     ctxt->attsSpecial = ctx->attsSpecial;
13114     ctxt->linenumbers = ctx->linenumbers;
13115 
13116     xmlParseContent(ctxt);
13117 
13118     ctx->validate = ctxt->validate;
13119     ctx->valid = ctxt->valid;
13120     if ((RAW == '<') && (NXT(1) == '/')) {
13121 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13122     } else if (RAW != 0) {
13123 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13124     }
13125     if (ctxt->node != newDoc->children) {
13126 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13127     }
13128 
13129     if (!ctxt->wellFormed) {
13130         if (ctxt->errNo == 0)
13131 	    ret = 1;
13132 	else
13133 	    ret = ctxt->errNo;
13134     } else {
13135 	if (lst != NULL) {
13136 	    xmlNodePtr cur;
13137 
13138 	    /*
13139 	     * Return the newly created nodeset after unlinking it from
13140 	     * they pseudo parent.
13141 	     */
13142 	    cur = newDoc->children->children;
13143 	    *lst = cur;
13144 	    while (cur != NULL) {
13145 		cur->parent = NULL;
13146 		cur = cur->next;
13147 	    }
13148             newDoc->children->children = NULL;
13149 	}
13150 	ret = 0;
13151     }
13152     ctxt->sax = oldsax;
13153     ctxt->dict = NULL;
13154     ctxt->attsDefault = NULL;
13155     ctxt->attsSpecial = NULL;
13156     xmlFreeParserCtxt(ctxt);
13157     newDoc->intSubset = NULL;
13158     newDoc->extSubset = NULL;
13159     xmlFreeDoc(newDoc);
13160 
13161     return(ret);
13162 }
13163 
13164 /**
13165  * xmlParseExternalEntityPrivate:
13166  * @doc:  the document the chunk pertains to
13167  * @oldctxt:  the previous parser context if available
 * @sax:  the SAX handler block (possibly NULL)
13169  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13170  * @depth:  Used for loop detection, use 0
13171  * @URL:  the URL for the entity to load
13172  * @ID:  the System ID for the entity to load
13173  * @list:  the return value for the set of parsed nodes
13174  *
13175  * Private version of xmlParseExternalEntity()
13176  *
 * Returns 0 if the entity is well formed, XML_ERR_INTERNAL_ERROR in case
 *    of args problem and the parser error code otherwise
13179  */
13180 
13181 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13182 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13183 	              xmlSAXHandlerPtr sax,
13184 		      void *user_data, int depth, const xmlChar *URL,
13185 		      const xmlChar *ID, xmlNodePtr *list) {
13186     xmlParserCtxtPtr ctxt;
13187     xmlDocPtr newDoc;
13188     xmlNodePtr newRoot;
13189     xmlSAXHandlerPtr oldsax = NULL;
13190     xmlParserErrors ret = XML_ERR_OK;
13191     xmlChar start[4];
13192     xmlCharEncoding enc;
13193 
13194     if (((depth > 40) &&
13195 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13196 	(depth > 1024)) {
13197 	return(XML_ERR_ENTITY_LOOP);
13198     }
13199 
13200     if (list != NULL)
13201         *list = NULL;
13202     if ((URL == NULL) && (ID == NULL))
13203 	return(XML_ERR_INTERNAL_ERROR);
13204     if (doc == NULL)
13205 	return(XML_ERR_INTERNAL_ERROR);
13206 
13207 
13208     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13209     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13210     ctxt->userData = ctxt;
13211     if (oldctxt != NULL) {
13212 	ctxt->_private = oldctxt->_private;
13213 	ctxt->loadsubset = oldctxt->loadsubset;
13214 	ctxt->validate = oldctxt->validate;
13215 	ctxt->external = oldctxt->external;
13216 	ctxt->record_info = oldctxt->record_info;
13217 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13218 	ctxt->node_seq.length = oldctxt->node_seq.length;
13219 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13220     } else {
13221 	/*
13222 	 * Doing validity checking on chunk without context
13223 	 * doesn't make sense
13224 	 */
13225 	ctxt->_private = NULL;
13226 	ctxt->validate = 0;
13227 	ctxt->external = 2;
13228 	ctxt->loadsubset = 0;
13229     }
13230     if (sax != NULL) {
13231 	oldsax = ctxt->sax;
13232         ctxt->sax = sax;
13233 	if (user_data != NULL)
13234 	    ctxt->userData = user_data;
13235     }
13236     xmlDetectSAX2(ctxt);
13237     newDoc = xmlNewDoc(BAD_CAST "1.0");
13238     if (newDoc == NULL) {
13239 	ctxt->node_seq.maximum = 0;
13240 	ctxt->node_seq.length = 0;
13241 	ctxt->node_seq.buffer = NULL;
13242 	xmlFreeParserCtxt(ctxt);
13243 	return(XML_ERR_INTERNAL_ERROR);
13244     }
13245     newDoc->properties = XML_DOC_INTERNAL;
13246     newDoc->intSubset = doc->intSubset;
13247     newDoc->extSubset = doc->extSubset;
13248     newDoc->dict = doc->dict;
13249     xmlDictReference(newDoc->dict);
13250 
13251     if (doc->URL != NULL) {
13252 	newDoc->URL = xmlStrdup(doc->URL);
13253     }
13254     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13255     if (newRoot == NULL) {
13256 	if (sax != NULL)
13257 	    ctxt->sax = oldsax;
13258 	ctxt->node_seq.maximum = 0;
13259 	ctxt->node_seq.length = 0;
13260 	ctxt->node_seq.buffer = NULL;
13261 	xmlFreeParserCtxt(ctxt);
13262 	newDoc->intSubset = NULL;
13263 	newDoc->extSubset = NULL;
13264         xmlFreeDoc(newDoc);
13265 	return(XML_ERR_INTERNAL_ERROR);
13266     }
13267     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13268     nodePush(ctxt, newDoc->children);
13269     ctxt->myDoc = doc;
13270     newRoot->doc = doc;
13271 
13272     /*
13273      * Get the 4 first bytes and decode the charset
13274      * if enc != XML_CHAR_ENCODING_NONE
13275      * plug some encoding conversion routines.
13276      */
13277     GROW;
13278     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13279 	start[0] = RAW;
13280 	start[1] = NXT(1);
13281 	start[2] = NXT(2);
13282 	start[3] = NXT(3);
13283 	enc = xmlDetectCharEncoding(start, 4);
13284 	if (enc != XML_CHAR_ENCODING_NONE) {
13285 	    xmlSwitchEncoding(ctxt, enc);
13286 	}
13287     }
13288 
13289     /*
13290      * Parse a possible text declaration first
13291      */
13292     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13293 	xmlParseTextDecl(ctxt);
13294     }
13295 
13296     ctxt->instate = XML_PARSER_CONTENT;
13297     ctxt->depth = depth;
13298 
13299     xmlParseContent(ctxt);
13300 
13301     if ((RAW == '<') && (NXT(1) == '/')) {
13302 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13303     } else if (RAW != 0) {
13304 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13305     }
13306     if (ctxt->node != newDoc->children) {
13307 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13308     }
13309 
13310     if (!ctxt->wellFormed) {
13311         if (ctxt->errNo == 0)
13312 	    ret = XML_ERR_INTERNAL_ERROR;
13313 	else
13314 	    ret = (xmlParserErrors)ctxt->errNo;
13315     } else {
13316 	if (list != NULL) {
13317 	    xmlNodePtr cur;
13318 
13319 	    /*
13320 	     * Return the newly created nodeset after unlinking it from
13321 	     * they pseudo parent.
13322 	     */
13323 	    cur = newDoc->children->children;
13324 	    *list = cur;
13325 	    while (cur != NULL) {
13326 		cur->parent = NULL;
13327 		cur = cur->next;
13328 	    }
13329             newDoc->children->children = NULL;
13330 	}
13331 	ret = XML_ERR_OK;
13332     }
13333 
13334     /*
13335      * Record in the parent context the number of entities replacement
13336      * done when parsing that reference.
13337      */
13338     if (oldctxt != NULL)
13339         oldctxt->nbentities += ctxt->nbentities;
13340 
13341     /*
13342      * Also record the size of the entity parsed
13343      */
13344     if (ctxt->input != NULL) {
13345 	oldctxt->sizeentities += ctxt->input->consumed;
13346 	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13347     }
13348     /*
13349      * And record the last error if any
13350      */
13351     if (ctxt->lastError.code != XML_ERR_OK)
13352         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13353 
13354     if (sax != NULL)
13355 	ctxt->sax = oldsax;
13356     oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13357     oldctxt->node_seq.length = ctxt->node_seq.length;
13358     oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13359     ctxt->node_seq.maximum = 0;
13360     ctxt->node_seq.length = 0;
13361     ctxt->node_seq.buffer = NULL;
13362     xmlFreeParserCtxt(ctxt);
13363     newDoc->intSubset = NULL;
13364     newDoc->extSubset = NULL;
13365     xmlFreeDoc(newDoc);
13366 
13367     return(ret);
13368 }
13369 
13370 #ifdef LIBXML_SAX1_ENABLED
13371 /**
13372  * xmlParseExternalEntity:
13373  * @doc:  the document the chunk pertains to
13374  * @sax:  the SAX handler bloc (possibly NULL)
13375  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13376  * @depth:  Used for loop detection, use 0
13377  * @URL:  the URL for the entity to load
13378  * @ID:  the System ID for the entity to load
13379  * @lst:  the return value for the set of parsed nodes
13380  *
13381  * Parse an external general entity
13382  * An external general parsed entity is well-formed if it matches the
13383  * production labeled extParsedEnt.
13384  *
13385  * [78] extParsedEnt ::= TextDecl? content
13386  *
13387  * Returns 0 if the entity is well formed, -1 in case of args problem and
13388  *    the parser error code otherwise
13389  */
13390 
13391 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13392 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13393 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13394     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13395 		                       ID, lst));
13396 }
13397 
13398 /**
13399  * xmlParseBalancedChunkMemory:
13400  * @doc:  the document the chunk pertains to
13401  * @sax:  the SAX handler bloc (possibly NULL)
13402  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13403  * @depth:  Used for loop detection, use 0
13404  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13405  * @lst:  the return value for the set of parsed nodes
13406  *
13407  * Parse a well-balanced chunk of an XML document
13408  * called by the parser
13409  * The allowed sequence for the Well Balanced Chunk is the one defined by
13410  * the content production in the XML grammar:
13411  *
13412  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13413  *
13414  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13415  *    the parser error code otherwise
13416  */
13417 
13418 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13419 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13420      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13421     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13422                                                 depth, string, lst, 0 );
13423 }
13424 #endif /* LIBXML_SAX1_ENABLED */
13425 
13426 /**
13427  * xmlParseBalancedChunkMemoryInternal:
13428  * @oldctxt:  the existing parsing context
13429  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13430  * @user_data:  the user data field for the parser context
13431  * @lst:  the return value for the set of parsed nodes
13432  *
13433  *
13434  * Parse a well-balanced chunk of an XML document
13435  * called by the parser
13436  * The allowed sequence for the Well Balanced Chunk is the one defined by
13437  * the content production in the XML grammar:
13438  *
13439  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13440  *
13441  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13442  * error code otherwise
13443  *
13444  * In case recover is set to 1, the nodelist will not be empty even if
13445  * the parsed chunk is not well balanced.
13446  */
13447 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13448 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13449 	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13450     xmlParserCtxtPtr ctxt;
13451     xmlDocPtr newDoc = NULL;
13452     xmlNodePtr newRoot;
13453     xmlSAXHandlerPtr oldsax = NULL;
13454     xmlNodePtr content = NULL;
13455     xmlNodePtr last = NULL;
13456     int size;
13457     xmlParserErrors ret = XML_ERR_OK;
13458 #ifdef SAX2
13459     int i;
13460 #endif
13461 
13462     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13463         (oldctxt->depth >  1024)) {
13464 	return(XML_ERR_ENTITY_LOOP);
13465     }
13466 
13467 
13468     if (lst != NULL)
13469         *lst = NULL;
13470     if (string == NULL)
13471         return(XML_ERR_INTERNAL_ERROR);
13472 
13473     size = xmlStrlen(string);
13474 
13475     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13476     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13477     if (user_data != NULL)
13478 	ctxt->userData = user_data;
13479     else
13480 	ctxt->userData = ctxt;
13481     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13482     ctxt->dict = oldctxt->dict;
13483     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13484     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13485     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13486 
13487 #ifdef SAX2
13488     /* propagate namespaces down the entity */
13489     for (i = 0;i < oldctxt->nsNr;i += 2) {
13490         nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13491     }
13492 #endif
13493 
13494     oldsax = ctxt->sax;
13495     ctxt->sax = oldctxt->sax;
13496     xmlDetectSAX2(ctxt);
13497     ctxt->replaceEntities = oldctxt->replaceEntities;
13498     ctxt->options = oldctxt->options;
13499 
13500     ctxt->_private = oldctxt->_private;
13501     if (oldctxt->myDoc == NULL) {
13502 	newDoc = xmlNewDoc(BAD_CAST "1.0");
13503 	if (newDoc == NULL) {
13504 	    ctxt->sax = oldsax;
13505 	    ctxt->dict = NULL;
13506 	    xmlFreeParserCtxt(ctxt);
13507 	    return(XML_ERR_INTERNAL_ERROR);
13508 	}
13509 	newDoc->properties = XML_DOC_INTERNAL;
13510 	newDoc->dict = ctxt->dict;
13511 	xmlDictReference(newDoc->dict);
13512 	ctxt->myDoc = newDoc;
13513     } else {
13514 	ctxt->myDoc = oldctxt->myDoc;
13515         content = ctxt->myDoc->children;
13516 	last = ctxt->myDoc->last;
13517     }
13518     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13519     if (newRoot == NULL) {
13520 	ctxt->sax = oldsax;
13521 	ctxt->dict = NULL;
13522 	xmlFreeParserCtxt(ctxt);
13523 	if (newDoc != NULL) {
13524 	    xmlFreeDoc(newDoc);
13525 	}
13526 	return(XML_ERR_INTERNAL_ERROR);
13527     }
13528     ctxt->myDoc->children = NULL;
13529     ctxt->myDoc->last = NULL;
13530     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13531     nodePush(ctxt, ctxt->myDoc->children);
13532     ctxt->instate = XML_PARSER_CONTENT;
13533     ctxt->depth = oldctxt->depth + 1;
13534 
13535     ctxt->validate = 0;
13536     ctxt->loadsubset = oldctxt->loadsubset;
13537     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13538 	/*
13539 	 * ID/IDREF registration will be done in xmlValidateElement below
13540 	 */
13541 	ctxt->loadsubset |= XML_SKIP_IDS;
13542     }
13543     ctxt->dictNames = oldctxt->dictNames;
13544     ctxt->attsDefault = oldctxt->attsDefault;
13545     ctxt->attsSpecial = oldctxt->attsSpecial;
13546 
13547     xmlParseContent(ctxt);
13548     if ((RAW == '<') && (NXT(1) == '/')) {
13549 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13550     } else if (RAW != 0) {
13551 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13552     }
13553     if (ctxt->node != ctxt->myDoc->children) {
13554 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13555     }
13556 
13557     if (!ctxt->wellFormed) {
13558         if (ctxt->errNo == 0)
13559 	    ret = XML_ERR_INTERNAL_ERROR;
13560 	else
13561 	    ret = (xmlParserErrors)ctxt->errNo;
13562     } else {
13563       ret = XML_ERR_OK;
13564     }
13565 
13566     if ((lst != NULL) && (ret == XML_ERR_OK)) {
13567 	xmlNodePtr cur;
13568 
13569 	/*
13570 	 * Return the newly created nodeset after unlinking it from
13571 	 * they pseudo parent.
13572 	 */
13573 	cur = ctxt->myDoc->children->children;
13574 	*lst = cur;
13575 	while (cur != NULL) {
13576 #ifdef LIBXML_VALID_ENABLED
13577 	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13578 		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13579 		(cur->type == XML_ELEMENT_NODE)) {
13580 		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13581 			oldctxt->myDoc, cur);
13582 	    }
13583 #endif /* LIBXML_VALID_ENABLED */
13584 	    cur->parent = NULL;
13585 	    cur = cur->next;
13586 	}
13587 	ctxt->myDoc->children->children = NULL;
13588     }
13589     if (ctxt->myDoc != NULL) {
13590 	xmlFreeNode(ctxt->myDoc->children);
13591         ctxt->myDoc->children = content;
13592         ctxt->myDoc->last = last;
13593     }
13594 
13595     /*
13596      * Record in the parent context the number of entities replacement
13597      * done when parsing that reference.
13598      */
13599     if (oldctxt != NULL)
13600         oldctxt->nbentities += ctxt->nbentities;
13601 
13602     /*
13603      * Also record the last error if any
13604      */
13605     if (ctxt->lastError.code != XML_ERR_OK)
13606         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13607 
13608     ctxt->sax = oldsax;
13609     ctxt->dict = NULL;
13610     ctxt->attsDefault = NULL;
13611     ctxt->attsSpecial = NULL;
13612     xmlFreeParserCtxt(ctxt);
13613     if (newDoc != NULL) {
13614 	xmlFreeDoc(newDoc);
13615     }
13616 
13617     return(ret);
13618 }
13619 
13620 /**
13621  * xmlParseInNodeContext:
13622  * @node:  the context node
13623  * @data:  the input string
13624  * @datalen:  the input string length in bytes
13625  * @options:  a combination of xmlParserOption
13626  * @lst:  the return value for the set of parsed nodes
13627  *
13628  * Parse a well-balanced chunk of an XML document
13629  * within the context (DTD, namespaces, etc ...) of the given node.
13630  *
13631  * The allowed sequence for the data is a Well Balanced Chunk defined by
13632  * the content production in the XML grammar:
13633  *
13634  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13635  *
13636  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13637  * error code otherwise
13638  */
13639 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13640 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13641                       int options, xmlNodePtr *lst) {
13642 #ifdef SAX2
13643     xmlParserCtxtPtr ctxt;
13644     xmlDocPtr doc = NULL;
13645     xmlNodePtr fake, cur;
13646     int nsnr = 0;
13647 
13648     xmlParserErrors ret = XML_ERR_OK;
13649 
13650     /*
13651      * check all input parameters, grab the document
13652      */
13653     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13654         return(XML_ERR_INTERNAL_ERROR);
13655     switch (node->type) {
13656         case XML_ELEMENT_NODE:
13657         case XML_ATTRIBUTE_NODE:
13658         case XML_TEXT_NODE:
13659         case XML_CDATA_SECTION_NODE:
13660         case XML_ENTITY_REF_NODE:
13661         case XML_PI_NODE:
13662         case XML_COMMENT_NODE:
13663         case XML_DOCUMENT_NODE:
13664         case XML_HTML_DOCUMENT_NODE:
13665 	    break;
13666 	default:
13667 	    return(XML_ERR_INTERNAL_ERROR);
13668 
13669     }
13670     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13671            (node->type != XML_DOCUMENT_NODE) &&
13672 	   (node->type != XML_HTML_DOCUMENT_NODE))
13673 	node = node->parent;
13674     if (node == NULL)
13675 	return(XML_ERR_INTERNAL_ERROR);
13676     if (node->type == XML_ELEMENT_NODE)
13677 	doc = node->doc;
13678     else
13679         doc = (xmlDocPtr) node;
13680     if (doc == NULL)
13681 	return(XML_ERR_INTERNAL_ERROR);
13682 
13683     /*
13684      * allocate a context and set-up everything not related to the
13685      * node position in the tree
13686      */
13687     if (doc->type == XML_DOCUMENT_NODE)
13688 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13689 #ifdef LIBXML_HTML_ENABLED
13690     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13691 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13692         /*
13693          * When parsing in context, it makes no sense to add implied
13694          * elements like html/body/etc...
13695          */
13696         options |= HTML_PARSE_NOIMPLIED;
13697     }
13698 #endif
13699     else
13700         return(XML_ERR_INTERNAL_ERROR);
13701 
13702     if (ctxt == NULL)
13703         return(XML_ERR_NO_MEMORY);
13704 
13705     /*
13706      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13707      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13708      * we must wait until the last moment to free the original one.
13709      */
13710     if (doc->dict != NULL) {
13711         if (ctxt->dict != NULL)
13712 	    xmlDictFree(ctxt->dict);
13713 	ctxt->dict = doc->dict;
13714     } else
13715         options |= XML_PARSE_NODICT;
13716 
13717     if (doc->encoding != NULL) {
13718         xmlCharEncodingHandlerPtr hdlr;
13719 
13720         if (ctxt->encoding != NULL)
13721 	    xmlFree((xmlChar *) ctxt->encoding);
13722         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13723 
13724         hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13725         if (hdlr != NULL) {
13726             xmlSwitchToEncoding(ctxt, hdlr);
13727 	} else {
13728             return(XML_ERR_UNSUPPORTED_ENCODING);
13729         }
13730     }
13731 
13732     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13733     xmlDetectSAX2(ctxt);
13734     ctxt->myDoc = doc;
13735     /* parsing in context, i.e. as within existing content */
13736     ctxt->instate = XML_PARSER_CONTENT;
13737 
13738     fake = xmlNewComment(NULL);
13739     if (fake == NULL) {
13740         xmlFreeParserCtxt(ctxt);
13741 	return(XML_ERR_NO_MEMORY);
13742     }
13743     xmlAddChild(node, fake);
13744 
13745     if (node->type == XML_ELEMENT_NODE) {
13746 	nodePush(ctxt, node);
13747 	/*
13748 	 * initialize the SAX2 namespaces stack
13749 	 */
13750 	cur = node;
13751 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13752 	    xmlNsPtr ns = cur->nsDef;
13753 	    const xmlChar *iprefix, *ihref;
13754 
13755 	    while (ns != NULL) {
13756 		if (ctxt->dict) {
13757 		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13758 		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13759 		} else {
13760 		    iprefix = ns->prefix;
13761 		    ihref = ns->href;
13762 		}
13763 
13764 	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13765 		    nsPush(ctxt, iprefix, ihref);
13766 		    nsnr++;
13767 		}
13768 		ns = ns->next;
13769 	    }
13770 	    cur = cur->parent;
13771 	}
13772     }
13773 
13774     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13775 	/*
13776 	 * ID/IDREF registration will be done in xmlValidateElement below
13777 	 */
13778 	ctxt->loadsubset |= XML_SKIP_IDS;
13779     }
13780 
13781 #ifdef LIBXML_HTML_ENABLED
13782     if (doc->type == XML_HTML_DOCUMENT_NODE)
13783         __htmlParseContent(ctxt);
13784     else
13785 #endif
13786 	xmlParseContent(ctxt);
13787 
13788     nsPop(ctxt, nsnr);
13789     if ((RAW == '<') && (NXT(1) == '/')) {
13790 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13791     } else if (RAW != 0) {
13792 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13793     }
13794     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13795 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13796 	ctxt->wellFormed = 0;
13797     }
13798 
13799     if (!ctxt->wellFormed) {
13800         if (ctxt->errNo == 0)
13801 	    ret = XML_ERR_INTERNAL_ERROR;
13802 	else
13803 	    ret = (xmlParserErrors)ctxt->errNo;
13804     } else {
13805         ret = XML_ERR_OK;
13806     }
13807 
13808     /*
13809      * Return the newly created nodeset after unlinking it from
13810      * the pseudo sibling.
13811      */
13812 
13813     cur = fake->next;
13814     fake->next = NULL;
13815     node->last = fake;
13816 
13817     if (cur != NULL) {
13818 	cur->prev = NULL;
13819     }
13820 
13821     *lst = cur;
13822 
13823     while (cur != NULL) {
13824 	cur->parent = NULL;
13825 	cur = cur->next;
13826     }
13827 
13828     xmlUnlinkNode(fake);
13829     xmlFreeNode(fake);
13830 
13831 
13832     if (ret != XML_ERR_OK) {
13833         xmlFreeNodeList(*lst);
13834 	*lst = NULL;
13835     }
13836 
13837     if (doc->dict != NULL)
13838         ctxt->dict = NULL;
13839     xmlFreeParserCtxt(ctxt);
13840 
13841     return(ret);
13842 #else /* !SAX2 */
13843     return(XML_ERR_INTERNAL_ERROR);
13844 #endif
13845 }
13846 
13847 #ifdef LIBXML_SAX1_ENABLED
13848 /**
13849  * xmlParseBalancedChunkMemoryRecover:
13850  * @doc:  the document the chunk pertains to
13851  * @sax:  the SAX handler bloc (possibly NULL)
13852  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13853  * @depth:  Used for loop detection, use 0
13854  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13855  * @lst:  the return value for the set of parsed nodes
13856  * @recover: return nodes even if the data is broken (use 0)
13857  *
13858  *
13859  * Parse a well-balanced chunk of an XML document
13860  * called by the parser
13861  * The allowed sequence for the Well Balanced Chunk is the one defined by
13862  * the content production in the XML grammar:
13863  *
13864  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13865  *
13866  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13867  *    the parser error code otherwise
13868  *
13869  * In case recover is set to 1, the nodelist will not be empty even if
13870  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13871  * some extent.
13872  */
13873 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13874 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13875      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13876      int recover) {
13877     xmlParserCtxtPtr ctxt;
13878     xmlDocPtr newDoc;
13879     xmlSAXHandlerPtr oldsax = NULL;
13880     xmlNodePtr content, newRoot;
13881     int size;
13882     int ret = 0;
13883 
13884     if (depth > 40) {
13885 	return(XML_ERR_ENTITY_LOOP);
13886     }
13887 
13888 
13889     if (lst != NULL)
13890         *lst = NULL;
13891     if (string == NULL)
13892         return(-1);
13893 
13894     size = xmlStrlen(string);
13895 
13896     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13897     if (ctxt == NULL) return(-1);
13898     ctxt->userData = ctxt;
13899     if (sax != NULL) {
13900 	oldsax = ctxt->sax;
13901         ctxt->sax = sax;
13902 	if (user_data != NULL)
13903 	    ctxt->userData = user_data;
13904     }
13905     newDoc = xmlNewDoc(BAD_CAST "1.0");
13906     if (newDoc == NULL) {
13907 	xmlFreeParserCtxt(ctxt);
13908 	return(-1);
13909     }
13910     newDoc->properties = XML_DOC_INTERNAL;
13911     if ((doc != NULL) && (doc->dict != NULL)) {
13912         xmlDictFree(ctxt->dict);
13913 	ctxt->dict = doc->dict;
13914 	xmlDictReference(ctxt->dict);
13915 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13916 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13917 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13918 	ctxt->dictNames = 1;
13919     } else {
13920 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13921     }
13922     if (doc != NULL) {
13923 	newDoc->intSubset = doc->intSubset;
13924 	newDoc->extSubset = doc->extSubset;
13925     }
13926     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13927     if (newRoot == NULL) {
13928 	if (sax != NULL)
13929 	    ctxt->sax = oldsax;
13930 	xmlFreeParserCtxt(ctxt);
13931 	newDoc->intSubset = NULL;
13932 	newDoc->extSubset = NULL;
13933         xmlFreeDoc(newDoc);
13934 	return(-1);
13935     }
13936     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13937     nodePush(ctxt, newRoot);
13938     if (doc == NULL) {
13939 	ctxt->myDoc = newDoc;
13940     } else {
13941 	ctxt->myDoc = newDoc;
13942 	newDoc->children->doc = doc;
13943 	/* Ensure that doc has XML spec namespace */
13944 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13945 	newDoc->oldNs = doc->oldNs;
13946     }
13947     ctxt->instate = XML_PARSER_CONTENT;
13948     ctxt->depth = depth;
13949 
13950     /*
13951      * Doing validity checking on chunk doesn't make sense
13952      */
13953     ctxt->validate = 0;
13954     ctxt->loadsubset = 0;
13955     xmlDetectSAX2(ctxt);
13956 
13957     if ( doc != NULL ){
13958         content = doc->children;
13959         doc->children = NULL;
13960         xmlParseContent(ctxt);
13961         doc->children = content;
13962     }
13963     else {
13964         xmlParseContent(ctxt);
13965     }
13966     if ((RAW == '<') && (NXT(1) == '/')) {
13967 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13968     } else if (RAW != 0) {
13969 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13970     }
13971     if (ctxt->node != newDoc->children) {
13972 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13973     }
13974 
13975     if (!ctxt->wellFormed) {
13976         if (ctxt->errNo == 0)
13977 	    ret = 1;
13978 	else
13979 	    ret = ctxt->errNo;
13980     } else {
13981       ret = 0;
13982     }
13983 
13984     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13985 	xmlNodePtr cur;
13986 
13987 	/*
13988 	 * Return the newly created nodeset after unlinking it from
13989 	 * they pseudo parent.
13990 	 */
13991 	cur = newDoc->children->children;
13992 	*lst = cur;
13993 	while (cur != NULL) {
13994 	    xmlSetTreeDoc(cur, doc);
13995 	    cur->parent = NULL;
13996 	    cur = cur->next;
13997 	}
13998 	newDoc->children->children = NULL;
13999     }
14000 
14001     if (sax != NULL)
14002 	ctxt->sax = oldsax;
14003     xmlFreeParserCtxt(ctxt);
14004     newDoc->intSubset = NULL;
14005     newDoc->extSubset = NULL;
14006     newDoc->oldNs = NULL;
14007     xmlFreeDoc(newDoc);
14008 
14009     return(ret);
14010 }
14011 
14012 /**
14013  * xmlSAXParseEntity:
14014  * @sax:  the SAX handler block
14015  * @filename:  the filename
14016  *
14017  * parse an XML external entity out of context and build a tree.
14018  * It use the given SAX function block to handle the parsing callback.
14019  * If sax is NULL, fallback to the default DOM tree building routines.
14020  *
14021  * [78] extParsedEnt ::= TextDecl? content
14022  *
14023  * This correspond to a "Well Balanced" chunk
14024  *
14025  * Returns the resulting document tree
14026  */
14027 
14028 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)14029 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14030     xmlDocPtr ret;
14031     xmlParserCtxtPtr ctxt;
14032 
14033     ctxt = xmlCreateFileParserCtxt(filename);
14034     if (ctxt == NULL) {
14035 	return(NULL);
14036     }
14037     if (sax != NULL) {
14038 	if (ctxt->sax != NULL)
14039 	    xmlFree(ctxt->sax);
14040         ctxt->sax = sax;
14041         ctxt->userData = NULL;
14042     }
14043 
14044     xmlParseExtParsedEnt(ctxt);
14045 
14046     if (ctxt->wellFormed)
14047 	ret = ctxt->myDoc;
14048     else {
14049         ret = NULL;
14050         xmlFreeDoc(ctxt->myDoc);
14051         ctxt->myDoc = NULL;
14052     }
14053     if (sax != NULL)
14054         ctxt->sax = NULL;
14055     xmlFreeParserCtxt(ctxt);
14056 
14057     return(ret);
14058 }
14059 
14060 /**
14061  * xmlParseEntity:
14062  * @filename:  the filename
14063  *
14064  * parse an XML external entity out of context and build a tree.
14065  *
14066  * [78] extParsedEnt ::= TextDecl? content
14067  *
14068  * This correspond to a "Well Balanced" chunk
14069  *
14070  * Returns the resulting document tree
14071  */
14072 
14073 xmlDocPtr
xmlParseEntity(const char * filename)14074 xmlParseEntity(const char *filename) {
14075     return(xmlSAXParseEntity(NULL, filename));
14076 }
14077 #endif /* LIBXML_SAX1_ENABLED */
14078 
14079 /**
14080  * xmlCreateEntityParserCtxtInternal:
14081  * @URL:  the entity URL
14082  * @ID:  the entity PUBLIC ID
14083  * @base:  a possible base for the target URI
14084  * @pctx:  parser context used to set options on new context
14085  *
14086  * Create a parser context for an external entity
14087  * Automatic support for ZLIB/Compress compressed document is provided
14088  * by default if found at compile-time.
14089  *
14090  * Returns the new parser context or NULL
14091  */
14092 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)14093 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14094 	                  const xmlChar *base, xmlParserCtxtPtr pctx) {
14095     xmlParserCtxtPtr ctxt;
14096     xmlParserInputPtr inputStream;
14097     char *directory = NULL;
14098     xmlChar *uri;
14099 
14100     ctxt = xmlNewParserCtxt();
14101     if (ctxt == NULL) {
14102 	return(NULL);
14103     }
14104 
14105     if (pctx != NULL) {
14106         ctxt->options = pctx->options;
14107         ctxt->_private = pctx->_private;
14108     }
14109 
14110     uri = xmlBuildURI(URL, base);
14111 
14112     if (uri == NULL) {
14113 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14114 	if (inputStream == NULL) {
14115 	    xmlFreeParserCtxt(ctxt);
14116 	    return(NULL);
14117 	}
14118 
14119 	inputPush(ctxt, inputStream);
14120 
14121 	if ((ctxt->directory == NULL) && (directory == NULL))
14122 	    directory = xmlParserGetDirectory((char *)URL);
14123 	if ((ctxt->directory == NULL) && (directory != NULL))
14124 	    ctxt->directory = directory;
14125     } else {
14126 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14127 	if (inputStream == NULL) {
14128 	    xmlFree(uri);
14129 	    xmlFreeParserCtxt(ctxt);
14130 	    return(NULL);
14131 	}
14132 
14133 	inputPush(ctxt, inputStream);
14134 
14135 	if ((ctxt->directory == NULL) && (directory == NULL))
14136 	    directory = xmlParserGetDirectory((char *)uri);
14137 	if ((ctxt->directory == NULL) && (directory != NULL))
14138 	    ctxt->directory = directory;
14139 	xmlFree(uri);
14140     }
14141     return(ctxt);
14142 }
14143 
14144 /**
14145  * xmlCreateEntityParserCtxt:
14146  * @URL:  the entity URL
14147  * @ID:  the entity PUBLIC ID
14148  * @base:  a possible base for the target URI
14149  *
14150  * Create a parser context for an external entity
14151  * Automatic support for ZLIB/Compress compressed document is provided
14152  * by default if found at compile-time.
14153  *
14154  * Returns the new parser context or NULL
14155  */
14156 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14157 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14158 	                  const xmlChar *base) {
14159     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14160 
14161 }
14162 
14163 /************************************************************************
14164  *									*
14165  *		Front ends when parsing from a file			*
14166  *									*
14167  ************************************************************************/
14168 
14169 /**
14170  * xmlCreateURLParserCtxt:
14171  * @filename:  the filename or URL
14172  * @options:  a combination of xmlParserOption
14173  *
14174  * Create a parser context for a file or URL content.
14175  * Automatic support for ZLIB/Compress compressed document is provided
14176  * by default if found at compile-time and for file accesses
14177  *
14178  * Returns the new parser context or NULL
14179  */
14180 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14181 xmlCreateURLParserCtxt(const char *filename, int options)
14182 {
14183     xmlParserCtxtPtr ctxt;
14184     xmlParserInputPtr inputStream;
14185     char *directory = NULL;
14186 
14187     ctxt = xmlNewParserCtxt();
14188     if (ctxt == NULL) {
14189 	xmlErrMemory(NULL, "cannot allocate parser context");
14190 	return(NULL);
14191     }
14192 
14193     if (options)
14194 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14195     ctxt->linenumbers = 1;
14196 
14197     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14198     if (inputStream == NULL) {
14199 	xmlFreeParserCtxt(ctxt);
14200 	return(NULL);
14201     }
14202 
14203     inputPush(ctxt, inputStream);
14204     if ((ctxt->directory == NULL) && (directory == NULL))
14205         directory = xmlParserGetDirectory(filename);
14206     if ((ctxt->directory == NULL) && (directory != NULL))
14207         ctxt->directory = directory;
14208 
14209     return(ctxt);
14210 }
14211 
14212 /**
14213  * xmlCreateFileParserCtxt:
14214  * @filename:  the filename
14215  *
14216  * Create a parser context for a file content.
14217  * Automatic support for ZLIB/Compress compressed document is provided
14218  * by default if found at compile-time.
14219  *
14220  * Returns the new parser context or NULL
14221  */
14222 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14223 xmlCreateFileParserCtxt(const char *filename)
14224 {
14225     return(xmlCreateURLParserCtxt(filename, 0));
14226 }
14227 
14228 #ifdef LIBXML_SAX1_ENABLED
14229 /**
14230  * xmlSAXParseFileWithData:
14231  * @sax:  the SAX handler block
14232  * @filename:  the filename
14233  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14234  *             documents
14235  * @data:  the userdata
14236  *
14237  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14238  * compressed document is provided by default if found at compile-time.
14239  * It use the given SAX function block to handle the parsing callback.
14240  * If sax is NULL, fallback to the default DOM tree building routines.
14241  *
14242  * User data (void *) is stored within the parser context in the
14243  * context's _private member, so it is available nearly everywhere in libxml
14244  *
14245  * Returns the resulting document tree
14246  */
14247 
14248 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14249 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14250                         int recovery, void *data) {
14251     xmlDocPtr ret;
14252     xmlParserCtxtPtr ctxt;
14253 
14254     xmlInitParser();
14255 
14256     ctxt = xmlCreateFileParserCtxt(filename);
14257     if (ctxt == NULL) {
14258 	return(NULL);
14259     }
14260     if (sax != NULL) {
14261 	if (ctxt->sax != NULL)
14262 	    xmlFree(ctxt->sax);
14263         ctxt->sax = sax;
14264     }
14265     xmlDetectSAX2(ctxt);
14266     if (data!=NULL) {
14267 	ctxt->_private = data;
14268     }
14269 
14270     if (ctxt->directory == NULL)
14271         ctxt->directory = xmlParserGetDirectory(filename);
14272 
14273     ctxt->recovery = recovery;
14274 
14275     xmlParseDocument(ctxt);
14276 
14277     if ((ctxt->wellFormed) || recovery) {
14278         ret = ctxt->myDoc;
14279 	if (ret != NULL) {
14280 	    if (ctxt->input->buf->compressed > 0)
14281 		ret->compression = 9;
14282 	    else
14283 		ret->compression = ctxt->input->buf->compressed;
14284 	}
14285     }
14286     else {
14287        ret = NULL;
14288        xmlFreeDoc(ctxt->myDoc);
14289        ctxt->myDoc = NULL;
14290     }
14291     if (sax != NULL)
14292         ctxt->sax = NULL;
14293     xmlFreeParserCtxt(ctxt);
14294 
14295     return(ret);
14296 }
14297 
14298 /**
14299  * xmlSAXParseFile:
14300  * @sax:  the SAX handler block
14301  * @filename:  the filename
14302  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14303  *             documents
14304  *
14305  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14306  * compressed document is provided by default if found at compile-time.
14307  * It use the given SAX function block to handle the parsing callback.
14308  * If sax is NULL, fallback to the default DOM tree building routines.
14309  *
14310  * Returns the resulting document tree
14311  */
14312 
14313 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14314 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14315                           int recovery) {
14316     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14317 }
14318 
14319 /**
14320  * xmlRecoverDoc:
14321  * @cur:  a pointer to an array of xmlChar
14322  *
14323  * parse an XML in-memory document and build a tree.
14324  * In the case the document is not Well Formed, a attempt to build a
14325  * tree is tried anyway
14326  *
14327  * Returns the resulting document tree or NULL in case of failure
14328  */
14329 
14330 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14331 xmlRecoverDoc(const xmlChar *cur) {
14332     return(xmlSAXParseDoc(NULL, cur, 1));
14333 }
14334 
14335 /**
14336  * xmlParseFile:
14337  * @filename:  the filename
14338  *
14339  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14340  * compressed document is provided by default if found at compile-time.
14341  *
14342  * Returns the resulting document tree if the file was wellformed,
14343  * NULL otherwise.
14344  */
14345 
14346 xmlDocPtr
xmlParseFile(const char * filename)14347 xmlParseFile(const char *filename) {
14348     return(xmlSAXParseFile(NULL, filename, 0));
14349 }
14350 
14351 /**
14352  * xmlRecoverFile:
14353  * @filename:  the filename
14354  *
14355  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14356  * compressed document is provided by default if found at compile-time.
14357  * In the case the document is not Well Formed, it attempts to build
14358  * a tree anyway
14359  *
14360  * Returns the resulting document tree or NULL in case of failure
14361  */
14362 
14363 xmlDocPtr
xmlRecoverFile(const char * filename)14364 xmlRecoverFile(const char *filename) {
14365     return(xmlSAXParseFile(NULL, filename, 1));
14366 }
14367 
14368 
14369 /**
14370  * xmlSetupParserForBuffer:
14371  * @ctxt:  an XML parser context
14372  * @buffer:  a xmlChar * buffer
14373  * @filename:  a file name
14374  *
14375  * Setup the parser context to parse a new buffer; Clears any prior
14376  * contents from the parser context. The buffer parameter must not be
14377  * NULL, but the filename parameter can be
14378  */
14379 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14380 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14381                              const char* filename)
14382 {
14383     xmlParserInputPtr input;
14384 
14385     if ((ctxt == NULL) || (buffer == NULL))
14386         return;
14387 
14388     input = xmlNewInputStream(ctxt);
14389     if (input == NULL) {
14390         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14391         xmlClearParserCtxt(ctxt);
14392         return;
14393     }
14394 
14395     xmlClearParserCtxt(ctxt);
14396     if (filename != NULL)
14397         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14398     input->base = buffer;
14399     input->cur = buffer;
14400     input->end = &buffer[xmlStrlen(buffer)];
14401     inputPush(ctxt, input);
14402 }
14403 
14404 /**
14405  * xmlSAXUserParseFile:
14406  * @sax:  a SAX handler
14407  * @user_data:  The user data returned on SAX callbacks
14408  * @filename:  a file name
14409  *
14410  * parse an XML file and call the given SAX handler routines.
14411  * Automatic support for ZLIB/Compress compressed document is provided
14412  *
14413  * Returns 0 in case of success or a error number otherwise
14414  */
14415 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14416 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14417                     const char *filename) {
14418     int ret = 0;
14419     xmlParserCtxtPtr ctxt;
14420 
14421     ctxt = xmlCreateFileParserCtxt(filename);
14422     if (ctxt == NULL) return -1;
14423     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14424 	xmlFree(ctxt->sax);
14425     ctxt->sax = sax;
14426     xmlDetectSAX2(ctxt);
14427 
14428     if (user_data != NULL)
14429 	ctxt->userData = user_data;
14430 
14431     xmlParseDocument(ctxt);
14432 
14433     if (ctxt->wellFormed)
14434 	ret = 0;
14435     else {
14436         if (ctxt->errNo != 0)
14437 	    ret = ctxt->errNo;
14438 	else
14439 	    ret = -1;
14440     }
14441     if (sax != NULL)
14442 	ctxt->sax = NULL;
14443     if (ctxt->myDoc != NULL) {
14444         xmlFreeDoc(ctxt->myDoc);
14445 	ctxt->myDoc = NULL;
14446     }
14447     xmlFreeParserCtxt(ctxt);
14448 
14449     return ret;
14450 }
14451 #endif /* LIBXML_SAX1_ENABLED */
14452 
14453 /************************************************************************
14454  *									*
14455  *		Front ends when parsing from memory			*
14456  *									*
14457  ************************************************************************/
14458 
14459 /**
14460  * xmlCreateMemoryParserCtxt:
14461  * @buffer:  a pointer to a char array
14462  * @size:  the size of the array
14463  *
14464  * Create a parser context for an XML in-memory document.
14465  *
14466  * Returns the new parser context or NULL
14467  */
14468 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14469 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14470     xmlParserCtxtPtr ctxt;
14471     xmlParserInputPtr input;
14472     xmlParserInputBufferPtr buf;
14473 
14474     if (buffer == NULL)
14475 	return(NULL);
14476     if (size <= 0)
14477 	return(NULL);
14478 
14479     ctxt = xmlNewParserCtxt();
14480     if (ctxt == NULL)
14481 	return(NULL);
14482 
14483     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14484     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14485     if (buf == NULL) {
14486 	xmlFreeParserCtxt(ctxt);
14487 	return(NULL);
14488     }
14489 
14490     input = xmlNewInputStream(ctxt);
14491     if (input == NULL) {
14492 	xmlFreeParserInputBuffer(buf);
14493 	xmlFreeParserCtxt(ctxt);
14494 	return(NULL);
14495     }
14496 
14497     input->filename = NULL;
14498     input->buf = buf;
14499     xmlBufResetInput(input->buf->buffer, input);
14500 
14501     inputPush(ctxt, input);
14502     return(ctxt);
14503 }
14504 
14505 #ifdef LIBXML_SAX1_ENABLED
14506 /**
14507  * xmlSAXParseMemoryWithData:
14508  * @sax:  the SAX handler block
14509  * @buffer:  an pointer to a char array
14510  * @size:  the size of the array
14511  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14512  *             documents
14513  * @data:  the userdata
14514  *
14515  * parse an XML in-memory block and use the given SAX function block
14516  * to handle the parsing callback. If sax is NULL, fallback to the default
14517  * DOM tree building routines.
14518  *
14519  * User data (void *) is stored within the parser context in the
14520  * context's _private member, so it is available nearly everywhere in libxml
14521  *
14522  * Returns the resulting document tree
14523  */
14524 
14525 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14526 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14527 	          int size, int recovery, void *data) {
14528     xmlDocPtr ret;
14529     xmlParserCtxtPtr ctxt;
14530 
14531     xmlInitParser();
14532 
14533     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14534     if (ctxt == NULL) return(NULL);
14535     if (sax != NULL) {
14536 	if (ctxt->sax != NULL)
14537 	    xmlFree(ctxt->sax);
14538         ctxt->sax = sax;
14539     }
14540     xmlDetectSAX2(ctxt);
14541     if (data!=NULL) {
14542 	ctxt->_private=data;
14543     }
14544 
14545     ctxt->recovery = recovery;
14546 
14547     xmlParseDocument(ctxt);
14548 
14549     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14550     else {
14551        ret = NULL;
14552        xmlFreeDoc(ctxt->myDoc);
14553        ctxt->myDoc = NULL;
14554     }
14555     if (sax != NULL)
14556 	ctxt->sax = NULL;
14557     xmlFreeParserCtxt(ctxt);
14558 
14559     return(ret);
14560 }
14561 
14562 /**
14563  * xmlSAXParseMemory:
14564  * @sax:  the SAX handler block
14565  * @buffer:  an pointer to a char array
14566  * @size:  the size of the array
14567  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14568  *             documents
14569  *
14570  * parse an XML in-memory block and use the given SAX function block
14571  * to handle the parsing callback. If sax is NULL, fallback to the default
14572  * DOM tree building routines.
14573  *
14574  * Returns the resulting document tree
14575  */
14576 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14577 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14578 	          int size, int recovery) {
14579     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14580 }
14581 
14582 /**
14583  * xmlParseMemory:
14584  * @buffer:  an pointer to a char array
14585  * @size:  the size of the array
14586  *
14587  * parse an XML in-memory block and build a tree.
14588  *
14589  * Returns the resulting document tree
14590  */
14591 
xmlParseMemory(const char * buffer,int size)14592 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14593    return(xmlSAXParseMemory(NULL, buffer, size, 0));
14594 }
14595 
14596 /**
14597  * xmlRecoverMemory:
14598  * @buffer:  an pointer to a char array
14599  * @size:  the size of the array
14600  *
14601  * parse an XML in-memory block and build a tree.
14602  * In the case the document is not Well Formed, an attempt to
14603  * build a tree is tried anyway
14604  *
14605  * Returns the resulting document tree or NULL in case of error
14606  */
14607 
xmlRecoverMemory(const char * buffer,int size)14608 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14609    return(xmlSAXParseMemory(NULL, buffer, size, 1));
14610 }
14611 
14612 /**
14613  * xmlSAXUserParseMemory:
14614  * @sax:  a SAX handler
14615  * @user_data:  The user data returned on SAX callbacks
14616  * @buffer:  an in-memory XML document input
14617  * @size:  the length of the XML document in bytes
14618  *
14619  * A better SAX parsing routine.
14620  * parse an XML in-memory buffer and call the given SAX handler routines.
14621  *
14622  * Returns 0 in case of success or a error number otherwise
14623  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14624 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14625 			  const char *buffer, int size) {
14626     int ret = 0;
14627     xmlParserCtxtPtr ctxt;
14628 
14629     xmlInitParser();
14630 
14631     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14632     if (ctxt == NULL) return -1;
14633     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14634         xmlFree(ctxt->sax);
14635     ctxt->sax = sax;
14636     xmlDetectSAX2(ctxt);
14637 
14638     if (user_data != NULL)
14639 	ctxt->userData = user_data;
14640 
14641     xmlParseDocument(ctxt);
14642 
14643     if (ctxt->wellFormed)
14644 	ret = 0;
14645     else {
14646         if (ctxt->errNo != 0)
14647 	    ret = ctxt->errNo;
14648 	else
14649 	    ret = -1;
14650     }
14651     if (sax != NULL)
14652         ctxt->sax = NULL;
14653     if (ctxt->myDoc != NULL) {
14654         xmlFreeDoc(ctxt->myDoc);
14655 	ctxt->myDoc = NULL;
14656     }
14657     xmlFreeParserCtxt(ctxt);
14658 
14659     return ret;
14660 }
14661 #endif /* LIBXML_SAX1_ENABLED */
14662 
14663 /**
14664  * xmlCreateDocParserCtxt:
14665  * @cur:  a pointer to an array of xmlChar
14666  *
14667  * Creates a parser context for an XML in-memory document.
14668  *
14669  * Returns the new parser context or NULL
14670  */
14671 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14672 xmlCreateDocParserCtxt(const xmlChar *cur) {
14673     int len;
14674 
14675     if (cur == NULL)
14676 	return(NULL);
14677     len = xmlStrlen(cur);
14678     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14679 }
14680 
14681 #ifdef LIBXML_SAX1_ENABLED
14682 /**
14683  * xmlSAXParseDoc:
14684  * @sax:  the SAX handler block
14685  * @cur:  a pointer to an array of xmlChar
14686  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14687  *             documents
14688  *
14689  * parse an XML in-memory document and build a tree.
14690  * It use the given SAX function block to handle the parsing callback.
14691  * If sax is NULL, fallback to the default DOM tree building routines.
14692  *
14693  * Returns the resulting document tree
14694  */
14695 
14696 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14697 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14698     xmlDocPtr ret;
14699     xmlParserCtxtPtr ctxt;
14700     xmlSAXHandlerPtr oldsax = NULL;
14701 
14702     if (cur == NULL) return(NULL);
14703 
14704 
14705     ctxt = xmlCreateDocParserCtxt(cur);
14706     if (ctxt == NULL) return(NULL);
14707     if (sax != NULL) {
14708         oldsax = ctxt->sax;
14709         ctxt->sax = sax;
14710         ctxt->userData = NULL;
14711     }
14712     xmlDetectSAX2(ctxt);
14713 
14714     xmlParseDocument(ctxt);
14715     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14716     else {
14717        ret = NULL;
14718        xmlFreeDoc(ctxt->myDoc);
14719        ctxt->myDoc = NULL;
14720     }
14721     if (sax != NULL)
14722 	ctxt->sax = oldsax;
14723     xmlFreeParserCtxt(ctxt);
14724 
14725     return(ret);
14726 }
14727 
14728 /**
14729  * xmlParseDoc:
14730  * @cur:  a pointer to an array of xmlChar
14731  *
14732  * parse an XML in-memory document and build a tree.
14733  *
14734  * Returns the resulting document tree
14735  */
14736 
14737 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14738 xmlParseDoc(const xmlChar *cur) {
14739     return(xmlSAXParseDoc(NULL, cur, 0));
14740 }
14741 #endif /* LIBXML_SAX1_ENABLED */
14742 
14743 #ifdef LIBXML_LEGACY_ENABLED
14744 /************************************************************************
14745  *									*
14746  *	Specific function to keep track of entities references		*
14747  *	and used by the XSLT debugger					*
14748  *									*
14749  ************************************************************************/
14750 
/*
 * Callback registered through xmlSetEntityReferenceFunc() and invoked by
 * xmlAddEntityReference(); NULL means no callback is installed.
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14752 
14753 /**
14754  * xmlAddEntityReference:
14755  * @ent : A valid entity
14756  * @firstNode : A valid first node for children of entity
14757  * @lastNode : A valid last node of children entity
14758  *
14759  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14760  */
14761 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14762 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14763                       xmlNodePtr lastNode)
14764 {
14765     if (xmlEntityRefFunc != NULL) {
14766         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14767     }
14768 }
14769 
14770 
/**
 * xmlSetEntityReferenceFunc:
 * @func: A valid function
 *
 * Set the function to call back when an XML entity reference has been
 * made. The pointer is stored in the file-level xmlEntityRefFunc
 * variable, replacing any previously registered function.
 */
void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
{
    xmlEntityRefFunc = func;
}
14782 #endif /* LIBXML_LEGACY_ENABLED */
14783 
14784 /************************************************************************
14785  *									*
14786  *				Miscellaneous				*
14787  *									*
14788  ************************************************************************/
14789 
14790 #ifdef LIBXML_XPATH_ENABLED
14791 #include <libxml/xpath.h>
14792 #endif
14793 
/*
 * Declared here so that xmlInitParser() can compare the current
 * xmlGenericError value against it.
 */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser(), back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
14796 
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * The initializers only run while the xmlParserInitialized flag is 0;
 * the flag is set to 1 once they have all been called, so later calls
 * return immediately.
 */

void
xmlInitParser(void) {
    /* nothing to do once a previous call has completed */
    if (xmlParserInitialized != 0)
	return;

#ifdef LIBXML_THREAD_ENABLED
    /*
     * With thread support the flag is tested a second time after
     * __xmlGlobalInitMutexLock() has returned, and the initializers
     * below are skipped if it is no longer 0.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
	xmlInitThreads();
	xmlInitGlobals();
	/*
	 * Only touch the generic error callback when it is unset or still
	 * the library default (presumably so that a handler installed by
	 * the application is preserved -- confirm against xmlerror.c).
	 */
	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
	    (xmlGenericError == NULL))
	    initGenericErrorDefaultFunc(NULL);
	xmlInitMemory();
        xmlInitializeDict();
	xmlInitCharEncodingHandlers();
	xmlDefaultSAXHandlerInit();
	xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
	xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
	htmlInitAutoClose();
	htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
	xmlXPathInit();
#endif
	/* mark completion only after every initializer above has run */
	xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14840 
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * The call is a no-op while the xmlParserInitialized flag is 0; the flag
 * is reset to 0 at the end, so a later xmlInitParser() call runs the
 * initializers again.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* nothing was initialized (or it was already cleaned up) */
    if (!xmlParserInitialized)
	return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlResetLastError();
    xmlCleanupGlobals();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* allow xmlInitParser() to run the initializers again */
    xmlParserInitialized = 0;
}
14887 
14888 /************************************************************************
14889  *									*
14890  *	New set (2.6.0) of simpler and more flexible APIs		*
14891  *									*
14892  ************************************************************************/
14893 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; when it is NULL every non-NULL string is freed.
 *
 * The body is wrapped in do { } while (0) and carries no trailing
 * semicolon, so "DICT_FREE(x);" is exactly one statement and can be used
 * safely as the branch of an if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14905 
14906 /**
14907  * xmlCtxtReset:
14908  * @ctxt: an XML parser context
14909  *
14910  * Reset a parser context
14911  */
14912 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14913 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14914 {
14915     xmlParserInputPtr input;
14916     xmlDictPtr dict;
14917 
14918     if (ctxt == NULL)
14919         return;
14920 
14921     dict = ctxt->dict;
14922 
14923     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14924         xmlFreeInputStream(input);
14925     }
14926     ctxt->inputNr = 0;
14927     ctxt->input = NULL;
14928 
14929     ctxt->spaceNr = 0;
14930     if (ctxt->spaceTab != NULL) {
14931 	ctxt->spaceTab[0] = -1;
14932 	ctxt->space = &ctxt->spaceTab[0];
14933     } else {
14934         ctxt->space = NULL;
14935     }
14936 
14937 
14938     ctxt->nodeNr = 0;
14939     ctxt->node = NULL;
14940 
14941     ctxt->nameNr = 0;
14942     ctxt->name = NULL;
14943 
14944     DICT_FREE(ctxt->version);
14945     ctxt->version = NULL;
14946     DICT_FREE(ctxt->encoding);
14947     ctxt->encoding = NULL;
14948     DICT_FREE(ctxt->directory);
14949     ctxt->directory = NULL;
14950     DICT_FREE(ctxt->extSubURI);
14951     ctxt->extSubURI = NULL;
14952     DICT_FREE(ctxt->extSubSystem);
14953     ctxt->extSubSystem = NULL;
14954     if (ctxt->myDoc != NULL)
14955         xmlFreeDoc(ctxt->myDoc);
14956     ctxt->myDoc = NULL;
14957 
14958     ctxt->standalone = -1;
14959     ctxt->hasExternalSubset = 0;
14960     ctxt->hasPErefs = 0;
14961     ctxt->html = 0;
14962     ctxt->external = 0;
14963     ctxt->instate = XML_PARSER_START;
14964     ctxt->token = 0;
14965 
14966     ctxt->wellFormed = 1;
14967     ctxt->nsWellFormed = 1;
14968     ctxt->disableSAX = 0;
14969     ctxt->valid = 1;
14970 #if 0
14971     ctxt->vctxt.userData = ctxt;
14972     ctxt->vctxt.error = xmlParserValidityError;
14973     ctxt->vctxt.warning = xmlParserValidityWarning;
14974 #endif
14975     ctxt->record_info = 0;
14976     ctxt->nbChars = 0;
14977     ctxt->checkIndex = 0;
14978     ctxt->inSubset = 0;
14979     ctxt->errNo = XML_ERR_OK;
14980     ctxt->depth = 0;
14981     ctxt->charset = XML_CHAR_ENCODING_UTF8;
14982     ctxt->catalogs = NULL;
14983     ctxt->nbentities = 0;
14984     ctxt->sizeentities = 0;
14985     ctxt->sizeentcopy = 0;
14986     xmlInitNodeInfoSeq(&ctxt->node_seq);
14987 
14988     if (ctxt->attsDefault != NULL) {
14989         xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14990         ctxt->attsDefault = NULL;
14991     }
14992     if (ctxt->attsSpecial != NULL) {
14993         xmlHashFree(ctxt->attsSpecial, NULL);
14994         ctxt->attsSpecial = NULL;
14995     }
14996 
14997 #ifdef LIBXML_CATALOG_ENABLED
14998     if (ctxt->catalogs != NULL)
14999 	xmlCatalogFreeLocal(ctxt->catalogs);
15000 #endif
15001     if (ctxt->lastError.code != XML_ERR_OK)
15002         xmlResetError(&ctxt->lastError);
15003 }
15004 
15005 /**
15006  * xmlCtxtResetPush:
15007  * @ctxt: an XML parser context
15008  * @chunk:  a pointer to an array of chars
15009  * @size:  number of chars in the array
15010  * @filename:  an optional file name or URI
15011  * @encoding:  the document encoding, or NULL
15012  *
15013  * Reset a push parser context
15014  *
15015  * Returns 0 in case of success and 1 in case of error
15016  */
15017 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)15018 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15019                  int size, const char *filename, const char *encoding)
15020 {
15021     xmlParserInputPtr inputStream;
15022     xmlParserInputBufferPtr buf;
15023     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15024 
15025     if (ctxt == NULL)
15026         return(1);
15027 
15028     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15029         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15030 
15031     buf = xmlAllocParserInputBuffer(enc);
15032     if (buf == NULL)
15033         return(1);
15034 
15035     if (ctxt == NULL) {
15036         xmlFreeParserInputBuffer(buf);
15037         return(1);
15038     }
15039 
15040     xmlCtxtReset(ctxt);
15041 
15042     if (ctxt->pushTab == NULL) {
15043         ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15044 	                                    sizeof(xmlChar *));
15045         if (ctxt->pushTab == NULL) {
15046 	    xmlErrMemory(ctxt, NULL);
15047             xmlFreeParserInputBuffer(buf);
15048             return(1);
15049         }
15050     }
15051 
15052     if (filename == NULL) {
15053         ctxt->directory = NULL;
15054     } else {
15055         ctxt->directory = xmlParserGetDirectory(filename);
15056     }
15057 
15058     inputStream = xmlNewInputStream(ctxt);
15059     if (inputStream == NULL) {
15060         xmlFreeParserInputBuffer(buf);
15061         return(1);
15062     }
15063 
15064     if (filename == NULL)
15065         inputStream->filename = NULL;
15066     else
15067         inputStream->filename = (char *)
15068             xmlCanonicPath((const xmlChar *) filename);
15069     inputStream->buf = buf;
15070     xmlBufResetInput(buf->buffer, inputStream);
15071 
15072     inputPush(ctxt, inputStream);
15073 
15074     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15075         (ctxt->input->buf != NULL)) {
15076 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15077         size_t cur = ctxt->input->cur - ctxt->input->base;
15078 
15079         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15080 
15081         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15082 #ifdef DEBUG_PUSH
15083         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15084 #endif
15085     }
15086 
15087     if (encoding != NULL) {
15088         xmlCharEncodingHandlerPtr hdlr;
15089 
15090         if (ctxt->encoding != NULL)
15091 	    xmlFree((xmlChar *) ctxt->encoding);
15092         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15093 
15094         hdlr = xmlFindCharEncodingHandler(encoding);
15095         if (hdlr != NULL) {
15096             xmlSwitchToEncoding(ctxt, hdlr);
15097 	} else {
15098 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15099 			      "Unsupported encoding %s\n", BAD_CAST encoding);
15100         }
15101     } else if (enc != XML_CHAR_ENCODING_NONE) {
15102         xmlSwitchEncoding(ctxt, enc);
15103     }
15104 
15105     return(0);
15106 }
15107 
15108 
15109 /**
15110  * xmlCtxtUseOptionsInternal:
15111  * @ctxt: an XML parser context
15112  * @options:  a combination of xmlParserOption
15113  * @encoding:  the user provided encoding to use
15114  *
15115  * Applies the options to the parser context
15116  *
15117  * Returns 0 in case of success, the set of unknown or unimplemented options
15118  *         in case of error.
15119  */
15120 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15121 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15122 {
15123     if (ctxt == NULL)
15124         return(-1);
15125     if (encoding != NULL) {
15126         if (ctxt->encoding != NULL)
15127 	    xmlFree((xmlChar *) ctxt->encoding);
15128         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15129     }
15130     if (options & XML_PARSE_RECOVER) {
15131         ctxt->recovery = 1;
15132         options -= XML_PARSE_RECOVER;
15133 	ctxt->options |= XML_PARSE_RECOVER;
15134     } else
15135         ctxt->recovery = 0;
15136     if (options & XML_PARSE_DTDLOAD) {
15137         ctxt->loadsubset = XML_DETECT_IDS;
15138         options -= XML_PARSE_DTDLOAD;
15139 	ctxt->options |= XML_PARSE_DTDLOAD;
15140     } else
15141         ctxt->loadsubset = 0;
15142     if (options & XML_PARSE_DTDATTR) {
15143         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15144         options -= XML_PARSE_DTDATTR;
15145 	ctxt->options |= XML_PARSE_DTDATTR;
15146     }
15147     if (options & XML_PARSE_NOENT) {
15148         ctxt->replaceEntities = 1;
15149         /* ctxt->loadsubset |= XML_DETECT_IDS; */
15150         options -= XML_PARSE_NOENT;
15151 	ctxt->options |= XML_PARSE_NOENT;
15152     } else
15153         ctxt->replaceEntities = 0;
15154     if (options & XML_PARSE_PEDANTIC) {
15155         ctxt->pedantic = 1;
15156         options -= XML_PARSE_PEDANTIC;
15157 	ctxt->options |= XML_PARSE_PEDANTIC;
15158     } else
15159         ctxt->pedantic = 0;
15160     if (options & XML_PARSE_NOBLANKS) {
15161         ctxt->keepBlanks = 0;
15162         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15163         options -= XML_PARSE_NOBLANKS;
15164 	ctxt->options |= XML_PARSE_NOBLANKS;
15165     } else
15166         ctxt->keepBlanks = 1;
15167     if (options & XML_PARSE_DTDVALID) {
15168         ctxt->validate = 1;
15169         if (options & XML_PARSE_NOWARNING)
15170             ctxt->vctxt.warning = NULL;
15171         if (options & XML_PARSE_NOERROR)
15172             ctxt->vctxt.error = NULL;
15173         options -= XML_PARSE_DTDVALID;
15174 	ctxt->options |= XML_PARSE_DTDVALID;
15175     } else
15176         ctxt->validate = 0;
15177     if (options & XML_PARSE_NOWARNING) {
15178         ctxt->sax->warning = NULL;
15179         options -= XML_PARSE_NOWARNING;
15180     }
15181     if (options & XML_PARSE_NOERROR) {
15182         ctxt->sax->error = NULL;
15183         ctxt->sax->fatalError = NULL;
15184         options -= XML_PARSE_NOERROR;
15185     }
15186 #ifdef LIBXML_SAX1_ENABLED
15187     if (options & XML_PARSE_SAX1) {
15188         ctxt->sax->startElement = xmlSAX2StartElement;
15189         ctxt->sax->endElement = xmlSAX2EndElement;
15190         ctxt->sax->startElementNs = NULL;
15191         ctxt->sax->endElementNs = NULL;
15192         ctxt->sax->initialized = 1;
15193         options -= XML_PARSE_SAX1;
15194 	ctxt->options |= XML_PARSE_SAX1;
15195     }
15196 #endif /* LIBXML_SAX1_ENABLED */
15197     if (options & XML_PARSE_NODICT) {
15198         ctxt->dictNames = 0;
15199         options -= XML_PARSE_NODICT;
15200 	ctxt->options |= XML_PARSE_NODICT;
15201     } else {
15202         ctxt->dictNames = 1;
15203     }
15204     if (options & XML_PARSE_NOCDATA) {
15205         ctxt->sax->cdataBlock = NULL;
15206         options -= XML_PARSE_NOCDATA;
15207 	ctxt->options |= XML_PARSE_NOCDATA;
15208     }
15209     if (options & XML_PARSE_NSCLEAN) {
15210 	ctxt->options |= XML_PARSE_NSCLEAN;
15211         options -= XML_PARSE_NSCLEAN;
15212     }
15213     if (options & XML_PARSE_NONET) {
15214 	ctxt->options |= XML_PARSE_NONET;
15215         options -= XML_PARSE_NONET;
15216     }
15217     if (options & XML_PARSE_COMPACT) {
15218 	ctxt->options |= XML_PARSE_COMPACT;
15219         options -= XML_PARSE_COMPACT;
15220     }
15221     if (options & XML_PARSE_OLD10) {
15222 	ctxt->options |= XML_PARSE_OLD10;
15223         options -= XML_PARSE_OLD10;
15224     }
15225     if (options & XML_PARSE_NOBASEFIX) {
15226 	ctxt->options |= XML_PARSE_NOBASEFIX;
15227         options -= XML_PARSE_NOBASEFIX;
15228     }
15229     if (options & XML_PARSE_HUGE) {
15230 	ctxt->options |= XML_PARSE_HUGE;
15231         options -= XML_PARSE_HUGE;
15232         if (ctxt->dict != NULL)
15233             xmlDictSetLimit(ctxt->dict, 0);
15234     }
15235     if (options & XML_PARSE_OLDSAX) {
15236 	ctxt->options |= XML_PARSE_OLDSAX;
15237         options -= XML_PARSE_OLDSAX;
15238     }
15239     if (options & XML_PARSE_IGNORE_ENC) {
15240 	ctxt->options |= XML_PARSE_IGNORE_ENC;
15241         options -= XML_PARSE_IGNORE_ENC;
15242     }
15243     if (options & XML_PARSE_BIG_LINES) {
15244 	ctxt->options |= XML_PARSE_BIG_LINES;
15245         options -= XML_PARSE_BIG_LINES;
15246     }
15247     ctxt->linenumbers = 1;
15248     return (options);
15249 }
15250 
15251 /**
15252  * xmlCtxtUseOptions:
15253  * @ctxt: an XML parser context
15254  * @options:  a combination of xmlParserOption
15255  *
15256  * Applies the options to the parser context
15257  *
15258  * Returns 0 in case of success, the set of unknown or unimplemented options
15259  *         in case of error.
15260  */
15261 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15262 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15263 {
15264    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15265 }
15266 
15267 /**
15268  * xmlDoRead:
15269  * @ctxt:  an XML parser context
15270  * @URL:  the base URL to use for the document
15271  * @encoding:  the document encoding, or NULL
15272  * @options:  a combination of xmlParserOption
15273  * @reuse:  keep the context for reuse
15274  *
15275  * Common front-end for the xmlRead functions
15276  *
15277  * Returns the resulting document tree or NULL
15278  */
15279 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15280 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15281           int options, int reuse)
15282 {
15283     xmlDocPtr ret;
15284 
15285     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15286     if (encoding != NULL) {
15287         xmlCharEncodingHandlerPtr hdlr;
15288 
15289 	hdlr = xmlFindCharEncodingHandler(encoding);
15290 	if (hdlr != NULL)
15291 	    xmlSwitchToEncoding(ctxt, hdlr);
15292     }
15293     if ((URL != NULL) && (ctxt->input != NULL) &&
15294         (ctxt->input->filename == NULL))
15295         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15296     xmlParseDocument(ctxt);
15297     if ((ctxt->wellFormed) || ctxt->recovery)
15298         ret = ctxt->myDoc;
15299     else {
15300         ret = NULL;
15301 	if (ctxt->myDoc != NULL) {
15302 	    xmlFreeDoc(ctxt->myDoc);
15303 	}
15304     }
15305     ctxt->myDoc = NULL;
15306     if (!reuse) {
15307 	xmlFreeParserCtxt(ctxt);
15308     }
15309 
15310     return (ret);
15311 }
15312 
15313 /**
15314  * xmlReadDoc:
15315  * @cur:  a pointer to a zero terminated string
15316  * @URL:  the base URL to use for the document
15317  * @encoding:  the document encoding, or NULL
15318  * @options:  a combination of xmlParserOption
15319  *
15320  * parse an XML in-memory document and build a tree.
15321  *
15322  * Returns the resulting document tree
15323  */
15324 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15325 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15326 {
15327     xmlParserCtxtPtr ctxt;
15328 
15329     if (cur == NULL)
15330         return (NULL);
15331     xmlInitParser();
15332 
15333     ctxt = xmlCreateDocParserCtxt(cur);
15334     if (ctxt == NULL)
15335         return (NULL);
15336     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15337 }
15338 
15339 /**
15340  * xmlReadFile:
15341  * @filename:  a file or URL
15342  * @encoding:  the document encoding, or NULL
15343  * @options:  a combination of xmlParserOption
15344  *
15345  * parse an XML file from the filesystem or the network.
15346  *
15347  * Returns the resulting document tree
15348  */
15349 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15350 xmlReadFile(const char *filename, const char *encoding, int options)
15351 {
15352     xmlParserCtxtPtr ctxt;
15353 
15354     xmlInitParser();
15355     ctxt = xmlCreateURLParserCtxt(filename, options);
15356     if (ctxt == NULL)
15357         return (NULL);
15358     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15359 }
15360 
15361 /**
15362  * xmlReadMemory:
15363  * @buffer:  a pointer to a char array
15364  * @size:  the size of the array
15365  * @URL:  the base URL to use for the document
15366  * @encoding:  the document encoding, or NULL
15367  * @options:  a combination of xmlParserOption
15368  *
15369  * parse an XML in-memory document and build a tree.
15370  *
15371  * Returns the resulting document tree
15372  */
15373 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15374 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15375 {
15376     xmlParserCtxtPtr ctxt;
15377 
15378     xmlInitParser();
15379     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15380     if (ctxt == NULL)
15381         return (NULL);
15382     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15383 }
15384 
15385 /**
15386  * xmlReadFd:
15387  * @fd:  an open file descriptor
15388  * @URL:  the base URL to use for the document
15389  * @encoding:  the document encoding, or NULL
15390  * @options:  a combination of xmlParserOption
15391  *
15392  * parse an XML from a file descriptor and build a tree.
15393  * NOTE that the file descriptor will not be closed when the
15394  *      reader is closed or reset.
15395  *
15396  * Returns the resulting document tree
15397  */
15398 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15399 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15400 {
15401     xmlParserCtxtPtr ctxt;
15402     xmlParserInputBufferPtr input;
15403     xmlParserInputPtr stream;
15404 
15405     if (fd < 0)
15406         return (NULL);
15407     xmlInitParser();
15408 
15409     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15410     if (input == NULL)
15411         return (NULL);
15412     input->closecallback = NULL;
15413     ctxt = xmlNewParserCtxt();
15414     if (ctxt == NULL) {
15415         xmlFreeParserInputBuffer(input);
15416         return (NULL);
15417     }
15418     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15419     if (stream == NULL) {
15420         xmlFreeParserInputBuffer(input);
15421 	xmlFreeParserCtxt(ctxt);
15422         return (NULL);
15423     }
15424     inputPush(ctxt, stream);
15425     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15426 }
15427 
15428 /**
15429  * xmlReadIO:
15430  * @ioread:  an I/O read function
15431  * @ioclose:  an I/O close function
15432  * @ioctx:  an I/O handler
15433  * @URL:  the base URL to use for the document
15434  * @encoding:  the document encoding, or NULL
15435  * @options:  a combination of xmlParserOption
15436  *
15437  * parse an XML document from I/O functions and source and build a tree.
15438  *
15439  * Returns the resulting document tree
15440  */
15441 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15442 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15443           void *ioctx, const char *URL, const char *encoding, int options)
15444 {
15445     xmlParserCtxtPtr ctxt;
15446     xmlParserInputBufferPtr input;
15447     xmlParserInputPtr stream;
15448 
15449     if (ioread == NULL)
15450         return (NULL);
15451     xmlInitParser();
15452 
15453     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15454                                          XML_CHAR_ENCODING_NONE);
15455     if (input == NULL) {
15456         if (ioclose != NULL)
15457             ioclose(ioctx);
15458         return (NULL);
15459     }
15460     ctxt = xmlNewParserCtxt();
15461     if (ctxt == NULL) {
15462         xmlFreeParserInputBuffer(input);
15463         return (NULL);
15464     }
15465     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15466     if (stream == NULL) {
15467         xmlFreeParserInputBuffer(input);
15468 	xmlFreeParserCtxt(ctxt);
15469         return (NULL);
15470     }
15471     inputPush(ctxt, stream);
15472     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15473 }
15474 
15475 /**
15476  * xmlCtxtReadDoc:
15477  * @ctxt:  an XML parser context
15478  * @cur:  a pointer to a zero terminated string
15479  * @URL:  the base URL to use for the document
15480  * @encoding:  the document encoding, or NULL
15481  * @options:  a combination of xmlParserOption
15482  *
15483  * parse an XML in-memory document and build a tree.
15484  * This reuses the existing @ctxt parser context
15485  *
15486  * Returns the resulting document tree
15487  */
15488 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15489 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15490                const char *URL, const char *encoding, int options)
15491 {
15492     xmlParserInputPtr stream;
15493 
15494     if (cur == NULL)
15495         return (NULL);
15496     if (ctxt == NULL)
15497         return (NULL);
15498     xmlInitParser();
15499 
15500     xmlCtxtReset(ctxt);
15501 
15502     stream = xmlNewStringInputStream(ctxt, cur);
15503     if (stream == NULL) {
15504         return (NULL);
15505     }
15506     inputPush(ctxt, stream);
15507     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15508 }
15509 
15510 /**
15511  * xmlCtxtReadFile:
15512  * @ctxt:  an XML parser context
15513  * @filename:  a file or URL
15514  * @encoding:  the document encoding, or NULL
15515  * @options:  a combination of xmlParserOption
15516  *
15517  * parse an XML file from the filesystem or the network.
15518  * This reuses the existing @ctxt parser context
15519  *
15520  * Returns the resulting document tree
15521  */
15522 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15523 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15524                 const char *encoding, int options)
15525 {
15526     xmlParserInputPtr stream;
15527 
15528     if (filename == NULL)
15529         return (NULL);
15530     if (ctxt == NULL)
15531         return (NULL);
15532     xmlInitParser();
15533 
15534     xmlCtxtReset(ctxt);
15535 
15536     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15537     if (stream == NULL) {
15538         return (NULL);
15539     }
15540     inputPush(ctxt, stream);
15541     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15542 }
15543 
15544 /**
15545  * xmlCtxtReadMemory:
15546  * @ctxt:  an XML parser context
15547  * @buffer:  a pointer to a char array
15548  * @size:  the size of the array
15549  * @URL:  the base URL to use for the document
15550  * @encoding:  the document encoding, or NULL
15551  * @options:  a combination of xmlParserOption
15552  *
15553  * parse an XML in-memory document and build a tree.
15554  * This reuses the existing @ctxt parser context
15555  *
15556  * Returns the resulting document tree
15557  */
15558 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15559 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15560                   const char *URL, const char *encoding, int options)
15561 {
15562     xmlParserInputBufferPtr input;
15563     xmlParserInputPtr stream;
15564 
15565     if (ctxt == NULL)
15566         return (NULL);
15567     if (buffer == NULL)
15568         return (NULL);
15569     xmlInitParser();
15570 
15571     xmlCtxtReset(ctxt);
15572 
15573     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15574     if (input == NULL) {
15575 	return(NULL);
15576     }
15577 
15578     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15579     if (stream == NULL) {
15580 	xmlFreeParserInputBuffer(input);
15581 	return(NULL);
15582     }
15583 
15584     inputPush(ctxt, stream);
15585     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15586 }
15587 
15588 /**
15589  * xmlCtxtReadFd:
15590  * @ctxt:  an XML parser context
15591  * @fd:  an open file descriptor
15592  * @URL:  the base URL to use for the document
15593  * @encoding:  the document encoding, or NULL
15594  * @options:  a combination of xmlParserOption
15595  *
15596  * parse an XML from a file descriptor and build a tree.
15597  * This reuses the existing @ctxt parser context
15598  * NOTE that the file descriptor will not be closed when the
15599  *      reader is closed or reset.
15600  *
15601  * Returns the resulting document tree
15602  */
15603 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15604 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15605               const char *URL, const char *encoding, int options)
15606 {
15607     xmlParserInputBufferPtr input;
15608     xmlParserInputPtr stream;
15609 
15610     if (fd < 0)
15611         return (NULL);
15612     if (ctxt == NULL)
15613         return (NULL);
15614     xmlInitParser();
15615 
15616     xmlCtxtReset(ctxt);
15617 
15618 
15619     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15620     if (input == NULL)
15621         return (NULL);
15622     input->closecallback = NULL;
15623     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15624     if (stream == NULL) {
15625         xmlFreeParserInputBuffer(input);
15626         return (NULL);
15627     }
15628     inputPush(ctxt, stream);
15629     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15630 }
15631 
15632 /**
15633  * xmlCtxtReadIO:
15634  * @ctxt:  an XML parser context
15635  * @ioread:  an I/O read function
15636  * @ioclose:  an I/O close function
15637  * @ioctx:  an I/O handler
15638  * @URL:  the base URL to use for the document
15639  * @encoding:  the document encoding, or NULL
15640  * @options:  a combination of xmlParserOption
15641  *
15642  * parse an XML document from I/O functions and source and build a tree.
15643  * This reuses the existing @ctxt parser context
15644  *
15645  * Returns the resulting document tree
15646  */
15647 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15648 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15649               xmlInputCloseCallback ioclose, void *ioctx,
15650 	      const char *URL,
15651               const char *encoding, int options)
15652 {
15653     xmlParserInputBufferPtr input;
15654     xmlParserInputPtr stream;
15655 
15656     if (ioread == NULL)
15657         return (NULL);
15658     if (ctxt == NULL)
15659         return (NULL);
15660     xmlInitParser();
15661 
15662     xmlCtxtReset(ctxt);
15663 
15664     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15665                                          XML_CHAR_ENCODING_NONE);
15666     if (input == NULL) {
15667         if (ioclose != NULL)
15668             ioclose(ioctx);
15669         return (NULL);
15670     }
15671     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15672     if (stream == NULL) {
15673         xmlFreeParserInputBuffer(input);
15674         return (NULL);
15675     }
15676     inputPush(ctxt, stream);
15677     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15678 }
15679 
15680 #define bottom_parser
15681 #include "elfgcchack.h"
15682