1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different ranges of character are actually implanted either in
21  * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37 
38 #define IN_LIBXML
39 #include "libxml.h"
40 
41 #if defined(_WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46 
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
66 #endif
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
70 #endif
71 #ifdef HAVE_CTYPE_H
72 #include <ctype.h>
73 #endif
74 #ifdef HAVE_STDLIB_H
75 #include <stdlib.h>
76 #endif
77 #ifdef HAVE_SYS_STAT_H
78 #include <sys/stat.h>
79 #endif
80 #ifdef HAVE_FCNTL_H
81 #include <fcntl.h>
82 #endif
83 #ifdef HAVE_UNISTD_H
84 #include <unistd.h>
85 #endif
86 
87 #include "buf.h"
88 #include "enc.h"
89 
90 static void
91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92 
93 static xmlParserCtxtPtr
94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 	                  const xmlChar *base, xmlParserCtxtPtr pctx);
96 
97 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98 
99 /************************************************************************
100  *									*
101  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
102  *									*
103  ************************************************************************/
104 
/*
 * Entity size thresholds. XML_PARSER_BIG_ENTITY is the smallest entity
 * size that xmlParserEntityCheck() examines in its size-based check.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold at which the ratio of parsed entity
 *    replacement to the size in bytes of the input indicates exponential
 *    behaviour. A value of 10 corresponds to at least 3 entity replacements
 *    per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
115 
116 /*
117  * xmlParserEntityCheck
118  *
119  * Function to check non-linear entity expansion behaviour
120  * This is here to detect and stop exponential linear entity expansion
121  * This is not a limitation of the parser but a safety
122  * boundary feature. It can be disabled with the XML_PARSE_HUGE
123  * parser option.
124  */
125 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127                      xmlEntityPtr ent, size_t replacement)
128 {
129     size_t consumed = 0;
130 
131     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132         return (0);
133     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134         return (1);
135 
136     /*
137      * This may look absurd but is needed to detect
138      * entities problems
139      */
140     if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 	(ent->content != NULL) && (ent->checked == 0) &&
142 	(ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
143 	unsigned long oldnbent = ctxt->nbentities;
144 	xmlChar *rep;
145 
146 	ent->checked = 1;
147 
148         ++ctxt->depth;
149 	rep = xmlStringDecodeEntities(ctxt, ent->content,
150 				  XML_SUBSTITUTE_REF, 0, 0, 0);
151         --ctxt->depth;
152 	if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
153 	    ent->content[0] = 0;
154 	}
155 
156 	ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
157 	if (rep != NULL) {
158 	    if (xmlStrchr(rep, '<'))
159 		ent->checked |= 1;
160 	    xmlFree(rep);
161 	    rep = NULL;
162 	}
163     }
164     if (replacement != 0) {
165 	if (replacement < XML_MAX_TEXT_LENGTH)
166 	    return(0);
167 
168         /*
169 	 * If the volume of entity copy reaches 10 times the
170 	 * amount of parsed data and over the large text threshold
171 	 * then that's very likely to be an abuse.
172 	 */
173         if (ctxt->input != NULL) {
174 	    consumed = ctxt->input->consumed +
175 	               (ctxt->input->cur - ctxt->input->base);
176 	}
177         consumed += ctxt->sizeentities;
178 
179         if (replacement < XML_PARSER_NON_LINEAR * consumed)
180 	    return(0);
181     } else if (size != 0) {
182         /*
183          * Do the check based on the replacement size of the entity
184          */
185         if (size < XML_PARSER_BIG_ENTITY)
186 	    return(0);
187 
188         /*
189          * A limit on the amount of text data reasonably used
190          */
191         if (ctxt->input != NULL) {
192             consumed = ctxt->input->consumed +
193                 (ctxt->input->cur - ctxt->input->base);
194         }
195         consumed += ctxt->sizeentities;
196 
197         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
198 	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
199             return (0);
200     } else if (ent != NULL) {
201         /*
202          * use the number of parsed entities in the replacement
203          */
204         size = ent->checked / 2;
205 
206         /*
207          * The amount of data parsed counting entities size only once
208          */
209         if (ctxt->input != NULL) {
210             consumed = ctxt->input->consumed +
211                 (ctxt->input->cur - ctxt->input->base);
212         }
213         consumed += ctxt->sizeentities;
214 
215         /*
216          * Check the density of entities for the amount of data
217 	 * knowing an entity reference will take at least 3 bytes
218          */
219         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
220             return (0);
221     } else {
222         /*
223          * strange we got no data for checking
224          */
225 	if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
226 	     (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
227 	    (ctxt->nbentities <= 10000))
228 	    return (0);
229     }
230     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
231     return (1);
232 }
233 
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;

#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the available input characters are followed by 0, which should be
 * provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
261 
/*
 * NULL-terminated list of the XML-prefixed PI targets allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
271 
272 
273 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
274 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
275                                               const xmlChar **str);
276 
277 static xmlParserErrors
278 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
279 	              xmlSAXHandlerPtr sax,
280 		      void *user_data, int depth, const xmlChar *URL,
281 		      const xmlChar *ID, xmlNodePtr *list);
282 
283 static int
284 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
285                           const char *encoding);
286 #ifdef LIBXML_LEGACY_ENABLED
287 static void
288 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
289                       xmlNodePtr lastNode);
290 #endif /* LIBXML_LEGACY_ENABLED */
291 
292 static xmlParserErrors
293 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
294 		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
295 
296 static int
297 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
298 
299 /************************************************************************
300  *									*
301  *		Some factorized error routines				*
302  *									*
303  ************************************************************************/
304 
305 /**
306  * xmlErrAttributeDup:
307  * @ctxt:  an XML parser context
308  * @prefix:  the attribute prefix
309  * @localname:  the attribute localname
310  *
311  * Handle a redefinition of attribute error
312  */
313 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)314 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
315                    const xmlChar * localname)
316 {
317     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
318         (ctxt->instate == XML_PARSER_EOF))
319 	return;
320     if (ctxt != NULL)
321 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
322 
323     if (prefix == NULL)
324         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
325                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
326                         (const char *) localname, NULL, NULL, 0, 0,
327                         "Attribute %s redefined\n", localname);
328     else
329         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
330                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
331                         (const char *) prefix, (const char *) localname,
332                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
333                         localname);
334     if (ctxt != NULL) {
335 	ctxt->wellFormed = 0;
336 	if (ctxt->recovery == 0)
337 	    ctxt->disableSAX = 1;
338     }
339 }
340 
341 /**
342  * xmlFatalErr:
343  * @ctxt:  an XML parser context
344  * @error:  the error number
345  * @extra:  extra information string
346  *
347  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
348  */
349 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)350 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
351 {
352     const char *errmsg;
353 
354     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
355         (ctxt->instate == XML_PARSER_EOF))
356 	return;
357     switch (error) {
358         case XML_ERR_INVALID_HEX_CHARREF:
359             errmsg = "CharRef: invalid hexadecimal value";
360             break;
361         case XML_ERR_INVALID_DEC_CHARREF:
362             errmsg = "CharRef: invalid decimal value";
363             break;
364         case XML_ERR_INVALID_CHARREF:
365             errmsg = "CharRef: invalid value";
366             break;
367         case XML_ERR_INTERNAL_ERROR:
368             errmsg = "internal error";
369             break;
370         case XML_ERR_PEREF_AT_EOF:
371             errmsg = "PEReference at end of document";
372             break;
373         case XML_ERR_PEREF_IN_PROLOG:
374             errmsg = "PEReference in prolog";
375             break;
376         case XML_ERR_PEREF_IN_EPILOG:
377             errmsg = "PEReference in epilog";
378             break;
379         case XML_ERR_PEREF_NO_NAME:
380             errmsg = "PEReference: no name";
381             break;
382         case XML_ERR_PEREF_SEMICOL_MISSING:
383             errmsg = "PEReference: expecting ';'";
384             break;
385         case XML_ERR_ENTITY_LOOP:
386             errmsg = "Detected an entity reference loop";
387             break;
388         case XML_ERR_ENTITY_NOT_STARTED:
389             errmsg = "EntityValue: \" or ' expected";
390             break;
391         case XML_ERR_ENTITY_PE_INTERNAL:
392             errmsg = "PEReferences forbidden in internal subset";
393             break;
394         case XML_ERR_ENTITY_NOT_FINISHED:
395             errmsg = "EntityValue: \" or ' expected";
396             break;
397         case XML_ERR_ATTRIBUTE_NOT_STARTED:
398             errmsg = "AttValue: \" or ' expected";
399             break;
400         case XML_ERR_LT_IN_ATTRIBUTE:
401             errmsg = "Unescaped '<' not allowed in attributes values";
402             break;
403         case XML_ERR_LITERAL_NOT_STARTED:
404             errmsg = "SystemLiteral \" or ' expected";
405             break;
406         case XML_ERR_LITERAL_NOT_FINISHED:
407             errmsg = "Unfinished System or Public ID \" or ' expected";
408             break;
409         case XML_ERR_MISPLACED_CDATA_END:
410             errmsg = "Sequence ']]>' not allowed in content";
411             break;
412         case XML_ERR_URI_REQUIRED:
413             errmsg = "SYSTEM or PUBLIC, the URI is missing";
414             break;
415         case XML_ERR_PUBID_REQUIRED:
416             errmsg = "PUBLIC, the Public Identifier is missing";
417             break;
418         case XML_ERR_HYPHEN_IN_COMMENT:
419             errmsg = "Comment must not contain '--' (double-hyphen)";
420             break;
421         case XML_ERR_PI_NOT_STARTED:
422             errmsg = "xmlParsePI : no target name";
423             break;
424         case XML_ERR_RESERVED_XML_NAME:
425             errmsg = "Invalid PI name";
426             break;
427         case XML_ERR_NOTATION_NOT_STARTED:
428             errmsg = "NOTATION: Name expected here";
429             break;
430         case XML_ERR_NOTATION_NOT_FINISHED:
431             errmsg = "'>' required to close NOTATION declaration";
432             break;
433         case XML_ERR_VALUE_REQUIRED:
434             errmsg = "Entity value required";
435             break;
436         case XML_ERR_URI_FRAGMENT:
437             errmsg = "Fragment not allowed";
438             break;
439         case XML_ERR_ATTLIST_NOT_STARTED:
440             errmsg = "'(' required to start ATTLIST enumeration";
441             break;
442         case XML_ERR_NMTOKEN_REQUIRED:
443             errmsg = "NmToken expected in ATTLIST enumeration";
444             break;
445         case XML_ERR_ATTLIST_NOT_FINISHED:
446             errmsg = "')' required to finish ATTLIST enumeration";
447             break;
448         case XML_ERR_MIXED_NOT_STARTED:
449             errmsg = "MixedContentDecl : '|' or ')*' expected";
450             break;
451         case XML_ERR_PCDATA_REQUIRED:
452             errmsg = "MixedContentDecl : '#PCDATA' expected";
453             break;
454         case XML_ERR_ELEMCONTENT_NOT_STARTED:
455             errmsg = "ContentDecl : Name or '(' expected";
456             break;
457         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
458             errmsg = "ContentDecl : ',' '|' or ')' expected";
459             break;
460         case XML_ERR_PEREF_IN_INT_SUBSET:
461             errmsg =
462                 "PEReference: forbidden within markup decl in internal subset";
463             break;
464         case XML_ERR_GT_REQUIRED:
465             errmsg = "expected '>'";
466             break;
467         case XML_ERR_CONDSEC_INVALID:
468             errmsg = "XML conditional section '[' expected";
469             break;
470         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
471             errmsg = "Content error in the external subset";
472             break;
473         case XML_ERR_CONDSEC_INVALID_KEYWORD:
474             errmsg =
475                 "conditional section INCLUDE or IGNORE keyword expected";
476             break;
477         case XML_ERR_CONDSEC_NOT_FINISHED:
478             errmsg = "XML conditional section not closed";
479             break;
480         case XML_ERR_XMLDECL_NOT_STARTED:
481             errmsg = "Text declaration '<?xml' required";
482             break;
483         case XML_ERR_XMLDECL_NOT_FINISHED:
484             errmsg = "parsing XML declaration: '?>' expected";
485             break;
486         case XML_ERR_EXT_ENTITY_STANDALONE:
487             errmsg = "external parsed entities cannot be standalone";
488             break;
489         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
490             errmsg = "EntityRef: expecting ';'";
491             break;
492         case XML_ERR_DOCTYPE_NOT_FINISHED:
493             errmsg = "DOCTYPE improperly terminated";
494             break;
495         case XML_ERR_LTSLASH_REQUIRED:
496             errmsg = "EndTag: '</' not found";
497             break;
498         case XML_ERR_EQUAL_REQUIRED:
499             errmsg = "expected '='";
500             break;
501         case XML_ERR_STRING_NOT_CLOSED:
502             errmsg = "String not closed expecting \" or '";
503             break;
504         case XML_ERR_STRING_NOT_STARTED:
505             errmsg = "String not started expecting ' or \"";
506             break;
507         case XML_ERR_ENCODING_NAME:
508             errmsg = "Invalid XML encoding name";
509             break;
510         case XML_ERR_STANDALONE_VALUE:
511             errmsg = "standalone accepts only 'yes' or 'no'";
512             break;
513         case XML_ERR_DOCUMENT_EMPTY:
514             errmsg = "Document is empty";
515             break;
516         case XML_ERR_DOCUMENT_END:
517             errmsg = "Extra content at the end of the document";
518             break;
519         case XML_ERR_NOT_WELL_BALANCED:
520             errmsg = "chunk is not well balanced";
521             break;
522         case XML_ERR_EXTRA_CONTENT:
523             errmsg = "extra content at the end of well balanced chunk";
524             break;
525         case XML_ERR_VERSION_MISSING:
526             errmsg = "Malformed declaration expecting version";
527             break;
528         case XML_ERR_NAME_TOO_LONG:
529             errmsg = "Name too long use XML_PARSE_HUGE option";
530             break;
531 #if 0
532         case:
533             errmsg = "";
534             break;
535 #endif
536         default:
537             errmsg = "Unregistered error message";
538     }
539     if (ctxt != NULL)
540 	ctxt->errNo = error;
541     if (info == NULL) {
542         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
543                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
544                         errmsg);
545     } else {
546         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
547                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
548                         errmsg, info);
549     }
550     if (ctxt != NULL) {
551 	ctxt->wellFormed = 0;
552 	if (ctxt->recovery == 0)
553 	    ctxt->disableSAX = 1;
554     }
555 }
556 
557 /**
558  * xmlFatalErrMsg:
559  * @ctxt:  an XML parser context
560  * @error:  the error number
561  * @msg:  the error message
562  *
563  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564  */
565 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)566 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567                const char *msg)
568 {
569     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
570         (ctxt->instate == XML_PARSER_EOF))
571 	return;
572     if (ctxt != NULL)
573 	ctxt->errNo = error;
574     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
576     if (ctxt != NULL) {
577 	ctxt->wellFormed = 0;
578 	if (ctxt->recovery == 0)
579 	    ctxt->disableSAX = 1;
580     }
581 }
582 
583 /**
584  * xmlWarningMsg:
585  * @ctxt:  an XML parser context
586  * @error:  the error number
587  * @msg:  the error message
588  * @str1:  extra data
589  * @str2:  extra data
590  *
591  * Handle a warning.
592  */
593 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)594 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595               const char *msg, const xmlChar *str1, const xmlChar *str2)
596 {
597     xmlStructuredErrorFunc schannel = NULL;
598 
599     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
600         (ctxt->instate == XML_PARSER_EOF))
601 	return;
602     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
603         (ctxt->sax->initialized == XML_SAX2_MAGIC))
604         schannel = ctxt->sax->serror;
605     if (ctxt != NULL) {
606         __xmlRaiseError(schannel,
607                     (ctxt->sax) ? ctxt->sax->warning : NULL,
608                     ctxt->userData,
609                     ctxt, NULL, XML_FROM_PARSER, error,
610                     XML_ERR_WARNING, NULL, 0,
611 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
612 		    msg, (const char *) str1, (const char *) str2);
613     } else {
614         __xmlRaiseError(schannel, NULL, NULL,
615                     ctxt, NULL, XML_FROM_PARSER, error,
616                     XML_ERR_WARNING, NULL, 0,
617 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
618 		    msg, (const char *) str1, (const char *) str2);
619     }
620 }
621 
622 /**
623  * xmlValidityError:
624  * @ctxt:  an XML parser context
625  * @error:  the error number
626  * @msg:  the error message
627  * @str1:  extra data
628  *
629  * Handle a validity error.
630  */
631 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)632 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633               const char *msg, const xmlChar *str1, const xmlChar *str2)
634 {
635     xmlStructuredErrorFunc schannel = NULL;
636 
637     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638         (ctxt->instate == XML_PARSER_EOF))
639 	return;
640     if (ctxt != NULL) {
641 	ctxt->errNo = error;
642 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 	    schannel = ctxt->sax->serror;
644     }
645     if (ctxt != NULL) {
646         __xmlRaiseError(schannel,
647                     ctxt->vctxt.error, ctxt->vctxt.userData,
648                     ctxt, NULL, XML_FROM_DTD, error,
649                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 		    (const char *) str2, NULL, 0, 0,
651 		    msg, (const char *) str1, (const char *) str2);
652 	ctxt->valid = 0;
653     } else {
654         __xmlRaiseError(schannel, NULL, NULL,
655                     ctxt, NULL, XML_FROM_DTD, error,
656                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
657 		    (const char *) str2, NULL, 0, 0,
658 		    msg, (const char *) str1, (const char *) str2);
659     }
660 }
661 
662 /**
663  * xmlFatalErrMsgInt:
664  * @ctxt:  an XML parser context
665  * @error:  the error number
666  * @msg:  the error message
667  * @val:  an integer value
668  *
669  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
670  */
671 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)672 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
673                   const char *msg, int val)
674 {
675     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676         (ctxt->instate == XML_PARSER_EOF))
677 	return;
678     if (ctxt != NULL)
679 	ctxt->errNo = error;
680     __xmlRaiseError(NULL, NULL, NULL,
681                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
682                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
683     if (ctxt != NULL) {
684 	ctxt->wellFormed = 0;
685 	if (ctxt->recovery == 0)
686 	    ctxt->disableSAX = 1;
687     }
688 }
689 
690 /**
691  * xmlFatalErrMsgStrIntStr:
692  * @ctxt:  an XML parser context
693  * @error:  the error number
694  * @msg:  the error message
695  * @str1:  an string info
696  * @val:  an integer value
697  * @str2:  an string info
698  *
699  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
700  */
701 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)702 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
703                   const char *msg, const xmlChar *str1, int val,
704 		  const xmlChar *str2)
705 {
706     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
707         (ctxt->instate == XML_PARSER_EOF))
708 	return;
709     if (ctxt != NULL)
710 	ctxt->errNo = error;
711     __xmlRaiseError(NULL, NULL, NULL,
712                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
713                     NULL, 0, (const char *) str1, (const char *) str2,
714 		    NULL, val, 0, msg, str1, val, str2);
715     if (ctxt != NULL) {
716 	ctxt->wellFormed = 0;
717 	if (ctxt->recovery == 0)
718 	    ctxt->disableSAX = 1;
719     }
720 }
721 
722 /**
723  * xmlFatalErrMsgStr:
724  * @ctxt:  an XML parser context
725  * @error:  the error number
726  * @msg:  the error message
727  * @val:  a string value
728  *
729  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
730  */
731 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)732 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
733                   const char *msg, const xmlChar * val)
734 {
735     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736         (ctxt->instate == XML_PARSER_EOF))
737 	return;
738     if (ctxt != NULL)
739 	ctxt->errNo = error;
740     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
741                     XML_FROM_PARSER, error, XML_ERR_FATAL,
742                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
743                     val);
744     if (ctxt != NULL) {
745 	ctxt->wellFormed = 0;
746 	if (ctxt->recovery == 0)
747 	    ctxt->disableSAX = 1;
748     }
749 }
750 
751 /**
752  * xmlErrMsgStr:
753  * @ctxt:  an XML parser context
754  * @error:  the error number
755  * @msg:  the error message
756  * @val:  a string value
757  *
758  * Handle a non fatal parser error
759  */
760 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)761 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762                   const char *msg, const xmlChar * val)
763 {
764     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765         (ctxt->instate == XML_PARSER_EOF))
766 	return;
767     if (ctxt != NULL)
768 	ctxt->errNo = error;
769     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
770                     XML_FROM_PARSER, error, XML_ERR_ERROR,
771                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772                     val);
773 }
774 
775 /**
776  * xmlNsErr:
777  * @ctxt:  an XML parser context
778  * @error:  the error number
779  * @msg:  the message
780  * @info1:  extra information string
781  * @info2:  extra information string
782  *
783  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
784  */
785 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)786 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
787          const char *msg,
788          const xmlChar * info1, const xmlChar * info2,
789          const xmlChar * info3)
790 {
791     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
792         (ctxt->instate == XML_PARSER_EOF))
793 	return;
794     if (ctxt != NULL)
795 	ctxt->errNo = error;
796     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
797                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
798                     (const char *) info2, (const char *) info3, 0, 0, msg,
799                     info1, info2, info3);
800     if (ctxt != NULL)
801 	ctxt->nsWellFormed = 0;
802 }
803 
804 /**
805  * xmlNsWarn
806  * @ctxt:  an XML parser context
807  * @error:  the error number
808  * @msg:  the message
809  * @info1:  extra information string
810  * @info2:  extra information string
811  *
812  * Handle a namespace warning error
813  */
814 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)815 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816          const char *msg,
817          const xmlChar * info1, const xmlChar * info2,
818          const xmlChar * info3)
819 {
820     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821         (ctxt->instate == XML_PARSER_EOF))
822 	return;
823     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
824                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
825                     (const char *) info2, (const char *) info3, 0, 0, msg,
826                     info1, info2, info3);
827 }
828 
829 /************************************************************************
830  *									*
831  *		Library wide options					*
832  *									*
833  ************************************************************************/
834 
/**
  * xmlHasFeature:
  * @feature: the feature to be examined
  *
  * Examines if the library has been compiled with a given feature.
  *
  * Returns a non-zero value if the feature exists, or zero (0) if the
  * feature does not exist or an unknown feature is requested.
  */
845 int
xmlHasFeature(xmlFeature feature)846 xmlHasFeature(xmlFeature feature)
847 {
848     switch (feature) {
849 	case XML_WITH_THREAD:
850 #ifdef LIBXML_THREAD_ENABLED
851 	    return(1);
852 #else
853 	    return(0);
854 #endif
855         case XML_WITH_TREE:
856 #ifdef LIBXML_TREE_ENABLED
857             return(1);
858 #else
859             return(0);
860 #endif
861         case XML_WITH_OUTPUT:
862 #ifdef LIBXML_OUTPUT_ENABLED
863             return(1);
864 #else
865             return(0);
866 #endif
867         case XML_WITH_PUSH:
868 #ifdef LIBXML_PUSH_ENABLED
869             return(1);
870 #else
871             return(0);
872 #endif
873         case XML_WITH_READER:
874 #ifdef LIBXML_READER_ENABLED
875             return(1);
876 #else
877             return(0);
878 #endif
879         case XML_WITH_PATTERN:
880 #ifdef LIBXML_PATTERN_ENABLED
881             return(1);
882 #else
883             return(0);
884 #endif
885         case XML_WITH_WRITER:
886 #ifdef LIBXML_WRITER_ENABLED
887             return(1);
888 #else
889             return(0);
890 #endif
891         case XML_WITH_SAX1:
892 #ifdef LIBXML_SAX1_ENABLED
893             return(1);
894 #else
895             return(0);
896 #endif
897         case XML_WITH_FTP:
898 #ifdef LIBXML_FTP_ENABLED
899             return(1);
900 #else
901             return(0);
902 #endif
903         case XML_WITH_HTTP:
904 #ifdef LIBXML_HTTP_ENABLED
905             return(1);
906 #else
907             return(0);
908 #endif
909         case XML_WITH_VALID:
910 #ifdef LIBXML_VALID_ENABLED
911             return(1);
912 #else
913             return(0);
914 #endif
915         case XML_WITH_HTML:
916 #ifdef LIBXML_HTML_ENABLED
917             return(1);
918 #else
919             return(0);
920 #endif
921         case XML_WITH_LEGACY:
922 #ifdef LIBXML_LEGACY_ENABLED
923             return(1);
924 #else
925             return(0);
926 #endif
927         case XML_WITH_C14N:
928 #ifdef LIBXML_C14N_ENABLED
929             return(1);
930 #else
931             return(0);
932 #endif
933         case XML_WITH_CATALOG:
934 #ifdef LIBXML_CATALOG_ENABLED
935             return(1);
936 #else
937             return(0);
938 #endif
939         case XML_WITH_XPATH:
940 #ifdef LIBXML_XPATH_ENABLED
941             return(1);
942 #else
943             return(0);
944 #endif
945         case XML_WITH_XPTR:
946 #ifdef LIBXML_XPTR_ENABLED
947             return(1);
948 #else
949             return(0);
950 #endif
951         case XML_WITH_XINCLUDE:
952 #ifdef LIBXML_XINCLUDE_ENABLED
953             return(1);
954 #else
955             return(0);
956 #endif
957         case XML_WITH_ICONV:
958 #ifdef LIBXML_ICONV_ENABLED
959             return(1);
960 #else
961             return(0);
962 #endif
963         case XML_WITH_ISO8859X:
964 #ifdef LIBXML_ISO8859X_ENABLED
965             return(1);
966 #else
967             return(0);
968 #endif
969         case XML_WITH_UNICODE:
970 #ifdef LIBXML_UNICODE_ENABLED
971             return(1);
972 #else
973             return(0);
974 #endif
975         case XML_WITH_REGEXP:
976 #ifdef LIBXML_REGEXP_ENABLED
977             return(1);
978 #else
979             return(0);
980 #endif
981         case XML_WITH_AUTOMATA:
982 #ifdef LIBXML_AUTOMATA_ENABLED
983             return(1);
984 #else
985             return(0);
986 #endif
987         case XML_WITH_EXPR:
988 #ifdef LIBXML_EXPR_ENABLED
989             return(1);
990 #else
991             return(0);
992 #endif
993         case XML_WITH_SCHEMAS:
994 #ifdef LIBXML_SCHEMAS_ENABLED
995             return(1);
996 #else
997             return(0);
998 #endif
999         case XML_WITH_SCHEMATRON:
1000 #ifdef LIBXML_SCHEMATRON_ENABLED
1001             return(1);
1002 #else
1003             return(0);
1004 #endif
1005         case XML_WITH_MODULES:
1006 #ifdef LIBXML_MODULES_ENABLED
1007             return(1);
1008 #else
1009             return(0);
1010 #endif
1011         case XML_WITH_DEBUG:
1012 #ifdef LIBXML_DEBUG_ENABLED
1013             return(1);
1014 #else
1015             return(0);
1016 #endif
1017         case XML_WITH_DEBUG_MEM:
1018 #ifdef DEBUG_MEMORY_LOCATION
1019             return(1);
1020 #else
1021             return(0);
1022 #endif
1023         case XML_WITH_DEBUG_RUN:
1024 #ifdef LIBXML_DEBUG_RUNTIME
1025             return(1);
1026 #else
1027             return(0);
1028 #endif
1029         case XML_WITH_ZLIB:
1030 #ifdef LIBXML_ZLIB_ENABLED
1031             return(1);
1032 #else
1033             return(0);
1034 #endif
1035         case XML_WITH_LZMA:
1036 #ifdef LIBXML_LZMA_ENABLED
1037             return(1);
1038 #else
1039             return(0);
1040 #endif
1041         case XML_WITH_ICU:
1042 #ifdef LIBXML_ICU_ENABLED
1043             return(1);
1044 #else
1045             return(0);
1046 #endif
1047         default:
1048 	    break;
1049      }
1050      return(0);
1051 }
1052 
1053 /************************************************************************
1054  *									*
1055  *		SAX2 defaulted attributes handling			*
1056  *									*
1057  ************************************************************************/
1058 
1059 /**
1060  * xmlDetectSAX2:
1061  * @ctxt:  an XML parser context
1062  *
1063  * Do the SAX2 detection and specific intialization
1064  */
1065 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1066 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1067     if (ctxt == NULL) return;
1068 #ifdef LIBXML_SAX1_ENABLED
1069     if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1070         ((ctxt->sax->startElementNs != NULL) ||
1071          (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1072 #else
1073     ctxt->sax2 = 1;
1074 #endif /* LIBXML_SAX1_ENABLED */
1075 
1076     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1077     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1078     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1079     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1080 		(ctxt->str_xml_ns == NULL)) {
1081         xmlErrMemory(ctxt, NULL);
1082     }
1083 }
1084 
1085 typedef struct _xmlDefAttrs xmlDefAttrs;
1086 typedef xmlDefAttrs *xmlDefAttrsPtr;
1087 struct _xmlDefAttrs {
1088     int nbAttrs;	/* number of defaulted attributes on that element */
1089     int maxAttrs;       /* the size of the array */
1090 #if __STDC_VERSION__ >= 199901L
1091     /* Using a C99 flexible array member avoids UBSan errors. */
1092     const xmlChar *values[]; /* array of localname/prefix/values/external */
1093 #else
1094     const xmlChar *values[5];
1095 #endif
1096 };
1097 
1098 /**
1099  * xmlAttrNormalizeSpace:
1100  * @src: the source string
1101  * @dst: the target string
1102  *
1103  * Normalize the space in non CDATA attribute values:
1104  * If the attribute type is not CDATA, then the XML processor MUST further
1105  * process the normalized attribute value by discarding any leading and
1106  * trailing space (#x20) characters, and by replacing sequences of space
1107  * (#x20) characters by a single space (#x20) character.
1108  * Note that the size of dst need to be at least src, and if one doesn't need
1109  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1110  * passing src as dst is just fine.
1111  *
1112  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1113  *         is needed.
1114  */
1115 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1116 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1117 {
1118     if ((src == NULL) || (dst == NULL))
1119         return(NULL);
1120 
1121     while (*src == 0x20) src++;
1122     while (*src != 0) {
1123 	if (*src == 0x20) {
1124 	    while (*src == 0x20) src++;
1125 	    if (*src != 0)
1126 		*dst++ = 0x20;
1127 	} else {
1128 	    *dst++ = *src++;
1129 	}
1130     }
1131     *dst = 0;
1132     if (dst == src)
1133        return(NULL);
1134     return(dst);
1135 }
1136 
1137 /**
1138  * xmlAttrNormalizeSpace2:
1139  * @src: the source string
1140  *
1141  * Normalize the space in non CDATA attribute values, a slightly more complex
1142  * front end to avoid allocation problems when running on attribute values
1143  * coming from the input.
1144  *
1145  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1146  *         is needed.
1147  */
1148 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1149 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1150 {
1151     int i;
1152     int remove_head = 0;
1153     int need_realloc = 0;
1154     const xmlChar *cur;
1155 
1156     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1157         return(NULL);
1158     i = *len;
1159     if (i <= 0)
1160         return(NULL);
1161 
1162     cur = src;
1163     while (*cur == 0x20) {
1164         cur++;
1165 	remove_head++;
1166     }
1167     while (*cur != 0) {
1168 	if (*cur == 0x20) {
1169 	    cur++;
1170 	    if ((*cur == 0x20) || (*cur == 0)) {
1171 	        need_realloc = 1;
1172 		break;
1173 	    }
1174 	} else
1175 	    cur++;
1176     }
1177     if (need_realloc) {
1178         xmlChar *ret;
1179 
1180 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1181 	if (ret == NULL) {
1182 	    xmlErrMemory(ctxt, NULL);
1183 	    return(NULL);
1184 	}
1185 	xmlAttrNormalizeSpace(ret, ret);
1186 	*len = (int) strlen((const char *)ret);
1187         return(ret);
1188     } else if (remove_head) {
1189         *len -= remove_head;
1190         memmove(src, src + remove_head, 1 + *len);
1191 	return(src);
1192     }
1193     return(NULL);
1194 }
1195 
1196 /**
1197  * xmlAddDefAttrs:
1198  * @ctxt:  an XML parser context
1199  * @fullname:  the element fullname
1200  * @fullattr:  the attribute fullname
1201  * @value:  the attribute value
1202  *
1203  * Add a defaulted attribute for an element
1204  */
1205 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1206 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1207                const xmlChar *fullname,
1208                const xmlChar *fullattr,
1209                const xmlChar *value) {
1210     xmlDefAttrsPtr defaults;
1211     int len;
1212     const xmlChar *name;
1213     const xmlChar *prefix;
1214 
1215     /*
1216      * Allows to detect attribute redefinitions
1217      */
1218     if (ctxt->attsSpecial != NULL) {
1219         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1220 	    return;
1221     }
1222 
1223     if (ctxt->attsDefault == NULL) {
1224         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1225 	if (ctxt->attsDefault == NULL)
1226 	    goto mem_error;
1227     }
1228 
1229     /*
1230      * split the element name into prefix:localname , the string found
1231      * are within the DTD and then not associated to namespace names.
1232      */
1233     name = xmlSplitQName3(fullname, &len);
1234     if (name == NULL) {
1235         name = xmlDictLookup(ctxt->dict, fullname, -1);
1236 	prefix = NULL;
1237     } else {
1238         name = xmlDictLookup(ctxt->dict, name, -1);
1239 	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1240     }
1241 
1242     /*
1243      * make sure there is some storage
1244      */
1245     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1246     if (defaults == NULL) {
1247         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1248 	                   (4 * 5) * sizeof(const xmlChar *));
1249 	if (defaults == NULL)
1250 	    goto mem_error;
1251 	defaults->nbAttrs = 0;
1252 	defaults->maxAttrs = 4;
1253 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1254 	                        defaults, NULL) < 0) {
1255 	    xmlFree(defaults);
1256 	    goto mem_error;
1257 	}
1258     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1259         xmlDefAttrsPtr temp;
1260 
1261         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1262 		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1263 	if (temp == NULL)
1264 	    goto mem_error;
1265 	defaults = temp;
1266 	defaults->maxAttrs *= 2;
1267 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1268 	                        defaults, NULL) < 0) {
1269 	    xmlFree(defaults);
1270 	    goto mem_error;
1271 	}
1272     }
1273 
1274     /*
1275      * Split the element name into prefix:localname , the string found
1276      * are within the DTD and hen not associated to namespace names.
1277      */
1278     name = xmlSplitQName3(fullattr, &len);
1279     if (name == NULL) {
1280         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1281 	prefix = NULL;
1282     } else {
1283         name = xmlDictLookup(ctxt->dict, name, -1);
1284 	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1285     }
1286 
1287     defaults->values[5 * defaults->nbAttrs] = name;
1288     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1289     /* intern the string and precompute the end */
1290     len = xmlStrlen(value);
1291     value = xmlDictLookup(ctxt->dict, value, len);
1292     defaults->values[5 * defaults->nbAttrs + 2] = value;
1293     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1294     if (ctxt->external)
1295         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1296     else
1297         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1298     defaults->nbAttrs++;
1299 
1300     return;
1301 
1302 mem_error:
1303     xmlErrMemory(ctxt, NULL);
1304     return;
1305 }
1306 
1307 /**
1308  * xmlAddSpecialAttr:
1309  * @ctxt:  an XML parser context
1310  * @fullname:  the element fullname
1311  * @fullattr:  the attribute fullname
1312  * @type:  the attribute type
1313  *
1314  * Register this attribute type
1315  */
1316 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1317 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1318 		  const xmlChar *fullname,
1319 		  const xmlChar *fullattr,
1320 		  int type)
1321 {
1322     if (ctxt->attsSpecial == NULL) {
1323         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1324 	if (ctxt->attsSpecial == NULL)
1325 	    goto mem_error;
1326     }
1327 
1328     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1329         return;
1330 
1331     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1332                      (void *) (ptrdiff_t) type);
1333     return;
1334 
1335 mem_error:
1336     xmlErrMemory(ctxt, NULL);
1337     return;
1338 }
1339 
1340 /**
1341  * xmlCleanSpecialAttrCallback:
1342  *
1343  * Removes CDATA attributes from the special attribute table
1344  */
1345 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1346 xmlCleanSpecialAttrCallback(void *payload, void *data,
1347                             const xmlChar *fullname, const xmlChar *fullattr,
1348                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1349     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1350 
1351     if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1352         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1353     }
1354 }
1355 
1356 /**
1357  * xmlCleanSpecialAttr:
1358  * @ctxt:  an XML parser context
1359  *
1360  * Trim the list of attributes defined to remove all those of type
1361  * CDATA as they are not special. This call should be done when finishing
1362  * to parse the DTD and before starting to parse the document root.
1363  */
1364 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1365 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1366 {
1367     if (ctxt->attsSpecial == NULL)
1368         return;
1369 
1370     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1371 
1372     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1373         xmlHashFree(ctxt->attsSpecial, NULL);
1374         ctxt->attsSpecial = NULL;
1375     }
1376     return;
1377 }
1378 
1379 /**
1380  * xmlCheckLanguageID:
1381  * @lang:  pointer to the string value
1382  *
1383  * Checks that the value conforms to the LanguageID production:
1384  *
1385  * NOTE: this is somewhat deprecated, those productions were removed from
1386  *       the XML Second edition.
1387  *
1388  * [33] LanguageID ::= Langcode ('-' Subcode)*
1389  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1390  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1391  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1392  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1393  * [38] Subcode ::= ([a-z] | [A-Z])+
1394  *
1395  * The current REC reference the sucessors of RFC 1766, currently 5646
1396  *
1397  * http://www.rfc-editor.org/rfc/rfc5646.txt
1398  * langtag       = language
1399  *                 ["-" script]
1400  *                 ["-" region]
1401  *                 *("-" variant)
1402  *                 *("-" extension)
1403  *                 ["-" privateuse]
1404  * language      = 2*3ALPHA            ; shortest ISO 639 code
1405  *                 ["-" extlang]       ; sometimes followed by
1406  *                                     ; extended language subtags
1407  *               / 4ALPHA              ; or reserved for future use
1408  *               / 5*8ALPHA            ; or registered language subtag
1409  *
1410  * extlang       = 3ALPHA              ; selected ISO 639 codes
1411  *                 *2("-" 3ALPHA)      ; permanently reserved
1412  *
1413  * script        = 4ALPHA              ; ISO 15924 code
1414  *
1415  * region        = 2ALPHA              ; ISO 3166-1 code
1416  *               / 3DIGIT              ; UN M.49 code
1417  *
1418  * variant       = 5*8alphanum         ; registered variants
1419  *               / (DIGIT 3alphanum)
1420  *
1421  * extension     = singleton 1*("-" (2*8alphanum))
1422  *
1423  *                                     ; Single alphanumerics
1424  *                                     ; "x" reserved for private use
1425  * singleton     = DIGIT               ; 0 - 9
1426  *               / %x41-57             ; A - W
1427  *               / %x59-5A             ; Y - Z
1428  *               / %x61-77             ; a - w
1429  *               / %x79-7A             ; y - z
1430  *
1431  * it sounds right to still allow Irregular i-xxx IANA and user codes too
1432  * The parser below doesn't try to cope with extension or privateuse
1433  * that could be added but that's not interoperable anyway
1434  *
1435  * Returns 1 if correct 0 otherwise
1436  **/
1437 int
xmlCheckLanguageID(const xmlChar * lang)1438 xmlCheckLanguageID(const xmlChar * lang)
1439 {
1440     const xmlChar *cur = lang, *nxt;
1441 
1442     if (cur == NULL)
1443         return (0);
1444     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1445         ((cur[0] == 'I') && (cur[1] == '-')) ||
1446         ((cur[0] == 'x') && (cur[1] == '-')) ||
1447         ((cur[0] == 'X') && (cur[1] == '-'))) {
1448         /*
1449          * Still allow IANA code and user code which were coming
1450          * from the previous version of the XML-1.0 specification
1451          * it's deprecated but we should not fail
1452          */
1453         cur += 2;
1454         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1455                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1456             cur++;
1457         return(cur[0] == 0);
1458     }
1459     nxt = cur;
1460     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1461            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1462            nxt++;
1463     if (nxt - cur >= 4) {
1464         /*
1465          * Reserved
1466          */
1467         if ((nxt - cur > 8) || (nxt[0] != 0))
1468             return(0);
1469         return(1);
1470     }
1471     if (nxt - cur < 2)
1472         return(0);
1473     /* we got an ISO 639 code */
1474     if (nxt[0] == 0)
1475         return(1);
1476     if (nxt[0] != '-')
1477         return(0);
1478 
1479     nxt++;
1480     cur = nxt;
1481     /* now we can have extlang or script or region or variant */
1482     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1483         goto region_m49;
1484 
1485     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1486            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1487            nxt++;
1488     if (nxt - cur == 4)
1489         goto script;
1490     if (nxt - cur == 2)
1491         goto region;
1492     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1493         goto variant;
1494     if (nxt - cur != 3)
1495         return(0);
1496     /* we parsed an extlang */
1497     if (nxt[0] == 0)
1498         return(1);
1499     if (nxt[0] != '-')
1500         return(0);
1501 
1502     nxt++;
1503     cur = nxt;
1504     /* now we can have script or region or variant */
1505     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1506         goto region_m49;
1507 
1508     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510            nxt++;
1511     if (nxt - cur == 2)
1512         goto region;
1513     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1514         goto variant;
1515     if (nxt - cur != 4)
1516         return(0);
1517     /* we parsed a script */
1518 script:
1519     if (nxt[0] == 0)
1520         return(1);
1521     if (nxt[0] != '-')
1522         return(0);
1523 
1524     nxt++;
1525     cur = nxt;
1526     /* now we can have region or variant */
1527     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1528         goto region_m49;
1529 
1530     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1531            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1532            nxt++;
1533 
1534     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1535         goto variant;
1536     if (nxt - cur != 2)
1537         return(0);
1538     /* we parsed a region */
1539 region:
1540     if (nxt[0] == 0)
1541         return(1);
1542     if (nxt[0] != '-')
1543         return(0);
1544 
1545     nxt++;
1546     cur = nxt;
1547     /* now we can just have a variant */
1548     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1549            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1550            nxt++;
1551 
1552     if ((nxt - cur < 5) || (nxt - cur > 8))
1553         return(0);
1554 
1555     /* we parsed a variant */
1556 variant:
1557     if (nxt[0] == 0)
1558         return(1);
1559     if (nxt[0] != '-')
1560         return(0);
1561     /* extensions and private use subtags not checked */
1562     return (1);
1563 
1564 region_m49:
1565     if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1566         ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1567         nxt += 3;
1568         goto region;
1569     }
1570     return(0);
1571 }
1572 
1573 /************************************************************************
1574  *									*
1575  *		Parser stacks related functions and macros		*
1576  *									*
1577  ************************************************************************/
1578 
1579 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1580                                             const xmlChar ** str);
1581 
1582 #ifdef SAX2
1583 /**
1584  * nsPush:
1585  * @ctxt:  an XML parser context
1586  * @prefix:  the namespace prefix or NULL
1587  * @URL:  the namespace name
1588  *
1589  * Pushes a new parser namespace on top of the ns stack
1590  *
1591  * Returns -1 in case of error, -2 if the namespace should be discarded
1592  *	   and the index in the stack otherwise.
1593  */
1594 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1595 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1596 {
1597     if (ctxt->options & XML_PARSE_NSCLEAN) {
1598         int i;
1599 	for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1600 	    if (ctxt->nsTab[i] == prefix) {
1601 		/* in scope */
1602 	        if (ctxt->nsTab[i + 1] == URL)
1603 		    return(-2);
1604 		/* out of scope keep it */
1605 		break;
1606 	    }
1607 	}
1608     }
1609     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1610 	ctxt->nsMax = 10;
1611 	ctxt->nsNr = 0;
1612 	ctxt->nsTab = (const xmlChar **)
1613 	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1614 	if (ctxt->nsTab == NULL) {
1615 	    xmlErrMemory(ctxt, NULL);
1616 	    ctxt->nsMax = 0;
1617             return (-1);
1618 	}
1619     } else if (ctxt->nsNr >= ctxt->nsMax) {
1620         const xmlChar ** tmp;
1621         ctxt->nsMax *= 2;
1622         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1623 				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1624         if (tmp == NULL) {
1625             xmlErrMemory(ctxt, NULL);
1626 	    ctxt->nsMax /= 2;
1627             return (-1);
1628         }
1629 	ctxt->nsTab = tmp;
1630     }
1631     ctxt->nsTab[ctxt->nsNr++] = prefix;
1632     ctxt->nsTab[ctxt->nsNr++] = URL;
1633     return (ctxt->nsNr);
1634 }
1635 /**
1636  * nsPop:
1637  * @ctxt: an XML parser context
1638  * @nr:  the number to pop
1639  *
1640  * Pops the top @nr parser prefix/namespace from the ns stack
1641  *
1642  * Returns the number of namespaces removed
1643  */
1644 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1645 nsPop(xmlParserCtxtPtr ctxt, int nr)
1646 {
1647     int i;
1648 
1649     if (ctxt->nsTab == NULL) return(0);
1650     if (ctxt->nsNr < nr) {
1651         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1652         nr = ctxt->nsNr;
1653     }
1654     if (ctxt->nsNr <= 0)
1655         return (0);
1656 
1657     for (i = 0;i < nr;i++) {
1658          ctxt->nsNr--;
1659 	 ctxt->nsTab[ctxt->nsNr] = NULL;
1660     }
1661     return(nr);
1662 }
1663 #endif
1664 
1665 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1666 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1667     const xmlChar **atts;
1668     int *attallocs;
1669     int maxatts;
1670 
1671     if (ctxt->atts == NULL) {
1672 	maxatts = 55; /* allow for 10 attrs by default */
1673 	atts = (const xmlChar **)
1674 	       xmlMalloc(maxatts * sizeof(xmlChar *));
1675 	if (atts == NULL) goto mem_error;
1676 	ctxt->atts = atts;
1677 	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1678 	if (attallocs == NULL) goto mem_error;
1679 	ctxt->attallocs = attallocs;
1680 	ctxt->maxatts = maxatts;
1681     } else if (nr + 5 > ctxt->maxatts) {
1682 	maxatts = (nr + 5) * 2;
1683 	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1684 				     maxatts * sizeof(const xmlChar *));
1685 	if (atts == NULL) goto mem_error;
1686 	ctxt->atts = atts;
1687 	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1688 	                             (maxatts / 5) * sizeof(int));
1689 	if (attallocs == NULL) goto mem_error;
1690 	ctxt->attallocs = attallocs;
1691 	ctxt->maxatts = maxatts;
1692     }
1693     return(ctxt->maxatts);
1694 mem_error:
1695     xmlErrMemory(ctxt, NULL);
1696     return(-1);
1697 }
1698 
1699 /**
1700  * inputPush:
1701  * @ctxt:  an XML parser context
1702  * @value:  the parser input
1703  *
1704  * Pushes a new parser input on top of the input stack
1705  *
1706  * Returns -1 in case of error, the index in the stack otherwise
1707  */
1708 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1709 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1710 {
1711     if ((ctxt == NULL) || (value == NULL))
1712         return(-1);
1713     if (ctxt->inputNr >= ctxt->inputMax) {
1714         ctxt->inputMax *= 2;
1715         ctxt->inputTab =
1716             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1717                                              ctxt->inputMax *
1718                                              sizeof(ctxt->inputTab[0]));
1719         if (ctxt->inputTab == NULL) {
1720             xmlErrMemory(ctxt, NULL);
1721 	    xmlFreeInputStream(value);
1722 	    ctxt->inputMax /= 2;
1723 	    value = NULL;
1724             return (-1);
1725         }
1726     }
1727     ctxt->inputTab[ctxt->inputNr] = value;
1728     ctxt->input = value;
1729     return (ctxt->inputNr++);
1730 }
1731 /**
1732  * inputPop:
1733  * @ctxt: an XML parser context
1734  *
1735  * Pops the top parser input from the input stack
1736  *
1737  * Returns the input just removed
1738  */
1739 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1740 inputPop(xmlParserCtxtPtr ctxt)
1741 {
1742     xmlParserInputPtr ret;
1743 
1744     if (ctxt == NULL)
1745         return(NULL);
1746     if (ctxt->inputNr <= 0)
1747         return (NULL);
1748     ctxt->inputNr--;
1749     if (ctxt->inputNr > 0)
1750         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1751     else
1752         ctxt->input = NULL;
1753     ret = ctxt->inputTab[ctxt->inputNr];
1754     ctxt->inputTab[ctxt->inputNr] = NULL;
1755     return (ret);
1756 }
1757 /**
1758  * nodePush:
1759  * @ctxt:  an XML parser context
1760  * @value:  the element node
1761  *
1762  * Pushes a new element node on top of the node stack
1763  *
1764  * Returns -1 in case of error, the index in the stack otherwise
1765  */
1766 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1767 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1768 {
1769     if (ctxt == NULL) return(0);
1770     if (ctxt->nodeNr >= ctxt->nodeMax) {
1771         xmlNodePtr *tmp;
1772 
1773 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1774                                       ctxt->nodeMax * 2 *
1775                                       sizeof(ctxt->nodeTab[0]));
1776         if (tmp == NULL) {
1777             xmlErrMemory(ctxt, NULL);
1778             return (-1);
1779         }
1780         ctxt->nodeTab = tmp;
1781 	ctxt->nodeMax *= 2;
1782     }
1783     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1784         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1785 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1786 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1787 			  xmlParserMaxDepth);
1788 	xmlHaltParser(ctxt);
1789 	return(-1);
1790     }
1791     ctxt->nodeTab[ctxt->nodeNr] = value;
1792     ctxt->node = value;
1793     return (ctxt->nodeNr++);
1794 }
1795 
1796 /**
1797  * nodePop:
1798  * @ctxt: an XML parser context
1799  *
1800  * Pops the top element node from the node stack
1801  *
1802  * Returns the node just removed
1803  */
1804 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1805 nodePop(xmlParserCtxtPtr ctxt)
1806 {
1807     xmlNodePtr ret;
1808 
1809     if (ctxt == NULL) return(NULL);
1810     if (ctxt->nodeNr <= 0)
1811         return (NULL);
1812     ctxt->nodeNr--;
1813     if (ctxt->nodeNr > 0)
1814         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1815     else
1816         ctxt->node = NULL;
1817     ret = ctxt->nodeTab[ctxt->nodeNr];
1818     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1819     return (ret);
1820 }
1821 
1822 #ifdef LIBXML_PUSH_ENABLED
1823 /**
1824  * nameNsPush:
1825  * @ctxt:  an XML parser context
1826  * @value:  the element name
1827  * @prefix:  the element prefix
1828  * @URI:  the element namespace name
1829  *
1830  * Pushes a new element name/prefix/URL on top of the name stack
1831  *
1832  * Returns -1 in case of error, the index in the stack otherwise
1833  */
1834 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1835 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1836            const xmlChar *prefix, const xmlChar *URI, int nsNr)
1837 {
1838     if (ctxt->nameNr >= ctxt->nameMax) {
1839         const xmlChar * *tmp;
1840         void **tmp2;
1841         ctxt->nameMax *= 2;
1842         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1843                                     ctxt->nameMax *
1844                                     sizeof(ctxt->nameTab[0]));
1845         if (tmp == NULL) {
1846 	    ctxt->nameMax /= 2;
1847 	    goto mem_error;
1848         }
1849 	ctxt->nameTab = tmp;
1850         tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1851                                     ctxt->nameMax * 3 *
1852                                     sizeof(ctxt->pushTab[0]));
1853         if (tmp2 == NULL) {
1854 	    ctxt->nameMax /= 2;
1855 	    goto mem_error;
1856         }
1857 	ctxt->pushTab = tmp2;
1858     }
1859     ctxt->nameTab[ctxt->nameNr] = value;
1860     ctxt->name = value;
1861     ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1862     ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1863     ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
1864     return (ctxt->nameNr++);
1865 mem_error:
1866     xmlErrMemory(ctxt, NULL);
1867     return (-1);
1868 }
1869 /**
1870  * nameNsPop:
1871  * @ctxt: an XML parser context
1872  *
1873  * Pops the top element/prefix/URI name from the name stack
1874  *
1875  * Returns the name just removed
1876  */
1877 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1878 nameNsPop(xmlParserCtxtPtr ctxt)
1879 {
1880     const xmlChar *ret;
1881 
1882     if (ctxt->nameNr <= 0)
1883         return (NULL);
1884     ctxt->nameNr--;
1885     if (ctxt->nameNr > 0)
1886         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1887     else
1888         ctxt->name = NULL;
1889     ret = ctxt->nameTab[ctxt->nameNr];
1890     ctxt->nameTab[ctxt->nameNr] = NULL;
1891     return (ret);
1892 }
1893 #endif /* LIBXML_PUSH_ENABLED */
1894 
1895 /**
1896  * namePush:
1897  * @ctxt:  an XML parser context
1898  * @value:  the element name
1899  *
1900  * Pushes a new element name on top of the name stack
1901  *
1902  * Returns -1 in case of error, the index in the stack otherwise
1903  */
1904 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1905 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1906 {
1907     if (ctxt == NULL) return (-1);
1908 
1909     if (ctxt->nameNr >= ctxt->nameMax) {
1910         const xmlChar * *tmp;
1911         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1912                                     ctxt->nameMax * 2 *
1913                                     sizeof(ctxt->nameTab[0]));
1914         if (tmp == NULL) {
1915 	    goto mem_error;
1916         }
1917 	ctxt->nameTab = tmp;
1918         ctxt->nameMax *= 2;
1919     }
1920     ctxt->nameTab[ctxt->nameNr] = value;
1921     ctxt->name = value;
1922     return (ctxt->nameNr++);
1923 mem_error:
1924     xmlErrMemory(ctxt, NULL);
1925     return (-1);
1926 }
1927 /**
1928  * namePop:
1929  * @ctxt: an XML parser context
1930  *
1931  * Pops the top element name from the name stack
1932  *
1933  * Returns the name just removed
1934  */
1935 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1936 namePop(xmlParserCtxtPtr ctxt)
1937 {
1938     const xmlChar *ret;
1939 
1940     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1941         return (NULL);
1942     ctxt->nameNr--;
1943     if (ctxt->nameNr > 0)
1944         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1945     else
1946         ctxt->name = NULL;
1947     ret = ctxt->nameTab[ctxt->nameNr];
1948     ctxt->nameTab[ctxt->nameNr] = NULL;
1949     return (ret);
1950 }
1951 
/*
 * spacePush: append @val to the space stack, doubling the stack capacity
 * first when it is full, and point ctxt->space at the new entry.
 *
 * Returns the index of the new entry, or -1 if the reallocation failed.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int grown = ctxt->spaceMax * 2;
        int *table;

        table = (int *) xmlRealloc(ctxt->spaceTab,
                                   grown * sizeof(ctxt->spaceTab[0]));
        if (table == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = table;
        ctxt->spaceMax = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
1970 
/*
 * spacePop: remove the top entry of the space stack. ctxt->space is moved
 * to the entry below it, or to slot 0 when the stack becomes empty, and
 * the vacated slot is overwritten with -1.
 *
 * Returns the value removed, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1983 
1984 /*
1985  * Macros for accessing the content. Those should be used only by the parser,
1986  * and not exported.
1987  *
1988  * Dirty macros, i.e. one often need to make assumption on the context to
1989  * use them
1990  *
1991  *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1992  *           To be used with extreme caution since operations consuming
1993  *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
1998  *   RAW     same as CUR but in the input buffer, bypass any token
1999  *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
2006  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2007  *
2008  *   NEXT    Skip to the next character, this does the proper decoding
2009  *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2010  *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2011  *   CUR_CHAR(l) returns the current unicode character (int), set l
2012  *           to the number of xmlChars used for the encoding [0-5].
2013  *   CUR_SCHAR  same but operate on a string instead of the context
2014  *   COPY_BUF  copy the current unicode char to the target buffer, increment
2015  *            the index
2016  *   GROW, SHRINK  handling of input buffers
2017  */
2018 
/*
 * Accessors on the current input of the parser context. They all expect
 * a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1..cn. Bytes are compared left to right and the comparison stops at
 * the first mismatch. Every argument is parenthesized so that expressions
 * such as "ptr + 1" or a conditional can be passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2042 
/*
 * SKIP(val): advance nbChars, the input cursor and the column by val
 * (in that order), then ask for more input when the cursor lands on a
 * 0 byte. val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2048 
/*
 * SKIPL(val): advance the input cursor by val xmlChars one at a time,
 * keeping line/col up to date (a '\n' starts a new line at column 1)
 * and counting each step in nbChars. Afterwards, ask for more input when
 * the cursor lands on a 0 byte.
 * val is parenthesized in the loop test so that any expression can be
 * passed; it is still re-evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2061 
/*
 * SHRINK: calls xmlSHRINK(ctxt) when the parser is not in progressive
 * mode, more than 2 * INPUT_CHUNK xmlChars lie between the input base and
 * the cursor, and fewer than 2 * INPUT_CHUNK remain before the input end.
 *
 * NOTE(review): this expands to a bare "if" statement that already ends
 * with ';', so writing "SHRINK;" directly before an "else" would not
 * compile; use it as a standalone statement.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
2066 
/*
 * xmlSHRINK: shrink the current input via xmlParserInputShrink(), then
 * ask for more data when the cursor sits on a 0 byte.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;

    xmlParserInputShrink(in);
    if (*in->cur == 0) {
        xmlParserInputGrow(in, INPUT_CHUNK);
    }
}
2072 
/*
 * GROW: calls xmlGROW(ctxt) when the parser is not in progressive mode
 * and fewer than INPUT_CHUNK xmlChars remain between the cursor and the
 * input end.
 *
 * NOTE(review): this expands to a bare "if" statement that already ends
 * with ';', so writing "GROW;" directly before an "else" would not
 * compile; use it as a standalone statement.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);
2076 
/*
 * xmlGROW: try to make more data available in the current input.
 *
 * The request is refused with a fatal "Huge input lookup" error, and the
 * parser is halted, when either the amount of data after the cursor or
 * the amount before it exceeds XML_MAX_LOOKUP_LIMIT, the input has a
 * buffer whose read callback is not xmlInputReadCallbackNop, and
 * XML_PARSE_HUGE is not set. Otherwise the input is grown by INPUT_CHUNK
 * and the cursor is checked against the base/end bounds.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    /* xmlChars available after the cursor, and already behind it */
    unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
    unsigned long curBase = ctxt->input->cur - ctxt->input->base;

    if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
         (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
         ((ctxt->input->buf) &&
          (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
	return;
    }
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    /* the cursor must still lie within [base, end] after growing */
    if ((ctxt->input->cur > ctxt->input->end) ||
        (ctxt->input->cur < ctxt->input->base)) {
        /*
         * NOTE(review): here the parser is halted before the error is
         * reported, the reverse of the order used above; presumably so
         * the report does not touch the out-of-bounds cursor -- confirm
         * before reordering.
         */
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
	return;
    }
    /* still sitting on a 0 byte: ask for one more chunk */
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2100 
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars(ctxt); yields its result. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar(ctxt). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar: bump the column, the cursor and
 * nbChars, then ask for more input when the cursor lands on a 0 byte.
 * The line counter is not touched.
 *
 * NOTE(review): this expands to a bare { } block, not do { } while (0),
 * so "NEXT1;" directly before an "else" would not compile.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
2112 
/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A '\n' under the cursor starts a new line at column 1; anything else
 * advances the column by one. The cursor then moves forward by l.
 * l is parenthesized so that any expression can be passed.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)
2119 
/*
 * CUR_CHAR(l): current character of the parser input via xmlCurrentChar();
 * the address of l is passed as its second argument.
 * CUR_SCHAR(s, l): same, but reads from the string s via
 * xmlStringCurrentChar().
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. When l is 1 the value is stored as a single xmlChar;
 * otherwise xmlCopyCharMultiByte() writes it and i grows by its result.
 * Arguments are parenthesized so that expressions can be passed.
 *
 * NOTE(review): this expands to a bare if/else, so it must be used as a
 * standalone statement (not directly inside an unbraced if ... else).
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2126 
2127 /**
2128  * xmlSkipBlankChars:
2129  * @ctxt:  the XML parser context
2130  *
2131  * skip all blanks character found at that point in the input streams.
2132  * It pops up finished entities in the process if allowable at that point.
2133  *
2134  * Returns the number of space chars skipped
2135  */
2136 
2137 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2138 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2139     int res = 0;
2140 
2141     /*
2142      * It's Okay to use CUR/NEXT here since all the blanks are on
2143      * the ASCII range.
2144      */
2145     if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2146 	const xmlChar *cur;
2147 	/*
2148 	 * if we are in the document content, go really fast
2149 	 */
2150 	cur = ctxt->input->cur;
2151 	while (IS_BLANK_CH(*cur)) {
2152 	    if (*cur == '\n') {
2153 		ctxt->input->line++; ctxt->input->col = 1;
2154 	    } else {
2155 		ctxt->input->col++;
2156 	    }
2157 	    cur++;
2158 	    res++;
2159 	    if (*cur == 0) {
2160 		ctxt->input->cur = cur;
2161 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2162 		cur = ctxt->input->cur;
2163 	    }
2164 	}
2165 	ctxt->input->cur = cur;
2166     } else {
2167         int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2168 
2169 	while (1) {
2170             if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2171 		NEXT;
2172 	    } else if (CUR == '%') {
2173                 /*
2174                  * Need to handle support of entities branching here
2175                  */
2176 	        if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2177                     break;
2178 	        xmlParsePEReference(ctxt);
2179             } else if (CUR == 0) {
2180                 if (ctxt->inputNr <= 1)
2181                     break;
2182                 xmlPopInput(ctxt);
2183             } else {
2184                 break;
2185             }
2186 
2187             /*
2188              * Also increase the counter when entering or exiting a PERef.
2189              * The spec says: "When a parameter-entity reference is recognized
2190              * in the DTD and included, its replacement text MUST be enlarged
2191              * by the attachment of one leading and one following space (#x20)
2192              * character."
2193              */
2194 	    res++;
2195         }
2196     }
2197     return(res);
2198 }
2199 
2200 /************************************************************************
2201  *									*
2202  *		Commodity functions to handle entities			*
2203  *									*
2204  ************************************************************************/
2205 
2206 /**
2207  * xmlPopInput:
2208  * @ctxt:  an XML parser context
2209  *
2210  * xmlPopInput: the current input pointed by ctxt->input came to an end
2211  *          pop it and return the next char.
2212  *
2213  * Returns the current xmlChar in the parser context
2214  */
2215 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2216 xmlPopInput(xmlParserCtxtPtr ctxt) {
2217     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2218     if (xmlParserDebugEntities)
2219 	xmlGenericError(xmlGenericErrorContext,
2220 		"Popping input %d\n", ctxt->inputNr);
2221     if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2222         (ctxt->instate != XML_PARSER_EOF))
2223         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2224                     "Unfinished entity outside the DTD");
2225     xmlFreeInputStream(inputPop(ctxt));
2226     if (*ctxt->input->cur == 0)
2227         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2228     return(CUR);
2229 }
2230 
2231 /**
2232  * xmlPushInput:
2233  * @ctxt:  an XML parser context
2234  * @input:  an XML parser input fragment (entity, XML fragment ...).
2235  *
2236  * xmlPushInput: switch to a new input stream which is stacked on top
2237  *               of the previous one(s).
2238  * Returns -1 in case of error or the index in the input stack
2239  */
2240 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2241 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2242     int ret;
2243     if (input == NULL) return(-1);
2244 
2245     if (xmlParserDebugEntities) {
2246 	if ((ctxt->input != NULL) && (ctxt->input->filename))
2247 	    xmlGenericError(xmlGenericErrorContext,
2248 		    "%s(%d): ", ctxt->input->filename,
2249 		    ctxt->input->line);
2250 	xmlGenericError(xmlGenericErrorContext,
2251 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2252     }
2253     if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2254         (ctxt->inputNr > 1024)) {
2255         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2256         while (ctxt->inputNr > 1)
2257             xmlFreeInputStream(inputPop(ctxt));
2258 	return(-1);
2259     }
2260     ret = inputPush(ctxt, input);
2261     if (ctxt->instate == XML_PARSER_EOF)
2262         return(-1);
2263     GROW;
2264     return(ret);
2265 }
2266 
/**
 * xmlParseCharRef:
 * @ctxt:  an XML parser context
 *
 * parse Reference declarations
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * [ WFC: Legal Character ]
 * Characters referred to using character references must match the
 * production for Char.
 *
 * The reference is read from the current parser input, which is advanced
 * past the digits and past the closing ';' when one is found. A value
 * that exceeded 0x10FFFF at any point while accumulating is rejected.
 *
 * Returns the value parsed (as an int), 0 in case of error
 */
int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
    unsigned int val = 0;
    int count = 0;
    /* non-zero once val has gone above 0x10FFFF, even if val later wraps */
    unsigned int outofrange = 0;

    /*
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
     */
    if ((RAW == '&') && (NXT(1) == '#') &&
        (NXT(2) == 'x')) {
	/* hexadecimal form: skip "&#x" */
	SKIP(3);
	GROW;
	while (RAW != ';') { /* loop blocked by count */
	    /*
	     * NOTE(review): count is incremented both here and at the
	     * bottom of the loop, so it advances by two per digit; together
	     * with the (count < 20) tests below, a letter digit is rejected
	     * when count has reached 20 -- confirm this is intended.
	     */
	    if (count++ > 20) {
		count = 0;
		GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(0);
	    }
	    if ((RAW >= '0') && (RAW <= '9'))
	        val = val * 16 + (CUR - '0');
	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
	        val = val * 16 + (CUR - 'a') + 10;
	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
	        val = val * 16 + (CUR - 'A') + 10;
	    else {
		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
		val = 0;
		break;
	    }
	    if (val > 0x10FFFF)
	        outofrange = val;

	    NEXT;
	    count++;
	}
	if (RAW == ';') {
	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
	    ctxt->input->col++;
	    ctxt->nbChars ++;
	    ctxt->input->cur++;
	}
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
	/* decimal form: skip "&#" */
	SKIP(2);
	GROW;
	while (RAW != ';') { /* loop blocked by count */
	    if (count++ > 20) {
		count = 0;
		GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(0);
	    }
	    if ((RAW >= '0') && (RAW <= '9'))
	        val = val * 10 + (CUR - '0');
	    else {
		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
		val = 0;
		break;
	    }
	    if (val > 0x10FFFF)
	        outofrange = val;

	    NEXT;
	    count++;
	}
	if (RAW == ';') {
	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
	    ctxt->input->col++;
	    ctxt->nbChars ++;
	    ctxt->input->cur++;
	}
    } else {
        /* not positioned on "&#": val stays 0 and is rejected below */
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
    }

    /*
     * [ WFC: Legal Character ]
     * Characters referred to using character references must match the
     * production for Char.
     */
    if ((IS_CHAR(val) && (outofrange == 0))) {
        return(val);
    } else {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseCharRef: invalid xmlChar value %d\n",
	                  val);
    }
    return(0);
}
2372 
2373 /**
2374  * xmlParseStringCharRef:
2375  * @ctxt:  an XML parser context
2376  * @str:  a pointer to an index in the string
2377  *
2378  * parse Reference declarations, variant parsing from a string rather
2379  * than an an input flow.
2380  *
2381  * [66] CharRef ::= '&#' [0-9]+ ';' |
2382  *                  '&#x' [0-9a-fA-F]+ ';'
2383  *
2384  * [ WFC: Legal Character ]
2385  * Characters referred to using character references must match the
2386  * production for Char.
2387  *
2388  * Returns the value parsed (as an int), 0 in case of error, str will be
2389  *         updated to the current value of the index
2390  */
2391 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2392 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2393     const xmlChar *ptr;
2394     xmlChar cur;
2395     unsigned int val = 0;
2396     unsigned int outofrange = 0;
2397 
2398     if ((str == NULL) || (*str == NULL)) return(0);
2399     ptr = *str;
2400     cur = *ptr;
2401     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2402 	ptr += 3;
2403 	cur = *ptr;
2404 	while (cur != ';') { /* Non input consuming loop */
2405 	    if ((cur >= '0') && (cur <= '9'))
2406 	        val = val * 16 + (cur - '0');
2407 	    else if ((cur >= 'a') && (cur <= 'f'))
2408 	        val = val * 16 + (cur - 'a') + 10;
2409 	    else if ((cur >= 'A') && (cur <= 'F'))
2410 	        val = val * 16 + (cur - 'A') + 10;
2411 	    else {
2412 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2413 		val = 0;
2414 		break;
2415 	    }
2416 	    if (val > 0x10FFFF)
2417 	        outofrange = val;
2418 
2419 	    ptr++;
2420 	    cur = *ptr;
2421 	}
2422 	if (cur == ';')
2423 	    ptr++;
2424     } else if  ((cur == '&') && (ptr[1] == '#')){
2425 	ptr += 2;
2426 	cur = *ptr;
2427 	while (cur != ';') { /* Non input consuming loops */
2428 	    if ((cur >= '0') && (cur <= '9'))
2429 	        val = val * 10 + (cur - '0');
2430 	    else {
2431 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2432 		val = 0;
2433 		break;
2434 	    }
2435 	    if (val > 0x10FFFF)
2436 	        outofrange = val;
2437 
2438 	    ptr++;
2439 	    cur = *ptr;
2440 	}
2441 	if (cur == ';')
2442 	    ptr++;
2443     } else {
2444 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2445 	return(0);
2446     }
2447     *str = ptr;
2448 
2449     /*
2450      * [ WFC: Legal Character ]
2451      * Characters referred to using character references must match the
2452      * production for Char.
2453      */
2454     if ((IS_CHAR(val) && (outofrange == 0))) {
2455         return(val);
2456     } else {
2457         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2458 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2459 			  val);
2460     }
2461     return(0);
2462 }
2463 
2464 /**
2465  * xmlParserHandlePEReference:
2466  * @ctxt:  the parser context
2467  *
2468  * [69] PEReference ::= '%' Name ';'
2469  *
2470  * [ WFC: No Recursion ]
2471  * A parsed entity must not contain a recursive
2472  * reference to itself, either directly or indirectly.
2473  *
2474  * [ WFC: Entity Declared ]
2475  * In a document without any DTD, a document with only an internal DTD
2476  * subset which contains no parameter entity references, or a document
2477  * with "standalone='yes'", ...  ... The declaration of a parameter
2478  * entity must precede any reference to it...
2479  *
2480  * [ VC: Entity Declared ]
2481  * In a document with an external subset or external parameter entities
2482  * with "standalone='no'", ...  ... The declaration of a parameter entity
2483  * must precede any reference to it...
2484  *
2485  * [ WFC: In DTD ]
2486  * Parameter-entity references may only appear in the DTD.
2487  * NOTE: misleading but this is handled.
2488  *
2489  * A PEReference may have been detected in the current input stream
2490  * the handling is done accordingly to
2491  *      http://www.w3.org/TR/REC-xml#entproc
2492  * i.e.
2493  *   - Included in literal in entity values
2494  *   - Included as Parameter Entity reference within DTDs
2495  */
2496 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2497 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2498     switch(ctxt->instate) {
2499 	case XML_PARSER_CDATA_SECTION:
2500 	    return;
2501         case XML_PARSER_COMMENT:
2502 	    return;
2503 	case XML_PARSER_START_TAG:
2504 	    return;
2505 	case XML_PARSER_END_TAG:
2506 	    return;
2507         case XML_PARSER_EOF:
2508 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2509 	    return;
2510         case XML_PARSER_PROLOG:
2511 	case XML_PARSER_START:
2512 	case XML_PARSER_MISC:
2513 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2514 	    return;
2515 	case XML_PARSER_ENTITY_DECL:
2516         case XML_PARSER_CONTENT:
2517         case XML_PARSER_ATTRIBUTE_VALUE:
2518         case XML_PARSER_PI:
2519 	case XML_PARSER_SYSTEM_LITERAL:
2520 	case XML_PARSER_PUBLIC_LITERAL:
2521 	    /* we just ignore it there */
2522 	    return;
2523         case XML_PARSER_EPILOG:
2524 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2525 	    return;
2526 	case XML_PARSER_ENTITY_VALUE:
2527 	    /*
2528 	     * NOTE: in the case of entity values, we don't do the
2529 	     *       substitution here since we need the literal
2530 	     *       entity value to be able to save the internal
2531 	     *       subset of the document.
2532 	     *       This will be handled by xmlStringDecodeEntities
2533 	     */
2534 	    return;
2535         case XML_PARSER_DTD:
2536 	    /*
2537 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2538 	     * In the internal DTD subset, parameter-entity references
2539 	     * can occur only where markup declarations can occur, not
2540 	     * within markup declarations.
2541 	     * In that case this is handled in xmlParseMarkupDecl
2542 	     */
2543 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2544 		return;
2545 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2546 		return;
2547             break;
2548         case XML_PARSER_IGNORE:
2549             return;
2550     }
2551 
2552     xmlParsePEReference(ctxt);
2553 }
2554 
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus n; n is parenthesized so
 * that any expression can be passed.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the new size wraps below the current one.
 * On success both buffer and buffer##_size are updated; on failure
 * neither is modified.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2569 
/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes a entity string content and process to do the adequate substitutions.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Character references are always decoded; general entity references are
 * substituted only when @what has XML_SUBSTITUTE_REF, and parameter
 * entity references only when it has XML_SUBSTITUTE_PEREF. Entity
 * content is decoded recursively, with ctxt->depth tracking the nesting.
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned if @ctxt or @str is NULL,
 *      @len is negative, the nesting depth limit is exceeded, memory
 *      allocation fails, or a reference cannot be processed.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
	return(NULL);
    last = str + len;

    /* refuse to nest deeper than 40 levels (1024 with XML_PARSE_HUGE) */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
	(ctxt->depth > 1024)) {
	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
	return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
	c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
	   (c != end2) && (c != end3)) {

	if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
	    /* character reference: decode it and append the character */
	    int val = xmlParseStringCharRef(ctxt, &str);
	    if (val == 0)
                goto int_error;
	    /* a length of 0 makes COPY_BUF take its multi-byte branch */
	    COPY_BUF(0,buffer,nbchars,val);
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
	    /* general entity reference */
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding Entity Reference: %.30s\n",
			str);
	    ent = xmlParseStringEntityRef(ctxt, &str);
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if ((ent != NULL) &&
		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
		/* predefined entity: append the first xmlChar of its content */
		if (ent->content != NULL) {
		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
		    }
		} else {
		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
			    "predefined entity has no content\n");
                    goto int_error;
		}
	    } else if ((ent != NULL) && (ent->content != NULL)) {
		/* entity with content: decode it recursively, then append */
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;
		if (rep == NULL)
                    goto int_error;

                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        /* stop if the entity check rejects this size */
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    } else if (ent != NULL) {
		/* entity without content: copy the reference as "&name;" */
		int i = xmlStrlen(ent->name);
		const xmlChar *cur = ent->name;

		buffer[nbchars++] = '&';
		/* make room for the whole name before copying it */
		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
		}
		for (;i > 0;i--)
		    buffer[nbchars++] = *cur++;
		buffer[nbchars++] = ';';
	    }
	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
	    /* parameter entity reference */
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding PE Reference: %.30s\n", str);
	    ent = xmlParseStringPEReference(ctxt, &str);
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if (ent != NULL) {
                if (ent->content == NULL) {
		    /*
		     * Note: external parsed entities will not be loaded,
		     * it is not required for a non-validating parser to
		     * complete external PEreferences coming from the
		     * internal subset
		     */
		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
			(ctxt->validate != 0)) {
			xmlLoadEntityContent(ctxt, ent);
		    } else {
			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
		  "not validating will not read content for PE entity %s\n",
		                      ent->name, NULL);
		    }
		}
		/*
		 * xmlStringDecodeEntities returns NULL for a NULL string,
		 * so an entity still without content ends up in int_error.
		 */
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;
		if (rep == NULL)
                    goto int_error;
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    }
	} else {
	    /* ordinary character: copy it and step over its l xmlChars */
	    COPY_BUF(l,buffer,nbchars,c);
	    str += l;
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	}
	/* fetch the next character, or stop at the end of the string */
	if (str < last)
	    c = CUR_SCHAR(str, l);
	else
	    c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

mem_error:
    xmlErrMemory(ctxt, NULL);
    /* memory errors fall through to the common cleanup below */
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
2760 
2761 /**
2762  * xmlStringDecodeEntities:
2763  * @ctxt:  the parser context
2764  * @str:  the input string
2765  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2766  * @end:  an end marker xmlChar, 0 if none
2767  * @end2:  an end marker xmlChar, 0 if none
2768  * @end3:  an end marker xmlChar, 0 if none
2769  *
2770  * Takes a entity string content and process to do the adequate substitutions.
2771  *
2772  * [67] Reference ::= EntityRef | CharRef
2773  *
2774  * [69] PEReference ::= '%' Name ';'
2775  *
2776  * Returns A newly allocated string with the substitution done. The caller
2777  *      must deallocate it !
2778  */
2779 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2780 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2781 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2782     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2783     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2784            end, end2, end3));
2785 }
2786 
2787 /************************************************************************
2788  *									*
2789  *		Commodity functions, cleanup needed ?			*
2790  *									*
2791  ************************************************************************/
2792 
2793 /**
2794  * areBlanks:
2795  * @ctxt:  an XML parser context
2796  * @str:  a xmlChar *
2797  * @len:  the size of @str
2798  * @blank_chars: we know the chars are blanks
2799  *
2800  * Is this a sequence of blank chars that one can ignore ?
2801  *
2802  * Returns 1 if ignorable 0 otherwise.
2803  */
2804 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2805 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2806                      int blank_chars) {
2807     int i, ret;
2808     xmlNodePtr lastChild;
2809 
2810     /*
2811      * Don't spend time trying to differentiate them, the same callback is
2812      * used !
2813      */
2814     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2815 	return(0);
2816 
2817     /*
2818      * Check for xml:space value.
2819      */
2820     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2821         (*(ctxt->space) == -2))
2822 	return(0);
2823 
2824     /*
2825      * Check that the string is made of blanks
2826      */
2827     if (blank_chars == 0) {
2828 	for (i = 0;i < len;i++)
2829 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2830     }
2831 
2832     /*
2833      * Look if the element is mixed content in the DTD if available
2834      */
2835     if (ctxt->node == NULL) return(0);
2836     if (ctxt->myDoc != NULL) {
2837 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2838         if (ret == 0) return(1);
2839         if (ret == 1) return(0);
2840     }
2841 
2842     /*
2843      * Otherwise, heuristic :-\
2844      */
2845     if ((RAW != '<') && (RAW != 0xD)) return(0);
2846     if ((ctxt->node->children == NULL) &&
2847 	(RAW == '<') && (NXT(1) == '/')) return(0);
2848 
2849     lastChild = xmlGetLastChild(ctxt->node);
2850     if (lastChild == NULL) {
2851         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2852             (ctxt->node->content != NULL)) return(0);
2853     } else if (xmlNodeIsText(lastChild))
2854         return(0);
2855     else if ((ctxt->node->children != NULL) &&
2856              (xmlNodeIsText(ctxt->node->children)))
2857         return(0);
2858     return(1);
2859 }
2860 
2861 /************************************************************************
2862  *									*
2863  *		Extra stuff for namespace support			*
2864  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2865  *									*
2866  ************************************************************************/
2867 
2868 /**
2869  * xmlSplitQName:
2870  * @ctxt:  an XML parser context
2871  * @name:  an XML parser context
2872  * @prefix:  a xmlChar **
2873  *
2874  * parse an UTF8 encoded XML qualified name string
2875  *
2876  * [NS 5] QName ::= (Prefix ':')? LocalPart
2877  *
2878  * [NS 6] Prefix ::= NCName
2879  *
2880  * [NS 7] LocalPart ::= NCName
2881  *
2882  * Returns the local part, and prefix is updated
2883  *   to get the Prefix if any.
2884  */
2885 
2886 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2887 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2888     xmlChar buf[XML_MAX_NAMELEN + 5];
2889     xmlChar *buffer = NULL;
2890     int len = 0;
2891     int max = XML_MAX_NAMELEN;
2892     xmlChar *ret = NULL;
2893     const xmlChar *cur = name;
2894     int c;
2895 
2896     if (prefix == NULL) return(NULL);
2897     *prefix = NULL;
2898 
2899     if (cur == NULL) return(NULL);
2900 
2901 #ifndef XML_XML_NAMESPACE
2902     /* xml: prefix is not really a namespace */
2903     if ((cur[0] == 'x') && (cur[1] == 'm') &&
2904         (cur[2] == 'l') && (cur[3] == ':'))
2905 	return(xmlStrdup(name));
2906 #endif
2907 
2908     /* nasty but well=formed */
2909     if (cur[0] == ':')
2910 	return(xmlStrdup(name));
2911 
2912     c = *cur++;
2913     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2914 	buf[len++] = c;
2915 	c = *cur++;
2916     }
2917     if (len >= max) {
2918 	/*
2919 	 * Okay someone managed to make a huge name, so he's ready to pay
2920 	 * for the processing speed.
2921 	 */
2922 	max = len * 2;
2923 
2924 	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2925 	if (buffer == NULL) {
2926 	    xmlErrMemory(ctxt, NULL);
2927 	    return(NULL);
2928 	}
2929 	memcpy(buffer, buf, len);
2930 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2931 	    if (len + 10 > max) {
2932 	        xmlChar *tmp;
2933 
2934 		max *= 2;
2935 		tmp = (xmlChar *) xmlRealloc(buffer,
2936 						max * sizeof(xmlChar));
2937 		if (tmp == NULL) {
2938 		    xmlFree(buffer);
2939 		    xmlErrMemory(ctxt, NULL);
2940 		    return(NULL);
2941 		}
2942 		buffer = tmp;
2943 	    }
2944 	    buffer[len++] = c;
2945 	    c = *cur++;
2946 	}
2947 	buffer[len] = 0;
2948     }
2949 
2950     if ((c == ':') && (*cur == 0)) {
2951         if (buffer != NULL)
2952 	    xmlFree(buffer);
2953 	*prefix = NULL;
2954 	return(xmlStrdup(name));
2955     }
2956 
2957     if (buffer == NULL)
2958 	ret = xmlStrndup(buf, len);
2959     else {
2960 	ret = buffer;
2961 	buffer = NULL;
2962 	max = XML_MAX_NAMELEN;
2963     }
2964 
2965 
2966     if (c == ':') {
2967 	c = *cur;
2968         *prefix = ret;
2969 	if (c == 0) {
2970 	    return(xmlStrndup(BAD_CAST "", 0));
2971 	}
2972 	len = 0;
2973 
2974 	/*
2975 	 * Check that the first character is proper to start
2976 	 * a new name
2977 	 */
2978 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
2979 	      ((c >= 0x41) && (c <= 0x5A)) ||
2980 	      (c == '_') || (c == ':'))) {
2981 	    int l;
2982 	    int first = CUR_SCHAR(cur, l);
2983 
2984 	    if (!IS_LETTER(first) && (first != '_')) {
2985 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2986 			    "Name %s is not XML Namespace compliant\n",
2987 				  name);
2988 	    }
2989 	}
2990 	cur++;
2991 
2992 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2993 	    buf[len++] = c;
2994 	    c = *cur++;
2995 	}
2996 	if (len >= max) {
2997 	    /*
2998 	     * Okay someone managed to make a huge name, so he's ready to pay
2999 	     * for the processing speed.
3000 	     */
3001 	    max = len * 2;
3002 
3003 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3004 	    if (buffer == NULL) {
3005 	        xmlErrMemory(ctxt, NULL);
3006 		return(NULL);
3007 	    }
3008 	    memcpy(buffer, buf, len);
3009 	    while (c != 0) { /* tested bigname2.xml */
3010 		if (len + 10 > max) {
3011 		    xmlChar *tmp;
3012 
3013 		    max *= 2;
3014 		    tmp = (xmlChar *) xmlRealloc(buffer,
3015 						    max * sizeof(xmlChar));
3016 		    if (tmp == NULL) {
3017 			xmlErrMemory(ctxt, NULL);
3018 			xmlFree(buffer);
3019 			return(NULL);
3020 		    }
3021 		    buffer = tmp;
3022 		}
3023 		buffer[len++] = c;
3024 		c = *cur++;
3025 	    }
3026 	    buffer[len] = 0;
3027 	}
3028 
3029 	if (buffer == NULL)
3030 	    ret = xmlStrndup(buf, len);
3031 	else {
3032 	    ret = buffer;
3033 	}
3034     }
3035 
3036     return(ret);
3037 }
3038 
3039 /************************************************************************
3040  *									*
3041  *			The parser itself				*
3042  *	Relates to http://www.w3.org/TR/REC-xml				*
3043  *									*
3044  ************************************************************************/
3045 
3046 /************************************************************************
3047  *									*
3048  *	Routines to parse Name, NCName and NmToken			*
3049  *									*
3050  ************************************************************************/
3051 #ifdef DEBUG
/*
 * Call counters, only compiled when DEBUG is defined. Each one is
 * incremented once per call by the routine it is named after
 * (xmlParseName, xmlParseNmtoken, xmlParseNCName, xmlParseNCNameComplex,
 * xmlParseNameComplex and xmlParseStringName respectively).
 * NOTE(review): nothing in this part of the file reads them back; they
 * are presumably meant to be inspected from a debugger - confirm.
 */
3052 static unsigned long nbParseName = 0;
3053 static unsigned long nbParseNmToken = 0;
3054 static unsigned long nbParseNCName = 0;
3055 static unsigned long nbParseNCNameComplex = 0;
3056 static unsigned long nbParseNameComplex = 0;
3057 static unsigned long nbParseStringName = 0;
3058 #endif
3059 
3060 /*
3061  * The two following functions are related to the change of accepted
3062  * characters for Name and NmToken in the Revision 5 of XML-1.0
3063  * They correspond to the modified production [4] and the new production [4a]
3064  * changes in that revision. Also note that the macros used for the
3065  * productions Letter, Digit, CombiningChar and Extender are not needed
3066  * anymore.
3067  * We still keep compatibility to pre-revision5 parsing semantic if the
3068  * new XML_PARSE_OLD10 option is given to the parser.
3069  */
3070 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3071 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3072     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3073         /*
3074 	 * Use the new checks of production [4] [4a] amd [5] of the
3075 	 * Update 5 of XML-1.0
3076 	 */
3077 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3078 	    (((c >= 'a') && (c <= 'z')) ||
3079 	     ((c >= 'A') && (c <= 'Z')) ||
3080 	     (c == '_') || (c == ':') ||
3081 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3082 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3083 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3084 	     ((c >= 0x370) && (c <= 0x37D)) ||
3085 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3086 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3087 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3088 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3089 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3090 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3091 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3092 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3093 	    return(1);
3094     } else {
3095         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3096 	    return(1);
3097     }
3098     return(0);
3099 }
3100 
3101 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3102 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3103     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3104         /*
3105 	 * Use the new checks of production [4] [4a] amd [5] of the
3106 	 * Update 5 of XML-1.0
3107 	 */
3108 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3109 	    (((c >= 'a') && (c <= 'z')) ||
3110 	     ((c >= 'A') && (c <= 'Z')) ||
3111 	     ((c >= '0') && (c <= '9')) || /* !start */
3112 	     (c == '_') || (c == ':') ||
3113 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3114 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3115 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3116 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3117 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3118 	     ((c >= 0x370) && (c <= 0x37D)) ||
3119 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3120 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3121 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3122 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3123 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3124 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3125 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3126 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3127 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3128 	     return(1);
3129     } else {
3130         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3131             (c == '.') || (c == '-') ||
3132 	    (c == '_') || (c == ':') ||
3133 	    (IS_COMBINING(c)) ||
3134 	    (IS_EXTENDER(c)))
3135 	    return(1);
3136     }
3137     return(0);
3138 }
3139 
3140 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3141                                           int *len, int *alloc, int normalize);
3142 
3143 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3144 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3145     int len = 0, l;
3146     int c;
3147     int count = 0;
3148 
3149 #ifdef DEBUG
3150     nbParseNameComplex++;
3151 #endif
3152 
3153     /*
3154      * Handler for more complex cases
3155      */
3156     GROW;
3157     if (ctxt->instate == XML_PARSER_EOF)
3158         return(NULL);
3159     c = CUR_CHAR(l);
3160     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3161         /*
3162 	 * Use the new checks of production [4] [4a] amd [5] of the
3163 	 * Update 5 of XML-1.0
3164 	 */
3165 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3166 	    (!(((c >= 'a') && (c <= 'z')) ||
3167 	       ((c >= 'A') && (c <= 'Z')) ||
3168 	       (c == '_') || (c == ':') ||
3169 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3170 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3171 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3172 	       ((c >= 0x370) && (c <= 0x37D)) ||
3173 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3174 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3175 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3176 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3177 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3178 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3179 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3180 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3181 	    return(NULL);
3182 	}
3183 	len += l;
3184 	NEXTL(l);
3185 	c = CUR_CHAR(l);
3186 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3187 	       (((c >= 'a') && (c <= 'z')) ||
3188 	        ((c >= 'A') && (c <= 'Z')) ||
3189 	        ((c >= '0') && (c <= '9')) || /* !start */
3190 	        (c == '_') || (c == ':') ||
3191 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3192 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3193 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3194 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3195 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3196 	        ((c >= 0x370) && (c <= 0x37D)) ||
3197 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3198 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3199 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3200 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3201 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3202 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3203 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3204 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3205 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3206 		)) {
3207 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3208 		count = 0;
3209 		GROW;
3210                 if (ctxt->instate == XML_PARSER_EOF)
3211                     return(NULL);
3212 	    }
3213 	    len += l;
3214 	    NEXTL(l);
3215 	    c = CUR_CHAR(l);
3216 	}
3217     } else {
3218 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3219 	    (!IS_LETTER(c) && (c != '_') &&
3220 	     (c != ':'))) {
3221 	    return(NULL);
3222 	}
3223 	len += l;
3224 	NEXTL(l);
3225 	c = CUR_CHAR(l);
3226 
3227 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3228 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3229 		(c == '.') || (c == '-') ||
3230 		(c == '_') || (c == ':') ||
3231 		(IS_COMBINING(c)) ||
3232 		(IS_EXTENDER(c)))) {
3233 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3234 		count = 0;
3235 		GROW;
3236                 if (ctxt->instate == XML_PARSER_EOF)
3237                     return(NULL);
3238 	    }
3239 	    len += l;
3240 	    NEXTL(l);
3241 	    c = CUR_CHAR(l);
3242 	}
3243     }
3244     if ((len > XML_MAX_NAME_LENGTH) &&
3245         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3246         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3247         return(NULL);
3248     }
3249     if (ctxt->input->cur - ctxt->input->base < len) {
3250         /*
3251          * There were a couple of bugs where PERefs lead to to a change
3252          * of the buffer. Check the buffer size to avoid passing an invalid
3253          * pointer to xmlDictLookup.
3254          */
3255         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3256                     "unexpected change of input buffer");
3257         return (NULL);
3258     }
3259     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3260         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3261     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3262 }
3263 
3264 /**
3265  * xmlParseName:
3266  * @ctxt:  an XML parser context
3267  *
3268  * parse an XML name.
3269  *
3270  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3271  *                  CombiningChar | Extender
3272  *
3273  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3274  *
3275  * [6] Names ::= Name (#x20 Name)*
3276  *
3277  * Returns the Name parsed or NULL
3278  */
3279 
3280 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3281 xmlParseName(xmlParserCtxtPtr ctxt) {
3282     const xmlChar *in;
3283     const xmlChar *ret;
3284     int count = 0;
3285 
3286     GROW;
3287 
3288 #ifdef DEBUG
3289     nbParseName++;
3290 #endif
3291 
3292     /*
3293      * Accelerator for simple ASCII names
3294      */
3295     in = ctxt->input->cur;
3296     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3297 	((*in >= 0x41) && (*in <= 0x5A)) ||
3298 	(*in == '_') || (*in == ':')) {
3299 	in++;
3300 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3301 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3302 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3303 	       (*in == '_') || (*in == '-') ||
3304 	       (*in == ':') || (*in == '.'))
3305 	    in++;
3306 	if ((*in > 0) && (*in < 0x80)) {
3307 	    count = in - ctxt->input->cur;
3308             if ((count > XML_MAX_NAME_LENGTH) &&
3309                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3310                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3311                 return(NULL);
3312             }
3313 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3314 	    ctxt->input->cur = in;
3315 	    ctxt->nbChars += count;
3316 	    ctxt->input->col += count;
3317 	    if (ret == NULL)
3318 	        xmlErrMemory(ctxt, NULL);
3319 	    return(ret);
3320 	}
3321     }
3322     /* accelerator for special cases */
3323     return(xmlParseNameComplex(ctxt));
3324 }
3325 
3326 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3327 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3328     int len = 0, l;
3329     int c;
3330     int count = 0;
3331     size_t startPosition = 0;
3332 
3333 #ifdef DEBUG
3334     nbParseNCNameComplex++;
3335 #endif
3336 
3337     /*
3338      * Handler for more complex cases
3339      */
3340     GROW;
3341     startPosition = CUR_PTR - BASE_PTR;
3342     c = CUR_CHAR(l);
3343     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3344 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3345 	return(NULL);
3346     }
3347 
3348     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3349 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3350 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3351             if ((len > XML_MAX_NAME_LENGTH) &&
3352                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3353                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3354                 return(NULL);
3355             }
3356 	    count = 0;
3357 	    GROW;
3358             if (ctxt->instate == XML_PARSER_EOF)
3359                 return(NULL);
3360 	}
3361 	len += l;
3362 	NEXTL(l);
3363 	c = CUR_CHAR(l);
3364 	if (c == 0) {
3365 	    count = 0;
3366 	    /*
3367 	     * when shrinking to extend the buffer we really need to preserve
3368 	     * the part of the name we already parsed. Hence rolling back
3369 	     * by current lenght.
3370 	     */
3371 	    ctxt->input->cur -= l;
3372 	    GROW;
3373             if (ctxt->instate == XML_PARSER_EOF)
3374                 return(NULL);
3375 	    ctxt->input->cur += l;
3376 	    c = CUR_CHAR(l);
3377 	}
3378     }
3379     if ((len > XML_MAX_NAME_LENGTH) &&
3380         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3381         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3382         return(NULL);
3383     }
3384     return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3385 }
3386 
3387 /**
3388  * xmlParseNCName:
3389  * @ctxt:  an XML parser context
3390  * @len:  length of the string parsed
3391  *
3392  * parse an XML name.
3393  *
3394  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3395  *                      CombiningChar | Extender
3396  *
3397  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3398  *
3399  * Returns the Name parsed or NULL
3400  */
3401 
3402 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3403 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3404     const xmlChar *in, *e;
3405     const xmlChar *ret;
3406     int count = 0;
3407 
3408 #ifdef DEBUG
3409     nbParseNCName++;
3410 #endif
3411 
3412     /*
3413      * Accelerator for simple ASCII names
3414      */
3415     in = ctxt->input->cur;
3416     e = ctxt->input->end;
3417     if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3418 	 ((*in >= 0x41) && (*in <= 0x5A)) ||
3419 	 (*in == '_')) && (in < e)) {
3420 	in++;
3421 	while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3422 	        ((*in >= 0x41) && (*in <= 0x5A)) ||
3423 	        ((*in >= 0x30) && (*in <= 0x39)) ||
3424 	        (*in == '_') || (*in == '-') ||
3425 	        (*in == '.')) && (in < e))
3426 	    in++;
3427 	if (in >= e)
3428 	    goto complex;
3429 	if ((*in > 0) && (*in < 0x80)) {
3430 	    count = in - ctxt->input->cur;
3431             if ((count > XML_MAX_NAME_LENGTH) &&
3432                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3433                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3434                 return(NULL);
3435             }
3436 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3437 	    ctxt->input->cur = in;
3438 	    ctxt->nbChars += count;
3439 	    ctxt->input->col += count;
3440 	    if (ret == NULL) {
3441 	        xmlErrMemory(ctxt, NULL);
3442 	    }
3443 	    return(ret);
3444 	}
3445     }
3446 complex:
3447     return(xmlParseNCNameComplex(ctxt));
3448 }
3449 
3450 /**
3451  * xmlParseNameAndCompare:
3452  * @ctxt:  an XML parser context
3453  *
3454  * parse an XML name and compares for match
3455  * (specialized for endtag parsing)
3456  *
3457  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3458  * and the name for mismatch
3459  */
3460 
3461 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3462 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3463     register const xmlChar *cmp = other;
3464     register const xmlChar *in;
3465     const xmlChar *ret;
3466 
3467     GROW;
3468     if (ctxt->instate == XML_PARSER_EOF)
3469         return(NULL);
3470 
3471     in = ctxt->input->cur;
3472     while (*in != 0 && *in == *cmp) {
3473 	++in;
3474 	++cmp;
3475 	ctxt->input->col++;
3476     }
3477     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3478 	/* success */
3479 	ctxt->input->cur = in;
3480 	return (const xmlChar*) 1;
3481     }
3482     /* failure (or end of input buffer), check with full function */
3483     ret = xmlParseName (ctxt);
3484     /* strings coming from the dictionary direct compare possible */
3485     if (ret == other) {
3486 	return (const xmlChar*) 1;
3487     }
3488     return ret;
3489 }
3490 
3491 /**
3492  * xmlParseStringName:
3493  * @ctxt:  an XML parser context
3494  * @str:  a pointer to the string pointer (IN/OUT)
3495  *
3496  * parse an XML name.
3497  *
3498  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3499  *                  CombiningChar | Extender
3500  *
3501  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3502  *
3503  * [6] Names ::= Name (#x20 Name)*
3504  *
3505  * Returns the Name parsed or NULL. The @str pointer
3506  * is updated to the current location in the string.
3507  */
3508 
3509 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3510 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3511     xmlChar buf[XML_MAX_NAMELEN + 5];
3512     const xmlChar *cur = *str;
3513     int len = 0, l;
3514     int c;
3515 
3516 #ifdef DEBUG
3517     nbParseStringName++;
3518 #endif
3519 
3520     c = CUR_SCHAR(cur, l);
3521     if (!xmlIsNameStartChar(ctxt, c)) {
3522 	return(NULL);
3523     }
3524 
3525     COPY_BUF(l,buf,len,c);
3526     cur += l;
3527     c = CUR_SCHAR(cur, l);
3528     while (xmlIsNameChar(ctxt, c)) {
3529 	COPY_BUF(l,buf,len,c);
3530 	cur += l;
3531 	c = CUR_SCHAR(cur, l);
3532 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3533 	    /*
3534 	     * Okay someone managed to make a huge name, so he's ready to pay
3535 	     * for the processing speed.
3536 	     */
3537 	    xmlChar *buffer;
3538 	    int max = len * 2;
3539 
3540 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3541 	    if (buffer == NULL) {
3542 	        xmlErrMemory(ctxt, NULL);
3543 		return(NULL);
3544 	    }
3545 	    memcpy(buffer, buf, len);
3546 	    while (xmlIsNameChar(ctxt, c)) {
3547 		if (len + 10 > max) {
3548 		    xmlChar *tmp;
3549 
3550                     if ((len > XML_MAX_NAME_LENGTH) &&
3551                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553 			xmlFree(buffer);
3554                         return(NULL);
3555                     }
3556 		    max *= 2;
3557 		    tmp = (xmlChar *) xmlRealloc(buffer,
3558 			                            max * sizeof(xmlChar));
3559 		    if (tmp == NULL) {
3560 			xmlErrMemory(ctxt, NULL);
3561 			xmlFree(buffer);
3562 			return(NULL);
3563 		    }
3564 		    buffer = tmp;
3565 		}
3566 		COPY_BUF(l,buffer,len,c);
3567 		cur += l;
3568 		c = CUR_SCHAR(cur, l);
3569 	    }
3570 	    buffer[len] = 0;
3571 	    *str = cur;
3572 	    return(buffer);
3573 	}
3574     }
3575     if ((len > XML_MAX_NAME_LENGTH) &&
3576         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3577         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3578         return(NULL);
3579     }
3580     *str = cur;
3581     return(xmlStrndup(buf, len));
3582 }
3583 
3584 /**
3585  * xmlParseNmtoken:
3586  * @ctxt:  an XML parser context
3587  *
3588  * parse an XML Nmtoken.
3589  *
3590  * [7] Nmtoken ::= (NameChar)+
3591  *
3592  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3593  *
3594  * Returns the Nmtoken parsed or NULL
3595  */
3596 
3597 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3598 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3599     xmlChar buf[XML_MAX_NAMELEN + 5];
3600     int len = 0, l;
3601     int c;
3602     int count = 0;
3603 
3604 #ifdef DEBUG
3605     nbParseNmToken++;
3606 #endif
3607 
3608     GROW;
3609     if (ctxt->instate == XML_PARSER_EOF)
3610         return(NULL);
3611     c = CUR_CHAR(l);
3612 
3613     while (xmlIsNameChar(ctxt, c)) {
3614 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3615 	    count = 0;
3616 	    GROW;
3617 	}
3618 	COPY_BUF(l,buf,len,c);
3619 	NEXTL(l);
3620 	c = CUR_CHAR(l);
3621 	if (c == 0) {
3622 	    count = 0;
3623 	    GROW;
3624 	    if (ctxt->instate == XML_PARSER_EOF)
3625 		return(NULL);
3626             c = CUR_CHAR(l);
3627 	}
3628 	if (len >= XML_MAX_NAMELEN) {
3629 	    /*
3630 	     * Okay someone managed to make a huge token, so he's ready to pay
3631 	     * for the processing speed.
3632 	     */
3633 	    xmlChar *buffer;
3634 	    int max = len * 2;
3635 
3636 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3637 	    if (buffer == NULL) {
3638 	        xmlErrMemory(ctxt, NULL);
3639 		return(NULL);
3640 	    }
3641 	    memcpy(buffer, buf, len);
3642 	    while (xmlIsNameChar(ctxt, c)) {
3643 		if (count++ > XML_PARSER_CHUNK_SIZE) {
3644 		    count = 0;
3645 		    GROW;
3646                     if (ctxt->instate == XML_PARSER_EOF) {
3647                         xmlFree(buffer);
3648                         return(NULL);
3649                     }
3650 		}
3651 		if (len + 10 > max) {
3652 		    xmlChar *tmp;
3653 
3654                     if ((max > XML_MAX_NAME_LENGTH) &&
3655                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3656                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3657                         xmlFree(buffer);
3658                         return(NULL);
3659                     }
3660 		    max *= 2;
3661 		    tmp = (xmlChar *) xmlRealloc(buffer,
3662 			                            max * sizeof(xmlChar));
3663 		    if (tmp == NULL) {
3664 			xmlErrMemory(ctxt, NULL);
3665 			xmlFree(buffer);
3666 			return(NULL);
3667 		    }
3668 		    buffer = tmp;
3669 		}
3670 		COPY_BUF(l,buffer,len,c);
3671 		NEXTL(l);
3672 		c = CUR_CHAR(l);
3673 	    }
3674 	    buffer[len] = 0;
3675 	    return(buffer);
3676 	}
3677     }
3678     if (len == 0)
3679         return(NULL);
3680     if ((len > XML_MAX_NAME_LENGTH) &&
3681         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3682         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3683         return(NULL);
3684     }
3685     return(xmlStrndup(buf, len));
3686 }
3687 
3688 /**
3689  * xmlParseEntityValue:
3690  * @ctxt:  an XML parser context
3691  * @orig:  if non-NULL store a copy of the original entity value
3692  *
3693  * parse a value for ENTITY declarations
3694  *
3695  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3696  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3697  *
3698  * Returns the EntityValue parsed with reference substituted or NULL
3699  */
3700 
3701 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3702 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3703     xmlChar *buf = NULL;
3704     int len = 0;
3705     int size = XML_PARSER_BUFFER_SIZE;
3706     int c, l;
3707     xmlChar stop;
3708     xmlChar *ret = NULL;
3709     const xmlChar *cur = NULL;
3710     xmlParserInputPtr input;
3711 
3712     if (RAW == '"') stop = '"';
3713     else if (RAW == '\'') stop = '\'';
3714     else {
3715 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3716 	return(NULL);
3717     }
3718     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3719     if (buf == NULL) {
3720 	xmlErrMemory(ctxt, NULL);
3721 	return(NULL);
3722     }
3723 
3724     /*
3725      * The content of the entity definition is copied in a buffer.
3726      */
3727 
3728     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3729     input = ctxt->input;
3730     GROW;
3731     if (ctxt->instate == XML_PARSER_EOF)
3732         goto error;
3733     NEXT;
3734     c = CUR_CHAR(l);
3735     /*
3736      * NOTE: 4.4.5 Included in Literal
3737      * When a parameter entity reference appears in a literal entity
3738      * value, ... a single or double quote character in the replacement
3739      * text is always treated as a normal data character and will not
3740      * terminate the literal.
3741      * In practice it means we stop the loop only when back at parsing
3742      * the initial entity and the quote is found
3743      */
3744     while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3745 	    (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3746 	if (len + 5 >= size) {
3747 	    xmlChar *tmp;
3748 
3749 	    size *= 2;
3750 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3751 	    if (tmp == NULL) {
3752 		xmlErrMemory(ctxt, NULL);
3753                 goto error;
3754 	    }
3755 	    buf = tmp;
3756 	}
3757 	COPY_BUF(l,buf,len,c);
3758 	NEXTL(l);
3759 
3760 	GROW;
3761 	c = CUR_CHAR(l);
3762 	if (c == 0) {
3763 	    GROW;
3764 	    c = CUR_CHAR(l);
3765 	}
3766     }
3767     buf[len] = 0;
3768     if (ctxt->instate == XML_PARSER_EOF)
3769         goto error;
3770     if (c != stop) {
3771         xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3772         goto error;
3773     }
3774     NEXT;
3775 
3776     /*
3777      * Raise problem w.r.t. '&' and '%' being used in non-entities
3778      * reference constructs. Note Charref will be handled in
3779      * xmlStringDecodeEntities()
3780      */
3781     cur = buf;
3782     while (*cur != 0) { /* non input consuming */
3783 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3784 	    xmlChar *name;
3785 	    xmlChar tmp = *cur;
3786             int nameOk = 0;
3787 
3788 	    cur++;
3789 	    name = xmlParseStringName(ctxt, &cur);
3790             if (name != NULL) {
3791                 nameOk = 1;
3792                 xmlFree(name);
3793             }
3794             if ((nameOk == 0) || (*cur != ';')) {
3795 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3796 	    "EntityValue: '%c' forbidden except for entities references\n",
3797 	                          tmp);
3798                 goto error;
3799 	    }
3800 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3801 		(ctxt->inputNr == 1)) {
3802 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3803                 goto error;
3804 	    }
3805 	    if (*cur == 0)
3806 	        break;
3807 	}
3808 	cur++;
3809     }
3810 
3811     /*
3812      * Then PEReference entities are substituted.
3813      *
3814      * NOTE: 4.4.7 Bypassed
3815      * When a general entity reference appears in the EntityValue in
3816      * an entity declaration, it is bypassed and left as is.
3817      * so XML_SUBSTITUTE_REF is not set here.
3818      */
3819     ++ctxt->depth;
3820     ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3821                                   0, 0, 0);
3822     --ctxt->depth;
3823     if (orig != NULL) {
3824         *orig = buf;
3825         buf = NULL;
3826     }
3827 
3828 error:
3829     if (buf != NULL)
3830         xmlFree(buf);
3831     return(ret);
3832 }
3833 
/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @attlen:  if non-NULL, receives the length of the returned value
 *           (terminating 0 excluded)
 * @normalize:  whether to apply the inner normalization
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * White space characters (#x20, #xD, #xA, #x9) are always stored as #x20.
 * When @normalize is set, leading spaces are dropped, runs of spaces are
 * collapsed to one and trailing spaces are stripped.
 *
 * Note that a '<' in the value or a missing closing quote raises a fatal
 * error but the buffer built so far is still returned.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */
static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
    xmlChar limit = 0;
    xmlChar *buf = NULL;
    xmlChar *rep = NULL;
    size_t len = 0;
    size_t buf_size = 0;
    int c, l, in_space = 0;
    xmlChar *current = NULL;
    xmlEntityPtr ent;

    /* the opening quote decides which character closes the value */
    if (NXT(0) == '"') {
	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
	limit = '"';
        NEXT;
    } else if (NXT(0) == '\'') {
	limit = '\'';
	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        NEXT;
    } else {
	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
	return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buf_size = XML_PARSER_BUFFER_SIZE;
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
    if (buf == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     */
    c = CUR_CHAR(l);
    while (((NXT(0) != limit) && /* checked */
            (IS_CHAR(c)) && (c != '<')) &&
            (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
         * special option is given
         */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue length too long\n");
            /*
             * NOTE(review): this exit goes through mem_error, so
             * xmlErrMemory() is raised in addition to the error above.
             */
            goto mem_error;
        }
	if (c == 0) break;
	if (c == '&') {
	    in_space = 0;
	    if (NXT(1) == '#') {
		/* character reference */
		int val = xmlParseCharRef(ctxt);

		if (val == '&') {
		    if (ctxt->replaceEntities) {
			if (len + 10 > buf_size) {
			    growBuffer(buf, 10);
			}
			buf[len++] = '&';
		    } else {
			/*
			 * The reparsing will be done in xmlStringGetNodeList()
			 * called by the attribute() function in SAX.c
			 */
			if (len + 10 > buf_size) {
			    growBuffer(buf, 10);
			}
			buf[len++] = '&';
			buf[len++] = '#';
			buf[len++] = '3';
			buf[len++] = '8';
			buf[len++] = ';';
		    }
		} else if (val != 0) {
		    if (len + 10 > buf_size) {
			growBuffer(buf, 10);
		    }
		    len += xmlCopyChar(0, &buf[len], val);
		}
	    } else {
		/* general entity reference */
		ent = xmlParseEntityRef(ctxt);
		ctxt->nbentities++;
		if (ent != NULL)
		    ctxt->nbentities += ent->owner;
		if ((ent != NULL) &&
		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
		    if (len + 10 > buf_size) {
			growBuffer(buf, 10);
		    }
		    /*
		     * without substitution a predefined '&' is kept escaped
		     * as "&#38;", any other predefined entity is stored as
		     * its single content character
		     */
		    if ((ctxt->replaceEntities == 0) &&
		        (ent->content[0] == '&')) {
			buf[len++] = '&';
			buf[len++] = '#';
			buf[len++] = '3';
			buf[len++] = '8';
			buf[len++] = ';';
		    } else {
			buf[len++] = ent->content[0];
		    }
		} else if ((ent != NULL) &&
		           (ctxt->replaceEntities != 0)) {
		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
			++ctxt->depth;
			rep = xmlStringDecodeEntities(ctxt, ent->content,
						      XML_SUBSTITUTE_REF,
						      0, 0, 0);
			--ctxt->depth;
			if (rep != NULL) {
			    /*
			     * copy the replacement text, turning CR, LF and
			     * TAB into plain spaces
			     */
			    current = rep;
			    while (*current != 0) { /* non input consuming */
                                if ((*current == 0xD) || (*current == 0xA) ||
                                    (*current == 0x9)) {
                                    buf[len++] = 0x20;
                                    current++;
                                } else
                                    buf[len++] = *current++;
				if (len + 10 > buf_size) {
				    growBuffer(buf, 10);
				}
			    }
			    xmlFree(rep);
			    rep = NULL;
			}
		    } else {
			if (len + 10 > buf_size) {
			    growBuffer(buf, 10);
			}
			if (ent->content != NULL)
			    buf[len++] = ent->content[0];
		    }
		} else if (ent != NULL) {
		    /* no substitution requested: keep "&name;" as is */
		    int i = xmlStrlen(ent->name);
		    const xmlChar *cur = ent->name;

		    /*
		     * This may look absurd but is needed to detect
		     * entities problems
		     */
		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
			(ent->content != NULL) && (ent->checked == 0)) {
			unsigned long oldnbent = ctxt->nbentities;

			++ctxt->depth;
			rep = xmlStringDecodeEntities(ctxt, ent->content,
						  XML_SUBSTITUTE_REF, 0, 0, 0);
			--ctxt->depth;

			/*
			 * checked stores twice the number of entities seen
			 * during the expansion (plus one); the low bit is
			 * set below when the expansion contains a '<'
			 */
			ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
			if (rep != NULL) {
			    if (xmlStrchr(rep, '<'))
			        ent->checked |= 1;
			    xmlFree(rep);
			    rep = NULL;
			} else {
                            /* expansion failed: empty the entity content */
                            ent->content[0] = 0;
                        }
		    }

		    /*
		     * Just output the reference
		     */
		    buf[len++] = '&';
		    while (len + i + 10 > buf_size) {
			growBuffer(buf, i + 10);
		    }
		    for (;i > 0;i--)
			buf[len++] = *cur++;
		    buf[len++] = ';';
		}
	    }
	} else {
	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
	        /*
	         * any white space is stored as 0x20; when normalizing,
	         * leading blanks and repeated blanks are not stored
	         */
	        if ((len != 0) || (!normalize)) {
		    if ((!normalize) || (!in_space)) {
			COPY_BUF(l,buf,len,0x20);
			while (len + 10 > buf_size) {
			    growBuffer(buf, 10);
			}
		    }
		    in_space = 1;
		}
	    } else {
	        in_space = 0;
		COPY_BUF(l,buf,len,c);
		if (len + 10 > buf_size) {
		    growBuffer(buf, 10);
		}
	    }
	    NEXTL(l);
	}
	GROW;
	c = CUR_CHAR(l);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;

    /* strip trailing spaces when the value ended inside a blank run */
    if ((in_space) && (normalize)) {
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
    }
    buf[len] = 0;
    /* the errors below are reported but buf is still returned */
    if (RAW == '<') {
	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
    } else if (RAW != limit) {
	if ((c != 0) && (!IS_CHAR(c))) {
	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
			   "invalid character in attribute value\n");
	} else {
	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
			   "AttValue: ' expected\n");
        }
    } else
	NEXT;

    /*
     * There we potentially risk an overflow, don't allow attribute value of
     * length more than INT_MAX it is a very reasonable assumption !
     */
    if (len >= INT_MAX) {
        xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                       "AttValue length too long\n");
        goto mem_error;
    }

    if (attlen != NULL) *attlen = (int) len;
    return(buf);

    /* mem_error reports an allocation failure, then shares the cleanup */
mem_error:
    xmlErrMemory(ctxt, NULL);
error:
    if (buf != NULL)
        xmlFree(buf);
    if (rep != NULL)
        xmlFree(rep);
    return(NULL);
}
4082 
4083 /**
4084  * xmlParseAttValue:
4085  * @ctxt:  an XML parser context
4086  *
4087  * parse a value for an attribute
4088  * Note: the parser won't do substitution of entities here, this
4089  * will be handled later in xmlStringGetNodeList
4090  *
4091  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4092  *                   "'" ([^<&'] | Reference)* "'"
4093  *
4094  * 3.3.3 Attribute-Value Normalization:
4095  * Before the value of an attribute is passed to the application or
4096  * checked for validity, the XML processor must normalize it as follows:
4097  * - a character reference is processed by appending the referenced
4098  *   character to the attribute value
4099  * - an entity reference is processed by recursively processing the
4100  *   replacement text of the entity
4101  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4102  *   appending #x20 to the normalized value, except that only a single
4103  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4104  *   parsed entity or the literal entity value of an internal parsed entity
4105  * - other characters are processed by appending them to the normalized value
4106  * If the declared value is not CDATA, then the XML processor must further
4107  * process the normalized attribute value by discarding any leading and
4108  * trailing space (#x20) characters, and by replacing sequences of space
4109  * (#x20) characters by a single space (#x20) character.
4110  * All attributes for which no declaration has been read should be treated
4111  * by a non-validating parser as if declared CDATA.
4112  *
4113  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4114  */
4115 
4116 
4117 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4118 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4119     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4120     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4121 }
4122 
4123 /**
4124  * xmlParseSystemLiteral:
4125  * @ctxt:  an XML parser context
4126  *
4127  * parse an XML Literal
4128  *
4129  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4130  *
4131  * Returns the SystemLiteral parsed or NULL
4132  */
4133 
4134 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4135 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4136     xmlChar *buf = NULL;
4137     int len = 0;
4138     int size = XML_PARSER_BUFFER_SIZE;
4139     int cur, l;
4140     xmlChar stop;
4141     int state = ctxt->instate;
4142     int count = 0;
4143 
4144     SHRINK;
4145     if (RAW == '"') {
4146         NEXT;
4147 	stop = '"';
4148     } else if (RAW == '\'') {
4149         NEXT;
4150 	stop = '\'';
4151     } else {
4152 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4153 	return(NULL);
4154     }
4155 
4156     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4157     if (buf == NULL) {
4158         xmlErrMemory(ctxt, NULL);
4159 	return(NULL);
4160     }
4161     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4162     cur = CUR_CHAR(l);
4163     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4164 	if (len + 5 >= size) {
4165 	    xmlChar *tmp;
4166 
4167             if ((size > XML_MAX_NAME_LENGTH) &&
4168                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4169                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4170                 xmlFree(buf);
4171 		ctxt->instate = (xmlParserInputState) state;
4172                 return(NULL);
4173             }
4174 	    size *= 2;
4175 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4176 	    if (tmp == NULL) {
4177 	        xmlFree(buf);
4178 		xmlErrMemory(ctxt, NULL);
4179 		ctxt->instate = (xmlParserInputState) state;
4180 		return(NULL);
4181 	    }
4182 	    buf = tmp;
4183 	}
4184 	count++;
4185 	if (count > 50) {
4186 	    GROW;
4187 	    count = 0;
4188             if (ctxt->instate == XML_PARSER_EOF) {
4189 	        xmlFree(buf);
4190 		return(NULL);
4191             }
4192 	}
4193 	COPY_BUF(l,buf,len,cur);
4194 	NEXTL(l);
4195 	cur = CUR_CHAR(l);
4196 	if (cur == 0) {
4197 	    GROW;
4198 	    SHRINK;
4199 	    cur = CUR_CHAR(l);
4200 	}
4201     }
4202     buf[len] = 0;
4203     ctxt->instate = (xmlParserInputState) state;
4204     if (!IS_CHAR(cur)) {
4205 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4206     } else {
4207 	NEXT;
4208     }
4209     return(buf);
4210 }
4211 
4212 /**
4213  * xmlParsePubidLiteral:
4214  * @ctxt:  an XML parser context
4215  *
4216  * parse an XML public literal
4217  *
4218  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4219  *
4220  * Returns the PubidLiteral parsed or NULL.
4221  */
4222 
4223 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4224 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4225     xmlChar *buf = NULL;
4226     int len = 0;
4227     int size = XML_PARSER_BUFFER_SIZE;
4228     xmlChar cur;
4229     xmlChar stop;
4230     int count = 0;
4231     xmlParserInputState oldstate = ctxt->instate;
4232 
4233     SHRINK;
4234     if (RAW == '"') {
4235         NEXT;
4236 	stop = '"';
4237     } else if (RAW == '\'') {
4238         NEXT;
4239 	stop = '\'';
4240     } else {
4241 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4242 	return(NULL);
4243     }
4244     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4245     if (buf == NULL) {
4246 	xmlErrMemory(ctxt, NULL);
4247 	return(NULL);
4248     }
4249     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4250     cur = CUR;
4251     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4252 	if (len + 1 >= size) {
4253 	    xmlChar *tmp;
4254 
4255             if ((size > XML_MAX_NAME_LENGTH) &&
4256                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4257                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4258                 xmlFree(buf);
4259                 return(NULL);
4260             }
4261 	    size *= 2;
4262 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4263 	    if (tmp == NULL) {
4264 		xmlErrMemory(ctxt, NULL);
4265 		xmlFree(buf);
4266 		return(NULL);
4267 	    }
4268 	    buf = tmp;
4269 	}
4270 	buf[len++] = cur;
4271 	count++;
4272 	if (count > 50) {
4273 	    GROW;
4274 	    count = 0;
4275             if (ctxt->instate == XML_PARSER_EOF) {
4276 		xmlFree(buf);
4277 		return(NULL);
4278             }
4279 	}
4280 	NEXT;
4281 	cur = CUR;
4282 	if (cur == 0) {
4283 	    GROW;
4284 	    SHRINK;
4285 	    cur = CUR;
4286 	}
4287     }
4288     buf[len] = 0;
4289     if (cur != stop) {
4290 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4291     } else {
4292 	NEXT;
4293     }
4294     ctxt->instate = oldstate;
4295     return(buf);
4296 }
4297 
/* forward declaration: xmlParseCharData() below falls back to it */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by the input byte. An entry is non-zero (it holds
 * the byte value itself) for the bytes the fast loop of xmlParseCharData()
 * may step over without further checks: 0x09 and 0x20..0x7F except
 * '&' (0x26), '<' (0x3C) and ']' (0x5D). Every other byte, including
 * 0x0A, 0x0D, the remaining control characters and all bytes >= 0x80,
 * maps to 0 and stops that loop.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9 kept, CR/LF handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4337 
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 the text is first scanned directly in the input buffer
 * and passed to the SAX callbacks without copying. As soon as a byte
 * outside 0x20..0x7F and 0x09 is met (or when @cdata is set) the work is
 * handed over to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /* position reinstated before calling the slow path */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
	in = ctxt->input->cur;
	do {
	    /* step 1: skip a leading run of spaces and line feeds */
get_more_space:
	    while (*in == 0x20) { in++; ctxt->input->col++; }
	    if (*in == 0xA) {
		do {
		    ctxt->input->line++; ctxt->input->col = 1;
		    in++;
		} while (*in == 0xA);
		goto get_more_space;
	    }
	    /* the run was made of blanks only and a tag follows */
	    if (*in == '<') {
		nbchar = in - ctxt->input->cur;
		if (nbchar > 0) {
		    const xmlChar *tmp = ctxt->input->cur;
		    ctxt->input->cur = in;

		    /*
		     * areBlanks() is only consulted when the two callbacks
		     * differ, otherwise characters() gets everything
		     */
		    if ((ctxt->sax != NULL) &&
		        (ctxt->sax->ignorableWhitespace !=
		         ctxt->sax->characters)) {
			if (areBlanks(ctxt, tmp, nbchar, 1)) {
			    if (ctxt->sax->ignorableWhitespace != NULL)
				ctxt->sax->ignorableWhitespace(ctxt->userData,
						       tmp, nbchar);
			} else {
			    if (ctxt->sax->characters != NULL)
				ctxt->sax->characters(ctxt->userData,
						      tmp, nbchar);
			    if (*ctxt->space == -1)
			        *ctxt->space = -2;
			}
		    } else if ((ctxt->sax != NULL) &&
		               (ctxt->sax->characters != NULL)) {
			ctxt->sax->characters(ctxt->userData,
					      tmp, nbchar);
		    }
		}
		return;
	    }

	    /* step 2: extend the run over every byte test_char_data accepts */
get_more:
            ccol = ctxt->input->col;
	    while (test_char_data[*in]) {
		in++;
		ccol++;
	    }
	    ctxt->input->col = ccol;
	    if (*in == 0xA) {
		do {
		    ctxt->input->line++; ctxt->input->col = 1;
		    in++;
		} while (*in == 0xA);
		goto get_more;
	    }
	    if (*in == ']') {
		if ((in[1] == ']') && (in[2] == '>')) {
		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
		    /*
		     * NOTE(review): the text scanned since input->cur has
		     * not been passed to SAX when cur is moved here.
		     */
		    ctxt->input->cur = in + 1;
		    return;
		}
		in++;
		ctxt->input->col++;
		goto get_more;
	    }
	    /* step 3: deliver the run gathered so far */
	    nbchar = in - ctxt->input->cur;
	    if (nbchar > 0) {
		if ((ctxt->sax != NULL) &&
		    (ctxt->sax->ignorableWhitespace !=
		     ctxt->sax->characters) &&
		    (IS_BLANK_CH(*ctxt->input->cur))) {
		    const xmlChar *tmp = ctxt->input->cur;
		    ctxt->input->cur = in;

		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
		        if (ctxt->sax->ignorableWhitespace != NULL)
			    ctxt->sax->ignorableWhitespace(ctxt->userData,
							   tmp, nbchar);
		    } else {
		        if (ctxt->sax->characters != NULL)
			    ctxt->sax->characters(ctxt->userData,
						  tmp, nbchar);
			if (*ctxt->space == -1)
			    *ctxt->space = -2;
		    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
		} else if (ctxt->sax != NULL) {
		    if (ctxt->sax->characters != NULL)
			ctxt->sax->characters(ctxt->userData,
					      ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
		}
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
	    }
	    ctxt->input->cur = in;
	    /*
	     * a CR followed by LF: cur is moved onto the LF so the CR is
	     * left out of the next run, then scanning resumes after the LF
	     */
	    if (*in == 0xD) {
		in++;
		if (*in == 0xA) {
		    ctxt->input->cur = in;
		    in++;
		    ctxt->input->line++; ctxt->input->col = 1;
		    continue; /* while */
		}
		in--;
	    }
	    if (*in == '<') {
		return;
	    }
	    if (*in == '&') {
		return;
	    }
	    SHRINK;
	    GROW;
            if (ctxt->instate == XML_PARSER_EOF)
		return;
	    /* reload the pointer after the SHRINK/GROW above */
	    in = ctxt->input->cur;
	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
	nbchar = 0;
    }
    /* slow path: restart from the position recorded at the last delivery */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4495 
4496 /**
4497  * xmlParseCharDataComplex:
4498  * @ctxt:  an XML parser context
4499  * @cdata:  int indicating whether we are within a CDATA section
4500  *
4501  * parse a CharData section.this is the fallback function
4502  * of xmlParseCharData() when the parsing requires handling
4503  * of non-ASCII characters.
4504  */
4505 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4506 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4507     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4508     int nbchar = 0;
4509     int cur, l;
4510     int count = 0;
4511 
4512     SHRINK;
4513     GROW;
4514     cur = CUR_CHAR(l);
4515     while ((cur != '<') && /* checked */
4516            (cur != '&') &&
4517 	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4518 	if ((cur == ']') && (NXT(1) == ']') &&
4519 	    (NXT(2) == '>')) {
4520 	    if (cdata) break;
4521 	    else {
4522 		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4523 	    }
4524 	}
4525 	COPY_BUF(l,buf,nbchar,cur);
4526 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4527 	    buf[nbchar] = 0;
4528 
4529 	    /*
4530 	     * OK the segment is to be consumed as chars.
4531 	     */
4532 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4533 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4534 		    if (ctxt->sax->ignorableWhitespace != NULL)
4535 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4536 			                               buf, nbchar);
4537 		} else {
4538 		    if (ctxt->sax->characters != NULL)
4539 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4540 		    if ((ctxt->sax->characters !=
4541 		         ctxt->sax->ignorableWhitespace) &&
4542 			(*ctxt->space == -1))
4543 			*ctxt->space = -2;
4544 		}
4545 	    }
4546 	    nbchar = 0;
4547             /* something really bad happened in the SAX callback */
4548             if (ctxt->instate != XML_PARSER_CONTENT)
4549                 return;
4550 	}
4551 	count++;
4552 	if (count > 50) {
4553 	    GROW;
4554 	    count = 0;
4555             if (ctxt->instate == XML_PARSER_EOF)
4556 		return;
4557 	}
4558 	NEXTL(l);
4559 	cur = CUR_CHAR(l);
4560     }
4561     if (nbchar != 0) {
4562         buf[nbchar] = 0;
4563 	/*
4564 	 * OK the segment is to be consumed as chars.
4565 	 */
4566 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4567 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4568 		if (ctxt->sax->ignorableWhitespace != NULL)
4569 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4570 	    } else {
4571 		if (ctxt->sax->characters != NULL)
4572 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4573 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4574 		    (*ctxt->space == -1))
4575 		    *ctxt->space = -2;
4576 	    }
4577 	}
4578     }
4579     if ((cur != 0) && (!IS_CHAR(cur))) {
4580 	/* Generate the error and skip the offending character */
4581         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4582                           "PCDATA invalid Char value %d\n",
4583 	                  cur);
4584 	NEXTL(l);
4585     }
4586 }
4587 
4588 /**
4589  * xmlParseExternalID:
4590  * @ctxt:  an XML parser context
4591  * @publicID:  a xmlChar** receiving PubidLiteral
4592  * @strict: indicate whether we should restrict parsing to only
4593  *          production [75], see NOTE below
4594  *
4595  * Parse an External ID or a Public ID
4596  *
4597  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4598  *       'PUBLIC' S PubidLiteral S SystemLiteral
4599  *
4600  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4601  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4602  *
4603  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4604  *
4605  * Returns the function returns SystemLiteral and in the second
4606  *                case publicID receives PubidLiteral, is strict is off
4607  *                it is possible to return NULL and have publicID set.
4608  */
4609 
4610 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4611 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4612     xmlChar *URI = NULL;
4613 
4614     SHRINK;
4615 
4616     *publicID = NULL;
4617     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4618         SKIP(6);
4619 	if (SKIP_BLANKS == 0) {
4620 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4621 	                   "Space required after 'SYSTEM'\n");
4622 	}
4623 	URI = xmlParseSystemLiteral(ctxt);
4624 	if (URI == NULL) {
4625 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4626         }
4627     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4628         SKIP(6);
4629 	if (SKIP_BLANKS == 0) {
4630 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4631 		    "Space required after 'PUBLIC'\n");
4632 	}
4633 	*publicID = xmlParsePubidLiteral(ctxt);
4634 	if (*publicID == NULL) {
4635 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4636 	}
4637 	if (strict) {
4638 	    /*
4639 	     * We don't handle [83] so "S SystemLiteral" is required.
4640 	     */
4641 	    if (SKIP_BLANKS == 0) {
4642 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4643 			"Space required after the Public Identifier\n");
4644 	    }
4645 	} else {
4646 	    /*
4647 	     * We handle [83] so we return immediately, if
4648 	     * "S SystemLiteral" is not detected. We skip blanks if no
4649              * system literal was found, but this is harmless since we must
4650              * be at the end of a NotationDecl.
4651 	     */
4652 	    if (SKIP_BLANKS == 0) return(NULL);
4653 	    if ((CUR != '\'') && (CUR != '"')) return(NULL);
4654 	}
4655 	URI = xmlParseSystemLiteral(ctxt);
4656 	if (URI == NULL) {
4657 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4658         }
4659     }
4660     return(URI);
4661 }
4662 
4663 /**
4664  * xmlParseCommentComplex:
4665  * @ctxt:  an XML parser context
4666  * @buf:  the already parsed part of the buffer
4667  * @len:  number of bytes filles in the buffer
4668  * @size:  allocated size of the buffer
4669  *
4670  * Skip an XML (SGML) comment <!-- .... -->
4671  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4672  *  must not occur within comments. "
4673  * This is the slow routine in case the accelerator for ascii didn't work
4674  *
4675  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4676  */
4677 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4678 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4679                        size_t len, size_t size) {
4680     int q, ql;
4681     int r, rl;
4682     int cur, l;
4683     size_t count = 0;
4684     int inputid;
4685 
4686     inputid = ctxt->input->id;
4687 
4688     if (buf == NULL) {
4689         len = 0;
4690 	size = XML_PARSER_BUFFER_SIZE;
4691 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4692 	if (buf == NULL) {
4693 	    xmlErrMemory(ctxt, NULL);
4694 	    return;
4695 	}
4696     }
4697     GROW;	/* Assure there's enough input data */
4698     q = CUR_CHAR(ql);
4699     if (q == 0)
4700         goto not_terminated;
4701     if (!IS_CHAR(q)) {
4702         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4703                           "xmlParseComment: invalid xmlChar value %d\n",
4704 	                  q);
4705 	xmlFree (buf);
4706 	return;
4707     }
4708     NEXTL(ql);
4709     r = CUR_CHAR(rl);
4710     if (r == 0)
4711         goto not_terminated;
4712     if (!IS_CHAR(r)) {
4713         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4714                           "xmlParseComment: invalid xmlChar value %d\n",
4715 	                  q);
4716 	xmlFree (buf);
4717 	return;
4718     }
4719     NEXTL(rl);
4720     cur = CUR_CHAR(l);
4721     if (cur == 0)
4722         goto not_terminated;
4723     while (IS_CHAR(cur) && /* checked */
4724            ((cur != '>') ||
4725 	    (r != '-') || (q != '-'))) {
4726 	if ((r == '-') && (q == '-')) {
4727 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4728 	}
4729         if ((len > XML_MAX_TEXT_LENGTH) &&
4730             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4731             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4732                          "Comment too big found", NULL);
4733             xmlFree (buf);
4734             return;
4735         }
4736 	if (len + 5 >= size) {
4737 	    xmlChar *new_buf;
4738             size_t new_size;
4739 
4740 	    new_size = size * 2;
4741 	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4742 	    if (new_buf == NULL) {
4743 		xmlFree (buf);
4744 		xmlErrMemory(ctxt, NULL);
4745 		return;
4746 	    }
4747 	    buf = new_buf;
4748             size = new_size;
4749 	}
4750 	COPY_BUF(ql,buf,len,q);
4751 	q = r;
4752 	ql = rl;
4753 	r = cur;
4754 	rl = l;
4755 
4756 	count++;
4757 	if (count > 50) {
4758 	    GROW;
4759 	    count = 0;
4760             if (ctxt->instate == XML_PARSER_EOF) {
4761 		xmlFree(buf);
4762 		return;
4763             }
4764 	}
4765 	NEXTL(l);
4766 	cur = CUR_CHAR(l);
4767 	if (cur == 0) {
4768 	    SHRINK;
4769 	    GROW;
4770 	    cur = CUR_CHAR(l);
4771 	}
4772     }
4773     buf[len] = 0;
4774     if (cur == 0) {
4775 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4776 	                     "Comment not terminated \n<!--%.50s\n", buf);
4777     } else if (!IS_CHAR(cur)) {
4778         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4779                           "xmlParseComment: invalid xmlChar value %d\n",
4780 	                  cur);
4781     } else {
4782 	if (inputid != ctxt->input->id) {
4783 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4784 		           "Comment doesn't start and stop in the same"
4785                            " entity\n");
4786 	}
4787         NEXT;
4788 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4789 	    (!ctxt->disableSAX))
4790 	    ctxt->sax->comment(ctxt->userData, buf);
4791     }
4792     xmlFree(buf);
4793     return;
4794 not_terminated:
4795     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4796 			 "Comment not terminated\n", NULL);
4797     xmlFree(buf);
4798     return;
4799 }
4800 
4801 /**
4802  * xmlParseComment:
4803  * @ctxt:  an XML parser context
4804  *
4805  * Skip an XML (SGML) comment <!-- .... -->
4806  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4807  *  must not occur within comments. "
4808  *
4809  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4810  */
4811 void
xmlParseComment(xmlParserCtxtPtr ctxt)4812 xmlParseComment(xmlParserCtxtPtr ctxt) {
4813     xmlChar *buf = NULL;
4814     size_t size = XML_PARSER_BUFFER_SIZE;
4815     size_t len = 0;
4816     xmlParserInputState state;
4817     const xmlChar *in;
4818     size_t nbchar = 0;
4819     int ccol;
4820     int inputid;
4821 
4822     /*
4823      * Check that there is a comment right here.
4824      */
4825     if ((RAW != '<') || (NXT(1) != '!') ||
4826         (NXT(2) != '-') || (NXT(3) != '-')) return;
4827     state = ctxt->instate;
4828     ctxt->instate = XML_PARSER_COMMENT;
4829     inputid = ctxt->input->id;
4830     SKIP(4);
4831     SHRINK;
4832     GROW;
4833 
4834     /*
4835      * Accelerated common case where input don't need to be
4836      * modified before passing it to the handler.
4837      */
4838     in = ctxt->input->cur;
4839     do {
4840 	if (*in == 0xA) {
4841 	    do {
4842 		ctxt->input->line++; ctxt->input->col = 1;
4843 		in++;
4844 	    } while (*in == 0xA);
4845 	}
4846 get_more:
4847         ccol = ctxt->input->col;
4848 	while (((*in > '-') && (*in <= 0x7F)) ||
4849 	       ((*in >= 0x20) && (*in < '-')) ||
4850 	       (*in == 0x09)) {
4851 		    in++;
4852 		    ccol++;
4853 	}
4854 	ctxt->input->col = ccol;
4855 	if (*in == 0xA) {
4856 	    do {
4857 		ctxt->input->line++; ctxt->input->col = 1;
4858 		in++;
4859 	    } while (*in == 0xA);
4860 	    goto get_more;
4861 	}
4862 	nbchar = in - ctxt->input->cur;
4863 	/*
4864 	 * save current set of data
4865 	 */
4866 	if (nbchar > 0) {
4867 	    if ((ctxt->sax != NULL) &&
4868 		(ctxt->sax->comment != NULL)) {
4869 		if (buf == NULL) {
4870 		    if ((*in == '-') && (in[1] == '-'))
4871 		        size = nbchar + 1;
4872 		    else
4873 		        size = XML_PARSER_BUFFER_SIZE + nbchar;
4874 		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4875 		    if (buf == NULL) {
4876 		        xmlErrMemory(ctxt, NULL);
4877 			ctxt->instate = state;
4878 			return;
4879 		    }
4880 		    len = 0;
4881 		} else if (len + nbchar + 1 >= size) {
4882 		    xmlChar *new_buf;
4883 		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4884 		    new_buf = (xmlChar *) xmlRealloc(buf,
4885 		                                     size * sizeof(xmlChar));
4886 		    if (new_buf == NULL) {
4887 		        xmlFree (buf);
4888 			xmlErrMemory(ctxt, NULL);
4889 			ctxt->instate = state;
4890 			return;
4891 		    }
4892 		    buf = new_buf;
4893 		}
4894 		memcpy(&buf[len], ctxt->input->cur, nbchar);
4895 		len += nbchar;
4896 		buf[len] = 0;
4897 	    }
4898 	}
4899         if ((len > XML_MAX_TEXT_LENGTH) &&
4900             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4901             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902                          "Comment too big found", NULL);
4903             xmlFree (buf);
4904             return;
4905         }
4906 	ctxt->input->cur = in;
4907 	if (*in == 0xA) {
4908 	    in++;
4909 	    ctxt->input->line++; ctxt->input->col = 1;
4910 	}
4911 	if (*in == 0xD) {
4912 	    in++;
4913 	    if (*in == 0xA) {
4914 		ctxt->input->cur = in;
4915 		in++;
4916 		ctxt->input->line++; ctxt->input->col = 1;
4917 		continue; /* while */
4918 	    }
4919 	    in--;
4920 	}
4921 	SHRINK;
4922 	GROW;
4923         if (ctxt->instate == XML_PARSER_EOF) {
4924             xmlFree(buf);
4925             return;
4926         }
4927 	in = ctxt->input->cur;
4928 	if (*in == '-') {
4929 	    if (in[1] == '-') {
4930 	        if (in[2] == '>') {
4931 		    if (ctxt->input->id != inputid) {
4932 			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4933 			               "comment doesn't start and stop in the"
4934                                        " same entity\n");
4935 		    }
4936 		    SKIP(3);
4937 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4938 		        (!ctxt->disableSAX)) {
4939 			if (buf != NULL)
4940 			    ctxt->sax->comment(ctxt->userData, buf);
4941 			else
4942 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4943 		    }
4944 		    if (buf != NULL)
4945 		        xmlFree(buf);
4946 		    if (ctxt->instate != XML_PARSER_EOF)
4947 			ctxt->instate = state;
4948 		    return;
4949 		}
4950 		if (buf != NULL) {
4951 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4952 		                      "Double hyphen within comment: "
4953                                       "<!--%.50s\n",
4954 				      buf);
4955 		} else
4956 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4957 		                      "Double hyphen within comment\n", NULL);
4958 		in++;
4959 		ctxt->input->col++;
4960 	    }
4961 	    in++;
4962 	    ctxt->input->col++;
4963 	    goto get_more;
4964 	}
4965     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4966     xmlParseCommentComplex(ctxt, buf, len, size);
4967     ctxt->instate = state;
4968     return;
4969 }
4970 
4971 
4972 /**
4973  * xmlParsePITarget:
4974  * @ctxt:  an XML parser context
4975  *
4976  * parse the name of a PI
4977  *
4978  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4979  *
4980  * Returns the PITarget name or NULL
4981  */
4982 
4983 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)4984 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4985     const xmlChar *name;
4986 
4987     name = xmlParseName(ctxt);
4988     if ((name != NULL) &&
4989         ((name[0] == 'x') || (name[0] == 'X')) &&
4990         ((name[1] == 'm') || (name[1] == 'M')) &&
4991         ((name[2] == 'l') || (name[2] == 'L'))) {
4992 	int i;
4993 	if ((name[0] == 'x') && (name[1] == 'm') &&
4994 	    (name[2] == 'l') && (name[3] == 0)) {
4995 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4996 		 "XML declaration allowed only at the start of the document\n");
4997 	    return(name);
4998 	} else if (name[3] == 0) {
4999 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5000 	    return(name);
5001 	}
5002 	for (i = 0;;i++) {
5003 	    if (xmlW3CPIs[i] == NULL) break;
5004 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5005 	        return(name);
5006 	}
5007 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5008 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5009 		      NULL, NULL);
5010     }
5011     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5012 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5013 		 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5014     }
5015     return(name);
5016 }
5017 
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction, e.g.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected content is the word "catalog", an '=' sign and a quoted
 * URL, with optional blanks around each of them and nothing else after
 * the closing quote. The URL is then added to the catalogs of the parser
 * context. A syntax error raises a warning, except for a missing '='
 * which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *cursor = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* the "catalog" pseudo attribute name */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto error;
    cursor += 7;

    /* the '=' sign */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != '=')
        return;
    cursor++;

    /* the opening quote, single or double */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    cursor++;

    /* the URL runs up to the matching quote */
    start = cursor;
    for (; (*cursor != 0) && (*cursor != quote); cursor++)
        ;
    if (*cursor == 0)
        goto error;
    URL = xmlStrndup(start, cursor - start);
    cursor++;

    /* only blanks may follow */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5079 
5080 /**
5081  * xmlParsePI:
5082  * @ctxt:  an XML parser context
5083  *
5084  * parse an XML Processing Instruction.
5085  *
5086  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5087  *
5088  * The processing is transfered to SAX once parsed.
5089  */
5090 
5091 void
xmlParsePI(xmlParserCtxtPtr ctxt)5092 xmlParsePI(xmlParserCtxtPtr ctxt) {
5093     xmlChar *buf = NULL;
5094     size_t len = 0;
5095     size_t size = XML_PARSER_BUFFER_SIZE;
5096     int cur, l;
5097     const xmlChar *target;
5098     xmlParserInputState state;
5099     int count = 0;
5100 
5101     if ((RAW == '<') && (NXT(1) == '?')) {
5102 	int inputid = ctxt->input->id;
5103 	state = ctxt->instate;
5104         ctxt->instate = XML_PARSER_PI;
5105 	/*
5106 	 * this is a Processing Instruction.
5107 	 */
5108 	SKIP(2);
5109 	SHRINK;
5110 
5111 	/*
5112 	 * Parse the target name and check for special support like
5113 	 * namespace.
5114 	 */
5115         target = xmlParsePITarget(ctxt);
5116 	if (target != NULL) {
5117 	    if ((RAW == '?') && (NXT(1) == '>')) {
5118 		if (inputid != ctxt->input->id) {
5119 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5120 	                           "PI declaration doesn't start and stop in"
5121                                    " the same entity\n");
5122 		}
5123 		SKIP(2);
5124 
5125 		/*
5126 		 * SAX: PI detected.
5127 		 */
5128 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5129 		    (ctxt->sax->processingInstruction != NULL))
5130 		    ctxt->sax->processingInstruction(ctxt->userData,
5131 		                                     target, NULL);
5132 		if (ctxt->instate != XML_PARSER_EOF)
5133 		    ctxt->instate = state;
5134 		return;
5135 	    }
5136 	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5137 	    if (buf == NULL) {
5138 		xmlErrMemory(ctxt, NULL);
5139 		ctxt->instate = state;
5140 		return;
5141 	    }
5142 	    if (SKIP_BLANKS == 0) {
5143 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5144 			  "ParsePI: PI %s space expected\n", target);
5145 	    }
5146 	    cur = CUR_CHAR(l);
5147 	    while (IS_CHAR(cur) && /* checked */
5148 		   ((cur != '?') || (NXT(1) != '>'))) {
5149 		if (len + 5 >= size) {
5150 		    xmlChar *tmp;
5151                     size_t new_size = size * 2;
5152 		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5153 		    if (tmp == NULL) {
5154 			xmlErrMemory(ctxt, NULL);
5155 			xmlFree(buf);
5156 			ctxt->instate = state;
5157 			return;
5158 		    }
5159 		    buf = tmp;
5160                     size = new_size;
5161 		}
5162 		count++;
5163 		if (count > 50) {
5164 		    GROW;
5165                     if (ctxt->instate == XML_PARSER_EOF) {
5166                         xmlFree(buf);
5167                         return;
5168                     }
5169 		    count = 0;
5170                     if ((len > XML_MAX_TEXT_LENGTH) &&
5171                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5172                         xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5173                                           "PI %s too big found", target);
5174                         xmlFree(buf);
5175                         ctxt->instate = state;
5176                         return;
5177                     }
5178 		}
5179 		COPY_BUF(l,buf,len,cur);
5180 		NEXTL(l);
5181 		cur = CUR_CHAR(l);
5182 		if (cur == 0) {
5183 		    SHRINK;
5184 		    GROW;
5185 		    cur = CUR_CHAR(l);
5186 		}
5187 	    }
5188             if ((len > XML_MAX_TEXT_LENGTH) &&
5189                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5190                 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5191                                   "PI %s too big found", target);
5192                 xmlFree(buf);
5193                 ctxt->instate = state;
5194                 return;
5195             }
5196 	    buf[len] = 0;
5197 	    if (cur != '?') {
5198 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5199 		      "ParsePI: PI %s never end ...\n", target);
5200 	    } else {
5201 		if (inputid != ctxt->input->id) {
5202 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5203 	                           "PI declaration doesn't start and stop in"
5204                                    " the same entity\n");
5205 		}
5206 		SKIP(2);
5207 
5208 #ifdef LIBXML_CATALOG_ENABLED
5209 		if (((state == XML_PARSER_MISC) ||
5210 	             (state == XML_PARSER_START)) &&
5211 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5212 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5213 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5214 			(allow == XML_CATA_ALLOW_ALL))
5215 			xmlParseCatalogPI(ctxt, buf);
5216 		}
5217 #endif
5218 
5219 
5220 		/*
5221 		 * SAX: PI detected.
5222 		 */
5223 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5224 		    (ctxt->sax->processingInstruction != NULL))
5225 		    ctxt->sax->processingInstruction(ctxt->userData,
5226 		                                     target, buf);
5227 	    }
5228 	    xmlFree(buf);
5229 	} else {
5230 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5231 	}
5232 	if (ctxt->instate != XML_PARSER_EOF)
5233 	    ctxt->instate = state;
5234     }
5235 }
5236 
5237 /**
5238  * xmlParseNotationDecl:
5239  * @ctxt:  an XML parser context
5240  *
5241  * parse a notation declaration
5242  *
5243  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5244  *
5245  * Hence there is actually 3 choices:
5246  *     'PUBLIC' S PubidLiteral
5247  *     'PUBLIC' S PubidLiteral S SystemLiteral
5248  * and 'SYSTEM' S SystemLiteral
5249  *
5250  * See the NOTE on xmlParseExternalID().
5251  */
5252 
5253 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5254 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5255     const xmlChar *name;
5256     xmlChar *Pubid;
5257     xmlChar *Systemid;
5258 
5259     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5260 	int inputid = ctxt->input->id;
5261 	SHRINK;
5262 	SKIP(10);
5263 	if (SKIP_BLANKS == 0) {
5264 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5265 			   "Space required after '<!NOTATION'\n");
5266 	    return;
5267 	}
5268 
5269         name = xmlParseName(ctxt);
5270 	if (name == NULL) {
5271 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5272 	    return;
5273 	}
5274 	if (xmlStrchr(name, ':') != NULL) {
5275 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5276 		     "colons are forbidden from notation names '%s'\n",
5277 		     name, NULL, NULL);
5278 	}
5279 	if (SKIP_BLANKS == 0) {
5280 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5281 		     "Space required after the NOTATION name'\n");
5282 	    return;
5283 	}
5284 
5285 	/*
5286 	 * Parse the IDs.
5287 	 */
5288 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5289 	SKIP_BLANKS;
5290 
5291 	if (RAW == '>') {
5292 	    if (inputid != ctxt->input->id) {
5293 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5294 	                       "Notation declaration doesn't start and stop"
5295                                " in the same entity\n");
5296 	    }
5297 	    NEXT;
5298 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5299 		(ctxt->sax->notationDecl != NULL))
5300 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5301 	} else {
5302 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5303 	}
5304 	if (Systemid != NULL) xmlFree(Systemid);
5305 	if (Pubid != NULL) xmlFree(Pubid);
5306     }
5307 }
5308 
5309 /**
5310  * xmlParseEntityDecl:
5311  * @ctxt:  an XML parser context
5312  *
5313  * parse <!ENTITY declarations
5314  *
5315  * [70] EntityDecl ::= GEDecl | PEDecl
5316  *
5317  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5318  *
5319  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5320  *
5321  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5322  *
5323  * [74] PEDef ::= EntityValue | ExternalID
5324  *
5325  * [76] NDataDecl ::= S 'NDATA' S Name
5326  *
5327  * [ VC: Notation Declared ]
5328  * The Name must match the declared name of a notation.
5329  */
5330 
5331 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5332 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5333     const xmlChar *name = NULL;
5334     xmlChar *value = NULL;
5335     xmlChar *URI = NULL, *literal = NULL;
5336     const xmlChar *ndata = NULL;
5337     int isParameter = 0;
5338     xmlChar *orig = NULL;
5339 
5340     /* GROW; done in the caller */
5341     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5342 	int inputid = ctxt->input->id;
5343 	SHRINK;
5344 	SKIP(8);
5345 	if (SKIP_BLANKS == 0) {
5346 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5347 			   "Space required after '<!ENTITY'\n");
5348 	}
5349 
5350 	if (RAW == '%') {
5351 	    NEXT;
5352 	    if (SKIP_BLANKS == 0) {
5353 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5354 			       "Space required after '%%'\n");
5355 	    }
5356 	    isParameter = 1;
5357 	}
5358 
5359         name = xmlParseName(ctxt);
5360 	if (name == NULL) {
5361 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5362 	                   "xmlParseEntityDecl: no name\n");
5363             return;
5364 	}
5365 	if (xmlStrchr(name, ':') != NULL) {
5366 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5367 		     "colons are forbidden from entities names '%s'\n",
5368 		     name, NULL, NULL);
5369 	}
5370 	if (SKIP_BLANKS == 0) {
5371 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5372 			   "Space required after the entity name\n");
5373 	}
5374 
5375 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5376 	/*
5377 	 * handle the various case of definitions...
5378 	 */
5379 	if (isParameter) {
5380 	    if ((RAW == '"') || (RAW == '\'')) {
5381 	        value = xmlParseEntityValue(ctxt, &orig);
5382 		if (value) {
5383 		    if ((ctxt->sax != NULL) &&
5384 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5385 			ctxt->sax->entityDecl(ctxt->userData, name,
5386 		                    XML_INTERNAL_PARAMETER_ENTITY,
5387 				    NULL, NULL, value);
5388 		}
5389 	    } else {
5390 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5391 		if ((URI == NULL) && (literal == NULL)) {
5392 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5393 		}
5394 		if (URI) {
5395 		    xmlURIPtr uri;
5396 
5397 		    uri = xmlParseURI((const char *) URI);
5398 		    if (uri == NULL) {
5399 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5400 				     "Invalid URI: %s\n", URI);
5401 			/*
5402 			 * This really ought to be a well formedness error
5403 			 * but the XML Core WG decided otherwise c.f. issue
5404 			 * E26 of the XML erratas.
5405 			 */
5406 		    } else {
5407 			if (uri->fragment != NULL) {
5408 			    /*
5409 			     * Okay this is foolish to block those but not
5410 			     * invalid URIs.
5411 			     */
5412 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5413 			} else {
5414 			    if ((ctxt->sax != NULL) &&
5415 				(!ctxt->disableSAX) &&
5416 				(ctxt->sax->entityDecl != NULL))
5417 				ctxt->sax->entityDecl(ctxt->userData, name,
5418 					    XML_EXTERNAL_PARAMETER_ENTITY,
5419 					    literal, URI, NULL);
5420 			}
5421 			xmlFreeURI(uri);
5422 		    }
5423 		}
5424 	    }
5425 	} else {
5426 	    if ((RAW == '"') || (RAW == '\'')) {
5427 	        value = xmlParseEntityValue(ctxt, &orig);
5428 		if ((ctxt->sax != NULL) &&
5429 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5430 		    ctxt->sax->entityDecl(ctxt->userData, name,
5431 				XML_INTERNAL_GENERAL_ENTITY,
5432 				NULL, NULL, value);
5433 		/*
5434 		 * For expat compatibility in SAX mode.
5435 		 */
5436 		if ((ctxt->myDoc == NULL) ||
5437 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5438 		    if (ctxt->myDoc == NULL) {
5439 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5440 			if (ctxt->myDoc == NULL) {
5441 			    xmlErrMemory(ctxt, "New Doc failed");
5442 			    return;
5443 			}
5444 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5445 		    }
5446 		    if (ctxt->myDoc->intSubset == NULL)
5447 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5448 					    BAD_CAST "fake", NULL, NULL);
5449 
5450 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5451 			              NULL, NULL, value);
5452 		}
5453 	    } else {
5454 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5455 		if ((URI == NULL) && (literal == NULL)) {
5456 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5457 		}
5458 		if (URI) {
5459 		    xmlURIPtr uri;
5460 
5461 		    uri = xmlParseURI((const char *)URI);
5462 		    if (uri == NULL) {
5463 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5464 				     "Invalid URI: %s\n", URI);
5465 			/*
5466 			 * This really ought to be a well formedness error
5467 			 * but the XML Core WG decided otherwise c.f. issue
5468 			 * E26 of the XML erratas.
5469 			 */
5470 		    } else {
5471 			if (uri->fragment != NULL) {
5472 			    /*
5473 			     * Okay this is foolish to block those but not
5474 			     * invalid URIs.
5475 			     */
5476 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5477 			}
5478 			xmlFreeURI(uri);
5479 		    }
5480 		}
5481 		if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5482 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5483 				   "Space required before 'NDATA'\n");
5484 		}
5485 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5486 		    SKIP(5);
5487 		    if (SKIP_BLANKS == 0) {
5488 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5489 				       "Space required after 'NDATA'\n");
5490 		    }
5491 		    ndata = xmlParseName(ctxt);
5492 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5493 		        (ctxt->sax->unparsedEntityDecl != NULL))
5494 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5495 				    literal, URI, ndata);
5496 		} else {
5497 		    if ((ctxt->sax != NULL) &&
5498 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5499 			ctxt->sax->entityDecl(ctxt->userData, name,
5500 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5501 				    literal, URI, NULL);
5502 		    /*
5503 		     * For expat compatibility in SAX mode.
5504 		     * assuming the entity repalcement was asked for
5505 		     */
5506 		    if ((ctxt->replaceEntities != 0) &&
5507 			((ctxt->myDoc == NULL) ||
5508 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5509 			if (ctxt->myDoc == NULL) {
5510 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5511 			    if (ctxt->myDoc == NULL) {
5512 			        xmlErrMemory(ctxt, "New Doc failed");
5513 				return;
5514 			    }
5515 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5516 			}
5517 
5518 			if (ctxt->myDoc->intSubset == NULL)
5519 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5520 						BAD_CAST "fake", NULL, NULL);
5521 			xmlSAX2EntityDecl(ctxt, name,
5522 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5523 				          literal, URI, NULL);
5524 		    }
5525 		}
5526 	    }
5527 	}
5528 	if (ctxt->instate == XML_PARSER_EOF)
5529 	    goto done;
5530 	SKIP_BLANKS;
5531 	if (RAW != '>') {
5532 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5533 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5534 	    xmlHaltParser(ctxt);
5535 	} else {
5536 	    if (inputid != ctxt->input->id) {
5537 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5538 	                       "Entity declaration doesn't start and stop in"
5539                                " the same entity\n");
5540 	    }
5541 	    NEXT;
5542 	}
5543 	if (orig != NULL) {
5544 	    /*
5545 	     * Ugly mechanism to save the raw entity value.
5546 	     */
5547 	    xmlEntityPtr cur = NULL;
5548 
5549 	    if (isParameter) {
5550 	        if ((ctxt->sax != NULL) &&
5551 		    (ctxt->sax->getParameterEntity != NULL))
5552 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5553 	    } else {
5554 	        if ((ctxt->sax != NULL) &&
5555 		    (ctxt->sax->getEntity != NULL))
5556 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5557 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5558 		    cur = xmlSAX2GetEntity(ctxt, name);
5559 		}
5560 	    }
5561             if ((cur != NULL) && (cur->orig == NULL)) {
5562 		cur->orig = orig;
5563                 orig = NULL;
5564 	    }
5565 	}
5566 
5567 done:
5568 	if (value != NULL) xmlFree(value);
5569 	if (URI != NULL) xmlFree(URI);
5570 	if (literal != NULL) xmlFree(literal);
5571         if (orig != NULL) xmlFree(orig);
5572     }
5573 }
5574 
5575 /**
5576  * xmlParseDefaultDecl:
5577  * @ctxt:  an XML parser context
5578  * @value:  Receive a possible fixed default value for the attribute
5579  *
5580  * Parse an attribute default declaration
5581  *
5582  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5583  *
5584  * [ VC: Required Attribute ]
5585  * if the default declaration is the keyword #REQUIRED, then the
5586  * attribute must be specified for all elements of the type in the
5587  * attribute-list declaration.
5588  *
5589  * [ VC: Attribute Default Legal ]
5590  * The declared default value must meet the lexical constraints of
5591  * the declared attribute type c.f. xmlValidateAttributeDecl()
5592  *
5593  * [ VC: Fixed Attribute Default ]
5594  * if an attribute has a default value declared with the #FIXED
5595  * keyword, instances of that attribute must match the default value.
5596  *
5597  * [ WFC: No < in Attribute Values ]
5598  * handled in xmlParseAttValue()
5599  *
5600  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5601  *          or XML_ATTRIBUTE_FIXED.
5602  */
5603 
5604 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5605 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5606     int val;
5607     xmlChar *ret;
5608 
5609     *value = NULL;
5610     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5611 	SKIP(9);
5612 	return(XML_ATTRIBUTE_REQUIRED);
5613     }
5614     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5615 	SKIP(8);
5616 	return(XML_ATTRIBUTE_IMPLIED);
5617     }
5618     val = XML_ATTRIBUTE_NONE;
5619     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5620 	SKIP(6);
5621 	val = XML_ATTRIBUTE_FIXED;
5622 	if (SKIP_BLANKS == 0) {
5623 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5624 			   "Space required after '#FIXED'\n");
5625 	}
5626     }
5627     ret = xmlParseAttValue(ctxt);
5628     ctxt->instate = XML_PARSER_DTD;
5629     if (ret == NULL) {
5630 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5631 		       "Attribute default value declaration error\n");
5632     } else
5633         *value = ret;
5634     return(val);
5635 }
5636 
5637 /**
5638  * xmlParseNotationType:
5639  * @ctxt:  an XML parser context
5640  *
5641  * parse an Notation attribute type.
5642  *
5643  * Note: the leading 'NOTATION' S part has already being parsed...
5644  *
5645  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5646  *
5647  * [ VC: Notation Attributes ]
5648  * Values of this type must match one of the notation names included
5649  * in the declaration; all notation names in the declaration must be declared.
5650  *
5651  * Returns: the notation attribute tree built while parsing
5652  */
5653 
5654 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5655 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5656     const xmlChar *name;
5657     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5658 
5659     if (RAW != '(') {
5660 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5661 	return(NULL);
5662     }
5663     SHRINK;
5664     do {
5665         NEXT;
5666 	SKIP_BLANKS;
5667         name = xmlParseName(ctxt);
5668 	if (name == NULL) {
5669 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5670 			   "Name expected in NOTATION declaration\n");
5671             xmlFreeEnumeration(ret);
5672 	    return(NULL);
5673 	}
5674 	tmp = ret;
5675 	while (tmp != NULL) {
5676 	    if (xmlStrEqual(name, tmp->name)) {
5677 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5678 	  "standalone: attribute notation value token %s duplicated\n",
5679 				 name, NULL);
5680 		if (!xmlDictOwns(ctxt->dict, name))
5681 		    xmlFree((xmlChar *) name);
5682 		break;
5683 	    }
5684 	    tmp = tmp->next;
5685 	}
5686 	if (tmp == NULL) {
5687 	    cur = xmlCreateEnumeration(name);
5688 	    if (cur == NULL) {
5689                 xmlFreeEnumeration(ret);
5690                 return(NULL);
5691             }
5692 	    if (last == NULL) ret = last = cur;
5693 	    else {
5694 		last->next = cur;
5695 		last = cur;
5696 	    }
5697 	}
5698 	SKIP_BLANKS;
5699     } while (RAW == '|');
5700     if (RAW != ')') {
5701 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5702         xmlFreeEnumeration(ret);
5703 	return(NULL);
5704     }
5705     NEXT;
5706     return(ret);
5707 }
5708 
5709 /**
5710  * xmlParseEnumerationType:
5711  * @ctxt:  an XML parser context
5712  *
5713  * parse an Enumeration attribute type.
5714  *
5715  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5716  *
5717  * [ VC: Enumeration ]
5718  * Values of this type must match one of the Nmtoken tokens in
5719  * the declaration
5720  *
5721  * Returns: the enumeration attribute tree built while parsing
5722  */
5723 
5724 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5725 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5726     xmlChar *name;
5727     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5728 
5729     if (RAW != '(') {
5730 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5731 	return(NULL);
5732     }
5733     SHRINK;
5734     do {
5735         NEXT;
5736 	SKIP_BLANKS;
5737         name = xmlParseNmtoken(ctxt);
5738 	if (name == NULL) {
5739 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5740 	    return(ret);
5741 	}
5742 	tmp = ret;
5743 	while (tmp != NULL) {
5744 	    if (xmlStrEqual(name, tmp->name)) {
5745 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5746 	  "standalone: attribute enumeration value token %s duplicated\n",
5747 				 name, NULL);
5748 		if (!xmlDictOwns(ctxt->dict, name))
5749 		    xmlFree(name);
5750 		break;
5751 	    }
5752 	    tmp = tmp->next;
5753 	}
5754 	if (tmp == NULL) {
5755 	    cur = xmlCreateEnumeration(name);
5756 	    if (!xmlDictOwns(ctxt->dict, name))
5757 		xmlFree(name);
5758 	    if (cur == NULL) {
5759                 xmlFreeEnumeration(ret);
5760                 return(NULL);
5761             }
5762 	    if (last == NULL) ret = last = cur;
5763 	    else {
5764 		last->next = cur;
5765 		last = cur;
5766 	    }
5767 	}
5768 	SKIP_BLANKS;
5769     } while (RAW == '|');
5770     if (RAW != ')') {
5771 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5772 	return(ret);
5773     }
5774     NEXT;
5775     return(ret);
5776 }
5777 
5778 /**
5779  * xmlParseEnumeratedType:
5780  * @ctxt:  an XML parser context
5781  * @tree:  the enumeration tree built while parsing
5782  *
5783  * parse an Enumerated attribute type.
5784  *
5785  * [57] EnumeratedType ::= NotationType | Enumeration
5786  *
5787  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5788  *
5789  *
5790  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5791  */
5792 
5793 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5794 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5795     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5796 	SKIP(8);
5797 	if (SKIP_BLANKS == 0) {
5798 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5799 			   "Space required after 'NOTATION'\n");
5800 	    return(0);
5801 	}
5802 	*tree = xmlParseNotationType(ctxt);
5803 	if (*tree == NULL) return(0);
5804 	return(XML_ATTRIBUTE_NOTATION);
5805     }
5806     *tree = xmlParseEnumerationType(ctxt);
5807     if (*tree == NULL) return(0);
5808     return(XML_ATTRIBUTE_ENUMERATION);
5809 }
5810 
5811 /**
5812  * xmlParseAttributeType:
5813  * @ctxt:  an XML parser context
5814  * @tree:  the enumeration tree built while parsing
5815  *
5816  * parse the Attribute list def for an element
5817  *
5818  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5819  *
5820  * [55] StringType ::= 'CDATA'
5821  *
5822  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5823  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5824  *
5825  * Validity constraints for attribute values syntax are checked in
5826  * xmlValidateAttributeValue()
5827  *
5828  * [ VC: ID ]
5829  * Values of type ID must match the Name production. A name must not
5830  * appear more than once in an XML document as a value of this type;
5831  * i.e., ID values must uniquely identify the elements which bear them.
5832  *
5833  * [ VC: One ID per Element Type ]
5834  * No element type may have more than one ID attribute specified.
5835  *
5836  * [ VC: ID Attribute Default ]
5837  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5838  *
5839  * [ VC: IDREF ]
5840  * Values of type IDREF must match the Name production, and values
5841  * of type IDREFS must match Names; each IDREF Name must match the value
5842  * of an ID attribute on some element in the XML document; i.e. IDREF
5843  * values must match the value of some ID attribute.
5844  *
5845  * [ VC: Entity Name ]
5846  * Values of type ENTITY must match the Name production, values
5847  * of type ENTITIES must match Names; each Entity Name must match the
5848  * name of an unparsed entity declared in the DTD.
5849  *
5850  * [ VC: Name Token ]
5851  * Values of type NMTOKEN must match the Nmtoken production; values
5852  * of type NMTOKENS must match Nmtokens.
5853  *
5854  * Returns the attribute type
5855  */
5856 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5857 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5858     SHRINK;
5859     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5860 	SKIP(5);
5861 	return(XML_ATTRIBUTE_CDATA);
5862      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5863 	SKIP(6);
5864 	return(XML_ATTRIBUTE_IDREFS);
5865      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5866 	SKIP(5);
5867 	return(XML_ATTRIBUTE_IDREF);
5868      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5869         SKIP(2);
5870 	return(XML_ATTRIBUTE_ID);
5871      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5872 	SKIP(6);
5873 	return(XML_ATTRIBUTE_ENTITY);
5874      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5875 	SKIP(8);
5876 	return(XML_ATTRIBUTE_ENTITIES);
5877      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5878 	SKIP(8);
5879 	return(XML_ATTRIBUTE_NMTOKENS);
5880      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5881 	SKIP(7);
5882 	return(XML_ATTRIBUTE_NMTOKEN);
5883      }
5884      return(xmlParseEnumeratedType(ctxt, tree));
5885 }
5886 
5887 /**
5888  * xmlParseAttributeListDecl:
5889  * @ctxt:  an XML parser context
5890  *
5891  * : parse the Attribute list def for an element
5892  *
5893  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5894  *
5895  * [53] AttDef ::= S Name S AttType S DefaultDecl
5896  *
5897  */
5898 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5899 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5900     const xmlChar *elemName;
5901     const xmlChar *attrName;
5902     xmlEnumerationPtr tree;
5903 
5904     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5905 	int inputid = ctxt->input->id;
5906 
5907 	SKIP(9);
5908 	if (SKIP_BLANKS == 0) {
5909 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5910 		                 "Space required after '<!ATTLIST'\n");
5911 	}
5912         elemName = xmlParseName(ctxt);
5913 	if (elemName == NULL) {
5914 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5915 			   "ATTLIST: no name for Element\n");
5916 	    return;
5917 	}
5918 	SKIP_BLANKS;
5919 	GROW;
5920 	while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5921 	    int type;
5922 	    int def;
5923 	    xmlChar *defaultValue = NULL;
5924 
5925 	    GROW;
5926             tree = NULL;
5927 	    attrName = xmlParseName(ctxt);
5928 	    if (attrName == NULL) {
5929 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5930 			       "ATTLIST: no name for Attribute\n");
5931 		break;
5932 	    }
5933 	    GROW;
5934 	    if (SKIP_BLANKS == 0) {
5935 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5936 		        "Space required after the attribute name\n");
5937 		break;
5938 	    }
5939 
5940 	    type = xmlParseAttributeType(ctxt, &tree);
5941 	    if (type <= 0) {
5942 	        break;
5943 	    }
5944 
5945 	    GROW;
5946 	    if (SKIP_BLANKS == 0) {
5947 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5948 			       "Space required after the attribute type\n");
5949 	        if (tree != NULL)
5950 		    xmlFreeEnumeration(tree);
5951 		break;
5952 	    }
5953 
5954 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
5955 	    if (def <= 0) {
5956                 if (defaultValue != NULL)
5957 		    xmlFree(defaultValue);
5958 	        if (tree != NULL)
5959 		    xmlFreeEnumeration(tree);
5960 	        break;
5961 	    }
5962 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5963 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
5964 
5965 	    GROW;
5966             if (RAW != '>') {
5967 		if (SKIP_BLANKS == 0) {
5968 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5969 			"Space required after the attribute default value\n");
5970 		    if (defaultValue != NULL)
5971 			xmlFree(defaultValue);
5972 		    if (tree != NULL)
5973 			xmlFreeEnumeration(tree);
5974 		    break;
5975 		}
5976 	    }
5977 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5978 		(ctxt->sax->attributeDecl != NULL))
5979 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5980 	                        type, def, defaultValue, tree);
5981 	    else if (tree != NULL)
5982 		xmlFreeEnumeration(tree);
5983 
5984 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
5985 	        (def != XML_ATTRIBUTE_IMPLIED) &&
5986 		(def != XML_ATTRIBUTE_REQUIRED)) {
5987 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5988 	    }
5989 	    if (ctxt->sax2) {
5990 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5991 	    }
5992 	    if (defaultValue != NULL)
5993 	        xmlFree(defaultValue);
5994 	    GROW;
5995 	}
5996 	if (RAW == '>') {
5997 	    if (inputid != ctxt->input->id) {
5998 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5999                                "Attribute list declaration doesn't start and"
6000                                " stop in the same entity\n");
6001 	    }
6002 	    NEXT;
6003 	}
6004     }
6005 }
6006 
6007 /**
6008  * xmlParseElementMixedContentDecl:
6009  * @ctxt:  an XML parser context
6010  * @inputchk:  the input used for the current entity, needed for boundary checks
6011  *
6012  * parse the declaration for a Mixed Element content
6013  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6014  *
6015  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6016  *                '(' S? '#PCDATA' S? ')'
6017  *
6018  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6019  *
6020  * [ VC: No Duplicate Types ]
6021  * The same name must not appear more than once in a single
6022  * mixed-content declaration.
6023  *
6024  * returns: the list of the xmlElementContentPtr describing the element choices
6025  */
6026 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6027 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6028     xmlElementContentPtr ret = NULL, cur = NULL, n;
6029     const xmlChar *elem = NULL;
6030 
6031     GROW;
6032     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6033 	SKIP(7);
6034 	SKIP_BLANKS;
6035 	SHRINK;
6036 	if (RAW == ')') {
6037 	    if (ctxt->input->id != inputchk) {
6038 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6039                                "Element content declaration doesn't start and"
6040                                " stop in the same entity\n");
6041 	    }
6042 	    NEXT;
6043 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6044 	    if (ret == NULL)
6045 	        return(NULL);
6046 	    if (RAW == '*') {
6047 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6048 		NEXT;
6049 	    }
6050 	    return(ret);
6051 	}
6052 	if ((RAW == '(') || (RAW == '|')) {
6053 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6054 	    if (ret == NULL) return(NULL);
6055 	}
6056 	while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6057 	    NEXT;
6058 	    if (elem == NULL) {
6059 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6060 		if (ret == NULL) return(NULL);
6061 		ret->c1 = cur;
6062 		if (cur != NULL)
6063 		    cur->parent = ret;
6064 		cur = ret;
6065 	    } else {
6066 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6067 		if (n == NULL) return(NULL);
6068 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6069 		if (n->c1 != NULL)
6070 		    n->c1->parent = n;
6071 	        cur->c2 = n;
6072 		if (n != NULL)
6073 		    n->parent = cur;
6074 		cur = n;
6075 	    }
6076 	    SKIP_BLANKS;
6077 	    elem = xmlParseName(ctxt);
6078 	    if (elem == NULL) {
6079 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6080 			"xmlParseElementMixedContentDecl : Name expected\n");
6081 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6082 		return(NULL);
6083 	    }
6084 	    SKIP_BLANKS;
6085 	    GROW;
6086 	}
6087 	if ((RAW == ')') && (NXT(1) == '*')) {
6088 	    if (elem != NULL) {
6089 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6090 		                               XML_ELEMENT_CONTENT_ELEMENT);
6091 		if (cur->c2 != NULL)
6092 		    cur->c2->parent = cur;
6093             }
6094             if (ret != NULL)
6095                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6096 	    if (ctxt->input->id != inputchk) {
6097 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6098                                "Element content declaration doesn't start and"
6099                                " stop in the same entity\n");
6100 	    }
6101 	    SKIP(2);
6102 	} else {
6103 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6104 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6105 	    return(NULL);
6106 	}
6107 
6108     } else {
6109 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6110     }
6111     return(ret);
6112 }
6113 
6114 /**
6115  * xmlParseElementChildrenContentDeclPriv:
6116  * @ctxt:  an XML parser context
6117  * @inputchk:  the input used for the current entity, needed for boundary checks
6118  * @depth: the level of recursion
6119  *
6120  * parse the declaration for a Mixed Element content
6121  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6122  *
6123  *
6124  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6125  *
6126  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6127  *
6128  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6129  *
6130  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6131  *
6132  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6133  * TODO Parameter-entity replacement text must be properly nested
6134  *	with parenthesized groups. That is to say, if either of the
6135  *	opening or closing parentheses in a choice, seq, or Mixed
6136  *	construct is contained in the replacement text for a parameter
6137  *	entity, both must be contained in the same replacement text. For
6138  *	interoperability, if a parameter-entity reference appears in a
6139  *	choice, seq, or Mixed construct, its replacement text should not
6140  *	be empty, and neither the first nor last non-blank character of
6141  *	the replacement text should be a connector (| or ,).
6142  *
6143  * Returns the tree of xmlElementContentPtr describing the element
6144  *          hierarchy.
6145  */
6146 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6147 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6148                                        int depth) {
6149     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6150     const xmlChar *elem;
6151     xmlChar type = 0;
6152 
6153     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6154         (depth >  2048)) {
6155         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6156 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6157                           depth);
6158 	return(NULL);
6159     }
6160     SKIP_BLANKS;
6161     GROW;
6162     if (RAW == '(') {
6163 	int inputid = ctxt->input->id;
6164 
6165         /* Recurse on first child */
6166 	NEXT;
6167 	SKIP_BLANKS;
6168         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6169                                                            depth + 1);
6170 	SKIP_BLANKS;
6171 	GROW;
6172     } else {
6173 	elem = xmlParseName(ctxt);
6174 	if (elem == NULL) {
6175 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6176 	    return(NULL);
6177 	}
6178         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6179 	if (cur == NULL) {
6180 	    xmlErrMemory(ctxt, NULL);
6181 	    return(NULL);
6182 	}
6183 	GROW;
6184 	if (RAW == '?') {
6185 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6186 	    NEXT;
6187 	} else if (RAW == '*') {
6188 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6189 	    NEXT;
6190 	} else if (RAW == '+') {
6191 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6192 	    NEXT;
6193 	} else {
6194 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6195 	}
6196 	GROW;
6197     }
6198     SKIP_BLANKS;
6199     SHRINK;
6200     while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6201         /*
6202 	 * Each loop we parse one separator and one element.
6203 	 */
6204         if (RAW == ',') {
6205 	    if (type == 0) type = CUR;
6206 
6207 	    /*
6208 	     * Detect "Name | Name , Name" error
6209 	     */
6210 	    else if (type != CUR) {
6211 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6212 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6213 		                  type);
6214 		if ((last != NULL) && (last != ret))
6215 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6216 		if (ret != NULL)
6217 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6218 		return(NULL);
6219 	    }
6220 	    NEXT;
6221 
6222 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6223 	    if (op == NULL) {
6224 		if ((last != NULL) && (last != ret))
6225 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6226 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6227 		return(NULL);
6228 	    }
6229 	    if (last == NULL) {
6230 		op->c1 = ret;
6231 		if (ret != NULL)
6232 		    ret->parent = op;
6233 		ret = cur = op;
6234 	    } else {
6235 	        cur->c2 = op;
6236 		if (op != NULL)
6237 		    op->parent = cur;
6238 		op->c1 = last;
6239 		if (last != NULL)
6240 		    last->parent = op;
6241 		cur =op;
6242 		last = NULL;
6243 	    }
6244 	} else if (RAW == '|') {
6245 	    if (type == 0) type = CUR;
6246 
6247 	    /*
6248 	     * Detect "Name , Name | Name" error
6249 	     */
6250 	    else if (type != CUR) {
6251 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6252 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6253 				  type);
6254 		if ((last != NULL) && (last != ret))
6255 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6256 		if (ret != NULL)
6257 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6258 		return(NULL);
6259 	    }
6260 	    NEXT;
6261 
6262 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6263 	    if (op == NULL) {
6264 		if ((last != NULL) && (last != ret))
6265 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6266 		if (ret != NULL)
6267 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6268 		return(NULL);
6269 	    }
6270 	    if (last == NULL) {
6271 		op->c1 = ret;
6272 		if (ret != NULL)
6273 		    ret->parent = op;
6274 		ret = cur = op;
6275 	    } else {
6276 	        cur->c2 = op;
6277 		if (op != NULL)
6278 		    op->parent = cur;
6279 		op->c1 = last;
6280 		if (last != NULL)
6281 		    last->parent = op;
6282 		cur =op;
6283 		last = NULL;
6284 	    }
6285 	} else {
6286 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6287 	    if ((last != NULL) && (last != ret))
6288 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6289 	    if (ret != NULL)
6290 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6291 	    return(NULL);
6292 	}
6293 	GROW;
6294 	SKIP_BLANKS;
6295 	GROW;
6296 	if (RAW == '(') {
6297 	    int inputid = ctxt->input->id;
6298 	    /* Recurse on second child */
6299 	    NEXT;
6300 	    SKIP_BLANKS;
6301 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6302                                                           depth + 1);
6303 	    SKIP_BLANKS;
6304 	} else {
6305 	    elem = xmlParseName(ctxt);
6306 	    if (elem == NULL) {
6307 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6308 		if (ret != NULL)
6309 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6310 		return(NULL);
6311 	    }
6312 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6313 	    if (last == NULL) {
6314 		if (ret != NULL)
6315 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6316 		return(NULL);
6317 	    }
6318 	    if (RAW == '?') {
6319 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6320 		NEXT;
6321 	    } else if (RAW == '*') {
6322 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6323 		NEXT;
6324 	    } else if (RAW == '+') {
6325 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6326 		NEXT;
6327 	    } else {
6328 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6329 	    }
6330 	}
6331 	SKIP_BLANKS;
6332 	GROW;
6333     }
6334     if ((cur != NULL) && (last != NULL)) {
6335         cur->c2 = last;
6336 	if (last != NULL)
6337 	    last->parent = cur;
6338     }
6339     if (ctxt->input->id != inputchk) {
6340 	xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6341                        "Element content declaration doesn't start and stop in"
6342                        " the same entity\n");
6343     }
6344     NEXT;
6345     if (RAW == '?') {
6346 	if (ret != NULL) {
6347 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6348 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6349 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6350 	    else
6351 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6352 	}
6353 	NEXT;
6354     } else if (RAW == '*') {
6355 	if (ret != NULL) {
6356 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6357 	    cur = ret;
6358 	    /*
6359 	     * Some normalization:
6360 	     * (a | b* | c?)* == (a | b | c)*
6361 	     */
6362 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6363 		if ((cur->c1 != NULL) &&
6364 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6365 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6366 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6367 		if ((cur->c2 != NULL) &&
6368 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6369 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6370 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6371 		cur = cur->c2;
6372 	    }
6373 	}
6374 	NEXT;
6375     } else if (RAW == '+') {
6376 	if (ret != NULL) {
6377 	    int found = 0;
6378 
6379 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6380 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6381 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6382 	    else
6383 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6384 	    /*
6385 	     * Some normalization:
6386 	     * (a | b*)+ == (a | b)*
6387 	     * (a | b?)+ == (a | b)*
6388 	     */
6389 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6390 		if ((cur->c1 != NULL) &&
6391 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6392 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6393 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6394 		    found = 1;
6395 		}
6396 		if ((cur->c2 != NULL) &&
6397 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6398 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6399 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6400 		    found = 1;
6401 		}
6402 		cur = cur->c2;
6403 	    }
6404 	    if (found)
6405 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6406 	}
6407 	NEXT;
6408     }
6409     return(ret);
6410 }
6411 
6412 /**
6413  * xmlParseElementChildrenContentDecl:
6414  * @ctxt:  an XML parser context
6415  * @inputchk:  the input used for the current entity, needed for boundary checks
6416  *
6417  * parse the declaration for a Mixed Element content
6418  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6419  *
6420  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6421  *
6422  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6423  *
6424  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6425  *
6426  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6427  *
6428  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6429  * TODO Parameter-entity replacement text must be properly nested
6430  *	with parenthesized groups. That is to say, if either of the
6431  *	opening or closing parentheses in a choice, seq, or Mixed
6432  *	construct is contained in the replacement text for a parameter
6433  *	entity, both must be contained in the same replacement text. For
6434  *	interoperability, if a parameter-entity reference appears in a
6435  *	choice, seq, or Mixed construct, its replacement text should not
6436  *	be empty, and neither the first nor last non-blank character of
6437  *	the replacement text should be a connector (| or ,).
6438  *
6439  * Returns the tree of xmlElementContentPtr describing the element
6440  *          hierarchy.
6441  */
6442 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6443 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6444     /* stub left for API/ABI compat */
6445     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6446 }
6447 
6448 /**
6449  * xmlParseElementContentDecl:
6450  * @ctxt:  an XML parser context
6451  * @name:  the name of the element being defined.
6452  * @result:  the Element Content pointer will be stored here if any
6453  *
6454  * parse the declaration for an Element content either Mixed or Children,
6455  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6456  *
6457  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6458  *
6459  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6460  */
6461 
6462 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6463 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6464                            xmlElementContentPtr *result) {
6465 
6466     xmlElementContentPtr tree = NULL;
6467     int inputid = ctxt->input->id;
6468     int res;
6469 
6470     *result = NULL;
6471 
6472     if (RAW != '(') {
6473 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6474 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6475 	return(-1);
6476     }
6477     NEXT;
6478     GROW;
6479     if (ctxt->instate == XML_PARSER_EOF)
6480         return(-1);
6481     SKIP_BLANKS;
6482     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6483         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6484 	res = XML_ELEMENT_TYPE_MIXED;
6485     } else {
6486         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6487 	res = XML_ELEMENT_TYPE_ELEMENT;
6488     }
6489     SKIP_BLANKS;
6490     *result = tree;
6491     return(res);
6492 }
6493 
6494 /**
6495  * xmlParseElementDecl:
6496  * @ctxt:  an XML parser context
6497  *
6498  * parse an Element declaration.
6499  *
6500  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6501  *
6502  * [ VC: Unique Element Type Declaration ]
6503  * No element type may be declared more than once
6504  *
6505  * Returns the type of the element, or -1 in case of error
6506  */
6507 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6508 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6509     const xmlChar *name;
6510     int ret = -1;
6511     xmlElementContentPtr content  = NULL;
6512 
6513     /* GROW; done in the caller */
6514     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6515 	int inputid = ctxt->input->id;
6516 
6517 	SKIP(9);
6518 	if (SKIP_BLANKS == 0) {
6519 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6520 		           "Space required after 'ELEMENT'\n");
6521 	    return(-1);
6522 	}
6523         name = xmlParseName(ctxt);
6524 	if (name == NULL) {
6525 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6526 			   "xmlParseElementDecl: no name for Element\n");
6527 	    return(-1);
6528 	}
6529 	if (SKIP_BLANKS == 0) {
6530 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6531 			   "Space required after the element name\n");
6532 	}
6533 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6534 	    SKIP(5);
6535 	    /*
6536 	     * Element must always be empty.
6537 	     */
6538 	    ret = XML_ELEMENT_TYPE_EMPTY;
6539 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6540 	           (NXT(2) == 'Y')) {
6541 	    SKIP(3);
6542 	    /*
6543 	     * Element is a generic container.
6544 	     */
6545 	    ret = XML_ELEMENT_TYPE_ANY;
6546 	} else if (RAW == '(') {
6547 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6548 	} else {
6549 	    /*
6550 	     * [ WFC: PEs in Internal Subset ] error handling.
6551 	     */
6552 	    if ((RAW == '%') && (ctxt->external == 0) &&
6553 	        (ctxt->inputNr == 1)) {
6554 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6555 	  "PEReference: forbidden within markup decl in internal subset\n");
6556 	    } else {
6557 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6558 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6559             }
6560 	    return(-1);
6561 	}
6562 
6563 	SKIP_BLANKS;
6564 
6565 	if (RAW != '>') {
6566 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6567 	    if (content != NULL) {
6568 		xmlFreeDocElementContent(ctxt->myDoc, content);
6569 	    }
6570 	} else {
6571 	    if (inputid != ctxt->input->id) {
6572 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6573                                "Element declaration doesn't start and stop in"
6574                                " the same entity\n");
6575 	    }
6576 
6577 	    NEXT;
6578 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6579 		(ctxt->sax->elementDecl != NULL)) {
6580 		if (content != NULL)
6581 		    content->parent = NULL;
6582 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6583 		                       content);
6584 		if ((content != NULL) && (content->parent == NULL)) {
6585 		    /*
6586 		     * this is a trick: if xmlAddElementDecl is called,
6587 		     * instead of copying the full tree it is plugged directly
6588 		     * if called from the parser. Avoid duplicating the
6589 		     * interfaces or change the API/ABI
6590 		     */
6591 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6592 		}
6593 	    } else if (content != NULL) {
6594 		xmlFreeDocElementContent(ctxt->myDoc, content);
6595 	    }
6596 	}
6597     }
6598     return(ret);
6599 }
6600 
6601 /**
6602  * xmlParseConditionalSections
6603  * @ctxt:  an XML parser context
6604  *
6605  * [61] conditionalSect ::= includeSect | ignoreSect
6606  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6607  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6608  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6609  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6610  */
6611 
6612 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6613 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6614     int id = ctxt->input->id;
6615 
6616     SKIP(3);
6617     SKIP_BLANKS;
6618     if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6619 	SKIP(7);
6620 	SKIP_BLANKS;
6621 	if (RAW != '[') {
6622 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6623 	    xmlHaltParser(ctxt);
6624 	    return;
6625 	} else {
6626 	    if (ctxt->input->id != id) {
6627 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6628 	                       "All markup of the conditional section is not"
6629                                " in the same entity\n");
6630 	    }
6631 	    NEXT;
6632 	}
6633 	if (xmlParserDebugEntities) {
6634 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6635 		xmlGenericError(xmlGenericErrorContext,
6636 			"%s(%d): ", ctxt->input->filename,
6637 			ctxt->input->line);
6638 	    xmlGenericError(xmlGenericErrorContext,
6639 		    "Entering INCLUDE Conditional Section\n");
6640 	}
6641 
6642         SKIP_BLANKS;
6643         GROW;
6644 	while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6645 	        (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6646 	    const xmlChar *check = CUR_PTR;
6647 	    unsigned int cons = ctxt->input->consumed;
6648 
6649 	    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6650 		xmlParseConditionalSections(ctxt);
6651 	    } else
6652 		xmlParseMarkupDecl(ctxt);
6653 
6654             SKIP_BLANKS;
6655             GROW;
6656 
6657 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6658 		xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6659 		xmlHaltParser(ctxt);
6660 		break;
6661 	    }
6662 	}
6663 	if (xmlParserDebugEntities) {
6664 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6665 		xmlGenericError(xmlGenericErrorContext,
6666 			"%s(%d): ", ctxt->input->filename,
6667 			ctxt->input->line);
6668 	    xmlGenericError(xmlGenericErrorContext,
6669 		    "Leaving INCLUDE Conditional Section\n");
6670 	}
6671 
6672     } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6673 	int state;
6674 	xmlParserInputState instate;
6675 	int depth = 0;
6676 
6677 	SKIP(6);
6678 	SKIP_BLANKS;
6679 	if (RAW != '[') {
6680 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6681 	    xmlHaltParser(ctxt);
6682 	    return;
6683 	} else {
6684 	    if (ctxt->input->id != id) {
6685 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6686 	                       "All markup of the conditional section is not"
6687                                " in the same entity\n");
6688 	    }
6689 	    NEXT;
6690 	}
6691 	if (xmlParserDebugEntities) {
6692 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6693 		xmlGenericError(xmlGenericErrorContext,
6694 			"%s(%d): ", ctxt->input->filename,
6695 			ctxt->input->line);
6696 	    xmlGenericError(xmlGenericErrorContext,
6697 		    "Entering IGNORE Conditional Section\n");
6698 	}
6699 
6700 	/*
6701 	 * Parse up to the end of the conditional section
6702 	 * But disable SAX event generating DTD building in the meantime
6703 	 */
6704 	state = ctxt->disableSAX;
6705 	instate = ctxt->instate;
6706 	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6707 	ctxt->instate = XML_PARSER_IGNORE;
6708 
6709 	while (((depth >= 0) && (RAW != 0)) &&
6710                (ctxt->instate != XML_PARSER_EOF)) {
6711 	  if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6712 	    depth++;
6713 	    SKIP(3);
6714 	    continue;
6715 	  }
6716 	  if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6717 	    if (--depth >= 0) SKIP(3);
6718 	    continue;
6719 	  }
6720 	  NEXT;
6721 	  continue;
6722 	}
6723 
6724 	ctxt->disableSAX = state;
6725 	ctxt->instate = instate;
6726 
6727 	if (xmlParserDebugEntities) {
6728 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6729 		xmlGenericError(xmlGenericErrorContext,
6730 			"%s(%d): ", ctxt->input->filename,
6731 			ctxt->input->line);
6732 	    xmlGenericError(xmlGenericErrorContext,
6733 		    "Leaving IGNORE Conditional Section\n");
6734 	}
6735 
6736     } else {
6737 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6738 	xmlHaltParser(ctxt);
6739 	return;
6740     }
6741 
6742     if (RAW == 0)
6743         SHRINK;
6744 
6745     if (RAW == 0) {
6746 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6747     } else {
6748 	if (ctxt->input->id != id) {
6749 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6750 	                   "All markup of the conditional section is not in"
6751                            " the same entity\n");
6752 	}
6753 	if ((ctxt-> instate != XML_PARSER_EOF) &&
6754 	    ((ctxt->input->cur + 3) <= ctxt->input->end))
6755 	    SKIP(3);
6756     }
6757 }
6758 
6759 /**
6760  * xmlParseMarkupDecl:
6761  * @ctxt:  an XML parser context
6762  *
6763  * parse Markup declarations
6764  *
6765  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6766  *                     NotationDecl | PI | Comment
6767  *
6768  * [ VC: Proper Declaration/PE Nesting ]
6769  * Parameter-entity replacement text must be properly nested with
6770  * markup declarations. That is to say, if either the first character
6771  * or the last character of a markup declaration (markupdecl above) is
6772  * contained in the replacement text for a parameter-entity reference,
6773  * both must be contained in the same replacement text.
6774  *
6775  * [ WFC: PEs in Internal Subset ]
6776  * In the internal DTD subset, parameter-entity references can occur
6777  * only where markup declarations can occur, not within markup declarations.
6778  * (This does not apply to references that occur in external parameter
6779  * entities or to the external subset.)
6780  */
6781 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6782 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6783     GROW;
6784     if (CUR == '<') {
6785         if (NXT(1) == '!') {
6786 	    switch (NXT(2)) {
6787 	        case 'E':
6788 		    if (NXT(3) == 'L')
6789 			xmlParseElementDecl(ctxt);
6790 		    else if (NXT(3) == 'N')
6791 			xmlParseEntityDecl(ctxt);
6792 		    break;
6793 	        case 'A':
6794 		    xmlParseAttributeListDecl(ctxt);
6795 		    break;
6796 	        case 'N':
6797 		    xmlParseNotationDecl(ctxt);
6798 		    break;
6799 	        case '-':
6800 		    xmlParseComment(ctxt);
6801 		    break;
6802 		default:
6803 		    /* there is an error but it will be detected later */
6804 		    break;
6805 	    }
6806 	} else if (NXT(1) == '?') {
6807 	    xmlParsePI(ctxt);
6808 	}
6809     }
6810 
6811     /*
6812      * detect requirement to exit there and act accordingly
6813      * and avoid having instate overriden later on
6814      */
6815     if (ctxt->instate == XML_PARSER_EOF)
6816         return;
6817 
6818     /*
6819      * Conditional sections are allowed from entities included
6820      * by PE References in the internal subset.
6821      */
6822     if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6823         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6824 	    xmlParseConditionalSections(ctxt);
6825 	}
6826     }
6827 
6828     ctxt->instate = XML_PARSER_DTD;
6829 }
6830 
6831 /**
6832  * xmlParseTextDecl:
6833  * @ctxt:  an XML parser context
6834  *
6835  * parse an XML declaration header for external entities
6836  *
6837  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6838  */
6839 
6840 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6841 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6842     xmlChar *version;
6843     const xmlChar *encoding;
6844 
6845     /*
6846      * We know that '<?xml' is here.
6847      */
6848     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6849 	SKIP(5);
6850     } else {
6851 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6852 	return;
6853     }
6854 
6855     if (SKIP_BLANKS == 0) {
6856 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6857 		       "Space needed after '<?xml'\n");
6858     }
6859 
6860     /*
6861      * We may have the VersionInfo here.
6862      */
6863     version = xmlParseVersionInfo(ctxt);
6864     if (version == NULL)
6865 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
6866     else {
6867 	if (SKIP_BLANKS == 0) {
6868 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6869 		           "Space needed here\n");
6870 	}
6871     }
6872     ctxt->input->version = version;
6873 
6874     /*
6875      * We must have the encoding declaration
6876      */
6877     encoding = xmlParseEncodingDecl(ctxt);
6878     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6879 	/*
6880 	 * The XML REC instructs us to stop parsing right here
6881 	 */
6882         return;
6883     }
6884     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6885 	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6886 		       "Missing encoding in text declaration\n");
6887     }
6888 
6889     SKIP_BLANKS;
6890     if ((RAW == '?') && (NXT(1) == '>')) {
6891         SKIP(2);
6892     } else if (RAW == '>') {
6893         /* Deprecated old WD ... */
6894 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6895 	NEXT;
6896     } else {
6897 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6898 	MOVETO_ENDTAG(CUR_PTR);
6899 	NEXT;
6900     }
6901 }
6902 
6903 /**
6904  * xmlParseExternalSubset:
6905  * @ctxt:  an XML parser context
6906  * @ExternalID: the external identifier
6907  * @SystemID: the system identifier (or URL)
6908  *
6909  * parse Markup declarations from an external subset
6910  *
6911  * [30] extSubset ::= textDecl? extSubsetDecl
6912  *
6913  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6914  */
6915 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6916 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6917                        const xmlChar *SystemID) {
6918     xmlDetectSAX2(ctxt);
6919     GROW;
6920 
6921     if ((ctxt->encoding == NULL) &&
6922         (ctxt->input->end - ctxt->input->cur >= 4)) {
6923         xmlChar start[4];
6924 	xmlCharEncoding enc;
6925 
6926 	start[0] = RAW;
6927 	start[1] = NXT(1);
6928 	start[2] = NXT(2);
6929 	start[3] = NXT(3);
6930 	enc = xmlDetectCharEncoding(start, 4);
6931 	if (enc != XML_CHAR_ENCODING_NONE)
6932 	    xmlSwitchEncoding(ctxt, enc);
6933     }
6934 
6935     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6936 	xmlParseTextDecl(ctxt);
6937 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6938 	    /*
6939 	     * The XML REC instructs us to stop parsing right here
6940 	     */
6941 	    xmlHaltParser(ctxt);
6942 	    return;
6943 	}
6944     }
6945     if (ctxt->myDoc == NULL) {
6946         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6947 	if (ctxt->myDoc == NULL) {
6948 	    xmlErrMemory(ctxt, "New Doc failed");
6949 	    return;
6950 	}
6951 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
6952     }
6953     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6954         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6955 
6956     ctxt->instate = XML_PARSER_DTD;
6957     ctxt->external = 1;
6958     SKIP_BLANKS;
6959     while (((RAW == '<') && (NXT(1) == '?')) ||
6960            ((RAW == '<') && (NXT(1) == '!')) ||
6961 	   (RAW == '%')) {
6962 	const xmlChar *check = CUR_PTR;
6963 	unsigned int cons = ctxt->input->consumed;
6964 
6965 	GROW;
6966         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6967 	    xmlParseConditionalSections(ctxt);
6968 	} else
6969 	    xmlParseMarkupDecl(ctxt);
6970         SKIP_BLANKS;
6971 
6972 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6973 	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6974 	    break;
6975 	}
6976     }
6977 
6978     if (RAW != 0) {
6979 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6980     }
6981 
6982 }
6983 
6984 /**
6985  * xmlParseReference:
6986  * @ctxt:  an XML parser context
6987  *
6988  * parse and handle entity references in content, depending on the SAX
6989  * interface, this may end-up in a call to character() if this is a
6990  * CharRef, a predefined entity, if there is no reference() callback.
6991  * or if the parser was asked to switch to that mode.
6992  *
6993  * [67] Reference ::= EntityRef | CharRef
6994  */
6995 void
xmlParseReference(xmlParserCtxtPtr ctxt)6996 xmlParseReference(xmlParserCtxtPtr ctxt) {
6997     xmlEntityPtr ent;
6998     xmlChar *val;
6999     int was_checked;
7000     xmlNodePtr list = NULL;
7001     xmlParserErrors ret = XML_ERR_OK;
7002 
7003 
7004     if (RAW != '&')
7005         return;
7006 
7007     /*
7008      * Simple case of a CharRef
7009      */
7010     if (NXT(1) == '#') {
7011 	int i = 0;
7012 	xmlChar out[10];
7013 	int hex = NXT(2);
7014 	int value = xmlParseCharRef(ctxt);
7015 
7016 	if (value == 0)
7017 	    return;
7018 	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7019 	    /*
7020 	     * So we are using non-UTF-8 buffers
7021 	     * Check that the char fit on 8bits, if not
7022 	     * generate a CharRef.
7023 	     */
7024 	    if (value <= 0xFF) {
7025 		out[0] = value;
7026 		out[1] = 0;
7027 		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7028 		    (!ctxt->disableSAX))
7029 		    ctxt->sax->characters(ctxt->userData, out, 1);
7030 	    } else {
7031 		if ((hex == 'x') || (hex == 'X'))
7032 		    snprintf((char *)out, sizeof(out), "#x%X", value);
7033 		else
7034 		    snprintf((char *)out, sizeof(out), "#%d", value);
7035 		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7036 		    (!ctxt->disableSAX))
7037 		    ctxt->sax->reference(ctxt->userData, out);
7038 	    }
7039 	} else {
7040 	    /*
7041 	     * Just encode the value in UTF-8
7042 	     */
7043 	    COPY_BUF(0 ,out, i, value);
7044 	    out[i] = 0;
7045 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7046 		(!ctxt->disableSAX))
7047 		ctxt->sax->characters(ctxt->userData, out, i);
7048 	}
7049 	return;
7050     }
7051 
7052     /*
7053      * We are seeing an entity reference
7054      */
7055     ent = xmlParseEntityRef(ctxt);
7056     if (ent == NULL) return;
7057     if (!ctxt->wellFormed)
7058 	return;
7059     was_checked = ent->checked;
7060 
7061     /* special case of predefined entities */
7062     if ((ent->name == NULL) ||
7063         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7064 	val = ent->content;
7065 	if (val == NULL) return;
7066 	/*
7067 	 * inline the entity.
7068 	 */
7069 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7070 	    (!ctxt->disableSAX))
7071 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7072 	return;
7073     }
7074 
7075     /*
7076      * The first reference to the entity trigger a parsing phase
7077      * where the ent->children is filled with the result from
7078      * the parsing.
7079      * Note: external parsed entities will not be loaded, it is not
7080      * required for a non-validating parser, unless the parsing option
7081      * of validating, or substituting entities were given. Doing so is
7082      * far more secure as the parser will only process data coming from
7083      * the document entity by default.
7084      */
7085     if (((ent->checked == 0) ||
7086          ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7087         ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7088          (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7089 	unsigned long oldnbent = ctxt->nbentities;
7090 
7091 	/*
7092 	 * This is a bit hackish but this seems the best
7093 	 * way to make sure both SAX and DOM entity support
7094 	 * behaves okay.
7095 	 */
7096 	void *user_data;
7097 	if (ctxt->userData == ctxt)
7098 	    user_data = NULL;
7099 	else
7100 	    user_data = ctxt->userData;
7101 
7102 	/*
7103 	 * Check that this entity is well formed
7104 	 * 4.3.2: An internal general parsed entity is well-formed
7105 	 * if its replacement text matches the production labeled
7106 	 * content.
7107 	 */
7108 	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7109 	    ctxt->depth++;
7110 	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7111 	                                              user_data, &list);
7112 	    ctxt->depth--;
7113 
7114 	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7115 	    ctxt->depth++;
7116 	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7117 	                                   user_data, ctxt->depth, ent->URI,
7118 					   ent->ExternalID, &list);
7119 	    ctxt->depth--;
7120 	} else {
7121 	    ret = XML_ERR_ENTITY_PE_INTERNAL;
7122 	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7123 			 "invalid entity type found\n", NULL);
7124 	}
7125 
7126 	/*
7127 	 * Store the number of entities needing parsing for this entity
7128 	 * content and do checkings
7129 	 */
7130 	ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7131 	if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7132 	    ent->checked |= 1;
7133 	if (ret == XML_ERR_ENTITY_LOOP) {
7134 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7135 	    xmlFreeNodeList(list);
7136 	    return;
7137 	}
7138 	if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7139 	    xmlFreeNodeList(list);
7140 	    return;
7141 	}
7142 
7143 	if ((ret == XML_ERR_OK) && (list != NULL)) {
7144 	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7145 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7146 		(ent->children == NULL)) {
7147 		ent->children = list;
7148 		if (ctxt->replaceEntities) {
7149 		    /*
7150 		     * Prune it directly in the generated document
7151 		     * except for single text nodes.
7152 		     */
7153 		    if (((list->type == XML_TEXT_NODE) &&
7154 			 (list->next == NULL)) ||
7155 			(ctxt->parseMode == XML_PARSE_READER)) {
7156 			list->parent = (xmlNodePtr) ent;
7157 			list = NULL;
7158 			ent->owner = 1;
7159 		    } else {
7160 			ent->owner = 0;
7161 			while (list != NULL) {
7162 			    list->parent = (xmlNodePtr) ctxt->node;
7163 			    list->doc = ctxt->myDoc;
7164 			    if (list->next == NULL)
7165 				ent->last = list;
7166 			    list = list->next;
7167 			}
7168 			list = ent->children;
7169 #ifdef LIBXML_LEGACY_ENABLED
7170 			if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7171 			  xmlAddEntityReference(ent, list, NULL);
7172 #endif /* LIBXML_LEGACY_ENABLED */
7173 		    }
7174 		} else {
7175 		    ent->owner = 1;
7176 		    while (list != NULL) {
7177 			list->parent = (xmlNodePtr) ent;
7178 			xmlSetTreeDoc(list, ent->doc);
7179 			if (list->next == NULL)
7180 			    ent->last = list;
7181 			list = list->next;
7182 		    }
7183 		}
7184 	    } else {
7185 		xmlFreeNodeList(list);
7186 		list = NULL;
7187 	    }
7188 	} else if ((ret != XML_ERR_OK) &&
7189 		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
7190 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7191 		     "Entity '%s' failed to parse\n", ent->name);
7192             if (ent->content != NULL)
7193                 ent->content[0] = 0;
7194 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
7195 	} else if (list != NULL) {
7196 	    xmlFreeNodeList(list);
7197 	    list = NULL;
7198 	}
7199 	if (ent->checked == 0)
7200 	    ent->checked = 2;
7201 
7202         /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7203         was_checked = 0;
7204     } else if (ent->checked != 1) {
7205 	ctxt->nbentities += ent->checked / 2;
7206     }
7207 
7208     /*
7209      * Now that the entity content has been gathered
7210      * provide it to the application, this can take different forms based
7211      * on the parsing modes.
7212      */
7213     if (ent->children == NULL) {
7214 	/*
7215 	 * Probably running in SAX mode and the callbacks don't
7216 	 * build the entity content. So unless we already went
7217 	 * though parsing for first checking go though the entity
7218 	 * content to generate callbacks associated to the entity
7219 	 */
7220 	if (was_checked != 0) {
7221 	    void *user_data;
7222 	    /*
7223 	     * This is a bit hackish but this seems the best
7224 	     * way to make sure both SAX and DOM entity support
7225 	     * behaves okay.
7226 	     */
7227 	    if (ctxt->userData == ctxt)
7228 		user_data = NULL;
7229 	    else
7230 		user_data = ctxt->userData;
7231 
7232 	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7233 		ctxt->depth++;
7234 		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7235 				   ent->content, user_data, NULL);
7236 		ctxt->depth--;
7237 	    } else if (ent->etype ==
7238 		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7239 		ctxt->depth++;
7240 		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7241 			   ctxt->sax, user_data, ctxt->depth,
7242 			   ent->URI, ent->ExternalID, NULL);
7243 		ctxt->depth--;
7244 	    } else {
7245 		ret = XML_ERR_ENTITY_PE_INTERNAL;
7246 		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7247 			     "invalid entity type found\n", NULL);
7248 	    }
7249 	    if (ret == XML_ERR_ENTITY_LOOP) {
7250 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7251 		return;
7252 	    }
7253 	}
7254 	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7255 	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7256 	    /*
7257 	     * Entity reference callback comes second, it's somewhat
7258 	     * superfluous but a compatibility to historical behaviour
7259 	     */
7260 	    ctxt->sax->reference(ctxt->userData, ent->name);
7261 	}
7262 	return;
7263     }
7264 
7265     /*
7266      * If we didn't get any children for the entity being built
7267      */
7268     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7269 	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7270 	/*
7271 	 * Create a node.
7272 	 */
7273 	ctxt->sax->reference(ctxt->userData, ent->name);
7274 	return;
7275     }
7276 
7277     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7278 	/*
7279 	 * There is a problem on the handling of _private for entities
7280 	 * (bug 155816): Should we copy the content of the field from
7281 	 * the entity (possibly overwriting some value set by the user
7282 	 * when a copy is created), should we leave it alone, or should
7283 	 * we try to take care of different situations?  The problem
7284 	 * is exacerbated by the usage of this field by the xmlReader.
7285 	 * To fix this bug, we look at _private on the created node
7286 	 * and, if it's NULL, we copy in whatever was in the entity.
7287 	 * If it's not NULL we leave it alone.  This is somewhat of a
7288 	 * hack - maybe we should have further tests to determine
7289 	 * what to do.
7290 	 */
7291 	if ((ctxt->node != NULL) && (ent->children != NULL)) {
7292 	    /*
7293 	     * Seems we are generating the DOM content, do
7294 	     * a simple tree copy for all references except the first
7295 	     * In the first occurrence list contains the replacement.
7296 	     */
7297 	    if (((list == NULL) && (ent->owner == 0)) ||
7298 		(ctxt->parseMode == XML_PARSE_READER)) {
7299 		xmlNodePtr nw = NULL, cur, firstChild = NULL;
7300 
7301 		/*
7302 		 * We are copying here, make sure there is no abuse
7303 		 */
7304 		ctxt->sizeentcopy += ent->length + 5;
7305 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7306 		    return;
7307 
7308 		/*
7309 		 * when operating on a reader, the entities definitions
7310 		 * are always owning the entities subtree.
7311 		if (ctxt->parseMode == XML_PARSE_READER)
7312 		    ent->owner = 1;
7313 		 */
7314 
7315 		cur = ent->children;
7316 		while (cur != NULL) {
7317 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7318 		    if (nw != NULL) {
7319 			if (nw->_private == NULL)
7320 			    nw->_private = cur->_private;
7321 			if (firstChild == NULL){
7322 			    firstChild = nw;
7323 			}
7324 			nw = xmlAddChild(ctxt->node, nw);
7325 		    }
7326 		    if (cur == ent->last) {
7327 			/*
7328 			 * needed to detect some strange empty
7329 			 * node cases in the reader tests
7330 			 */
7331 			if ((ctxt->parseMode == XML_PARSE_READER) &&
7332 			    (nw != NULL) &&
7333 			    (nw->type == XML_ELEMENT_NODE) &&
7334 			    (nw->children == NULL))
7335 			    nw->extra = 1;
7336 
7337 			break;
7338 		    }
7339 		    cur = cur->next;
7340 		}
7341 #ifdef LIBXML_LEGACY_ENABLED
7342 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7343 		  xmlAddEntityReference(ent, firstChild, nw);
7344 #endif /* LIBXML_LEGACY_ENABLED */
7345 	    } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7346 		xmlNodePtr nw = NULL, cur, next, last,
7347 			   firstChild = NULL;
7348 
7349 		/*
7350 		 * We are copying here, make sure there is no abuse
7351 		 */
7352 		ctxt->sizeentcopy += ent->length + 5;
7353 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7354 		    return;
7355 
7356 		/*
7357 		 * Copy the entity child list and make it the new
7358 		 * entity child list. The goal is to make sure any
7359 		 * ID or REF referenced will be the one from the
7360 		 * document content and not the entity copy.
7361 		 */
7362 		cur = ent->children;
7363 		ent->children = NULL;
7364 		last = ent->last;
7365 		ent->last = NULL;
7366 		while (cur != NULL) {
7367 		    next = cur->next;
7368 		    cur->next = NULL;
7369 		    cur->parent = NULL;
7370 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7371 		    if (nw != NULL) {
7372 			if (nw->_private == NULL)
7373 			    nw->_private = cur->_private;
7374 			if (firstChild == NULL){
7375 			    firstChild = cur;
7376 			}
7377 			xmlAddChild((xmlNodePtr) ent, nw);
7378 			xmlAddChild(ctxt->node, cur);
7379 		    }
7380 		    if (cur == last)
7381 			break;
7382 		    cur = next;
7383 		}
7384 		if (ent->owner == 0)
7385 		    ent->owner = 1;
7386 #ifdef LIBXML_LEGACY_ENABLED
7387 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7388 		  xmlAddEntityReference(ent, firstChild, nw);
7389 #endif /* LIBXML_LEGACY_ENABLED */
7390 	    } else {
7391 		const xmlChar *nbktext;
7392 
7393 		/*
7394 		 * the name change is to avoid coalescing of the
7395 		 * node with a possible previous text one which
7396 		 * would make ent->children a dangling pointer
7397 		 */
7398 		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7399 					-1);
7400 		if (ent->children->type == XML_TEXT_NODE)
7401 		    ent->children->name = nbktext;
7402 		if ((ent->last != ent->children) &&
7403 		    (ent->last->type == XML_TEXT_NODE))
7404 		    ent->last->name = nbktext;
7405 		xmlAddChildList(ctxt->node, ent->children);
7406 	    }
7407 
7408 	    /*
7409 	     * This is to avoid a nasty side effect, see
7410 	     * characters() in SAX.c
7411 	     */
7412 	    ctxt->nodemem = 0;
7413 	    ctxt->nodelen = 0;
7414 	    return;
7415 	}
7416     }
7417 }
7418 
7419 /**
7420  * xmlParseEntityRef:
7421  * @ctxt:  an XML parser context
7422  *
7423  * parse ENTITY references declarations
7424  *
7425  * [68] EntityRef ::= '&' Name ';'
7426  *
7427  * [ WFC: Entity Declared ]
7428  * In a document without any DTD, a document with only an internal DTD
7429  * subset which contains no parameter entity references, or a document
7430  * with "standalone='yes'", the Name given in the entity reference
7431  * must match that in an entity declaration, except that well-formed
7432  * documents need not declare any of the following entities: amp, lt,
7433  * gt, apos, quot.  The declaration of a parameter entity must precede
7434  * any reference to it.  Similarly, the declaration of a general entity
7435  * must precede any reference to it which appears in a default value in an
7436  * attribute-list declaration. Note that if entities are declared in the
7437  * external subset or in external parameter entities, a non-validating
7438  * processor is not obligated to read and process their declarations;
7439  * for such documents, the rule that an entity must be declared is a
7440  * well-formedness constraint only if standalone='yes'.
7441  *
7442  * [ WFC: Parsed Entity ]
7443  * An entity reference must not contain the name of an unparsed entity
7444  *
7445  * Returns the xmlEntityPtr if found, or NULL otherwise.
7446  */
7447 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7448 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7449     const xmlChar *name;
7450     xmlEntityPtr ent = NULL;
7451 
7452     GROW;
7453     if (ctxt->instate == XML_PARSER_EOF)
7454         return(NULL);
7455 
7456     if (RAW != '&')
7457         return(NULL);
7458     NEXT;
7459     name = xmlParseName(ctxt);
7460     if (name == NULL) {
7461 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7462 		       "xmlParseEntityRef: no name\n");
7463         return(NULL);
7464     }
7465     if (RAW != ';') {
7466 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7467 	return(NULL);
7468     }
7469     NEXT;
7470 
7471     /*
7472      * Predefined entities override any extra definition
7473      */
7474     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7475         ent = xmlGetPredefinedEntity(name);
7476         if (ent != NULL)
7477             return(ent);
7478     }
7479 
7480     /*
7481      * Increase the number of entity references parsed
7482      */
7483     ctxt->nbentities++;
7484 
7485     /*
7486      * Ask first SAX for entity resolution, otherwise try the
7487      * entities which may have stored in the parser context.
7488      */
7489     if (ctxt->sax != NULL) {
7490 	if (ctxt->sax->getEntity != NULL)
7491 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7492 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7493 	    (ctxt->options & XML_PARSE_OLDSAX))
7494 	    ent = xmlGetPredefinedEntity(name);
7495 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7496 	    (ctxt->userData==ctxt)) {
7497 	    ent = xmlSAX2GetEntity(ctxt, name);
7498 	}
7499     }
7500     if (ctxt->instate == XML_PARSER_EOF)
7501 	return(NULL);
7502     /*
7503      * [ WFC: Entity Declared ]
7504      * In a document without any DTD, a document with only an
7505      * internal DTD subset which contains no parameter entity
7506      * references, or a document with "standalone='yes'", the
7507      * Name given in the entity reference must match that in an
7508      * entity declaration, except that well-formed documents
7509      * need not declare any of the following entities: amp, lt,
7510      * gt, apos, quot.
7511      * The declaration of a parameter entity must precede any
7512      * reference to it.
7513      * Similarly, the declaration of a general entity must
7514      * precede any reference to it which appears in a default
7515      * value in an attribute-list declaration. Note that if
7516      * entities are declared in the external subset or in
7517      * external parameter entities, a non-validating processor
7518      * is not obligated to read and process their declarations;
7519      * for such documents, the rule that an entity must be
7520      * declared is a well-formedness constraint only if
7521      * standalone='yes'.
7522      */
7523     if (ent == NULL) {
7524 	if ((ctxt->standalone == 1) ||
7525 	    ((ctxt->hasExternalSubset == 0) &&
7526 	     (ctxt->hasPErefs == 0))) {
7527 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7528 		     "Entity '%s' not defined\n", name);
7529 	} else {
7530 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7531 		     "Entity '%s' not defined\n", name);
7532 	    if ((ctxt->inSubset == 0) &&
7533 		(ctxt->sax != NULL) &&
7534 		(ctxt->sax->reference != NULL)) {
7535 		ctxt->sax->reference(ctxt->userData, name);
7536 	    }
7537 	}
7538 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7539 	ctxt->valid = 0;
7540     }
7541 
7542     /*
7543      * [ WFC: Parsed Entity ]
7544      * An entity reference must not contain the name of an
7545      * unparsed entity
7546      */
7547     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7548 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7549 		 "Entity reference to unparsed entity %s\n", name);
7550     }
7551 
7552     /*
7553      * [ WFC: No External Entity References ]
7554      * Attribute values cannot contain direct or indirect
7555      * entity references to external entities.
7556      */
7557     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7558 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7559 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7560 	     "Attribute references external entity '%s'\n", name);
7561     }
7562     /*
7563      * [ WFC: No < in Attribute Values ]
7564      * The replacement text of any entity referred to directly or
7565      * indirectly in an attribute value (other than "&lt;") must
7566      * not contain a <.
7567      */
7568     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7569 	     (ent != NULL) &&
7570 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7571 	if (((ent->checked & 1) || (ent->checked == 0)) &&
7572 	     (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7573 	    xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7574 	"'<' in entity '%s' is not allowed in attributes values\n", name);
7575         }
7576     }
7577 
7578     /*
7579      * Internal check, no parameter entities here ...
7580      */
7581     else {
7582 	switch (ent->etype) {
7583 	    case XML_INTERNAL_PARAMETER_ENTITY:
7584 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7585 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7586 	     "Attempt to reference the parameter entity '%s'\n",
7587 			      name);
7588 	    break;
7589 	    default:
7590 	    break;
7591 	}
7592     }
7593 
7594     /*
7595      * [ WFC: No Recursion ]
7596      * A parsed entity must not contain a recursive reference
7597      * to itself, either directly or indirectly.
7598      * Done somewhere else
7599      */
7600     return(ent);
7601 }
7602 
7603 /**
7604  * xmlParseStringEntityRef:
7605  * @ctxt:  an XML parser context
7606  * @str:  a pointer to an index in the string
7607  *
7608  * parse ENTITY references declarations, but this version parses it from
7609  * a string value.
7610  *
7611  * [68] EntityRef ::= '&' Name ';'
7612  *
7613  * [ WFC: Entity Declared ]
7614  * In a document without any DTD, a document with only an internal DTD
7615  * subset which contains no parameter entity references, or a document
7616  * with "standalone='yes'", the Name given in the entity reference
7617  * must match that in an entity declaration, except that well-formed
7618  * documents need not declare any of the following entities: amp, lt,
7619  * gt, apos, quot.  The declaration of a parameter entity must precede
7620  * any reference to it.  Similarly, the declaration of a general entity
7621  * must precede any reference to it which appears in a default value in an
7622  * attribute-list declaration. Note that if entities are declared in the
7623  * external subset or in external parameter entities, a non-validating
7624  * processor is not obligated to read and process their declarations;
7625  * for such documents, the rule that an entity must be declared is a
7626  * well-formedness constraint only if standalone='yes'.
7627  *
7628  * [ WFC: Parsed Entity ]
7629  * An entity reference must not contain the name of an unparsed entity
7630  *
7631  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7632  * is updated to the current location in the string.
7633  */
7634 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7635 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7636     xmlChar *name;
7637     const xmlChar *ptr;
7638     xmlChar cur;
7639     xmlEntityPtr ent = NULL;
7640 
7641     if ((str == NULL) || (*str == NULL))
7642         return(NULL);
7643     ptr = *str;
7644     cur = *ptr;
7645     if (cur != '&')
7646 	return(NULL);
7647 
7648     ptr++;
7649     name = xmlParseStringName(ctxt, &ptr);
7650     if (name == NULL) {
7651 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7652 		       "xmlParseStringEntityRef: no name\n");
7653 	*str = ptr;
7654 	return(NULL);
7655     }
7656     if (*ptr != ';') {
7657 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7658         xmlFree(name);
7659 	*str = ptr;
7660 	return(NULL);
7661     }
7662     ptr++;
7663 
7664 
7665     /*
7666      * Predefined entities override any extra definition
7667      */
7668     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7669         ent = xmlGetPredefinedEntity(name);
7670         if (ent != NULL) {
7671             xmlFree(name);
7672             *str = ptr;
7673             return(ent);
7674         }
7675     }
7676 
7677     /*
7678      * Increate the number of entity references parsed
7679      */
7680     ctxt->nbentities++;
7681 
7682     /*
7683      * Ask first SAX for entity resolution, otherwise try the
7684      * entities which may have stored in the parser context.
7685      */
7686     if (ctxt->sax != NULL) {
7687 	if (ctxt->sax->getEntity != NULL)
7688 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7689 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7690 	    ent = xmlGetPredefinedEntity(name);
7691 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7692 	    ent = xmlSAX2GetEntity(ctxt, name);
7693 	}
7694     }
7695     if (ctxt->instate == XML_PARSER_EOF) {
7696 	xmlFree(name);
7697 	return(NULL);
7698     }
7699 
7700     /*
7701      * [ WFC: Entity Declared ]
7702      * In a document without any DTD, a document with only an
7703      * internal DTD subset which contains no parameter entity
7704      * references, or a document with "standalone='yes'", the
7705      * Name given in the entity reference must match that in an
7706      * entity declaration, except that well-formed documents
7707      * need not declare any of the following entities: amp, lt,
7708      * gt, apos, quot.
7709      * The declaration of a parameter entity must precede any
7710      * reference to it.
7711      * Similarly, the declaration of a general entity must
7712      * precede any reference to it which appears in a default
7713      * value in an attribute-list declaration. Note that if
7714      * entities are declared in the external subset or in
7715      * external parameter entities, a non-validating processor
7716      * is not obligated to read and process their declarations;
7717      * for such documents, the rule that an entity must be
7718      * declared is a well-formedness constraint only if
7719      * standalone='yes'.
7720      */
7721     if (ent == NULL) {
7722 	if ((ctxt->standalone == 1) ||
7723 	    ((ctxt->hasExternalSubset == 0) &&
7724 	     (ctxt->hasPErefs == 0))) {
7725 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7726 		     "Entity '%s' not defined\n", name);
7727 	} else {
7728 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7729 			  "Entity '%s' not defined\n",
7730 			  name);
7731 	}
7732 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7733 	/* TODO ? check regressions ctxt->valid = 0; */
7734     }
7735 
7736     /*
7737      * [ WFC: Parsed Entity ]
7738      * An entity reference must not contain the name of an
7739      * unparsed entity
7740      */
7741     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7742 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7743 		 "Entity reference to unparsed entity %s\n", name);
7744     }
7745 
7746     /*
7747      * [ WFC: No External Entity References ]
7748      * Attribute values cannot contain direct or indirect
7749      * entity references to external entities.
7750      */
7751     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7752 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7753 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7754 	 "Attribute references external entity '%s'\n", name);
7755     }
7756     /*
7757      * [ WFC: No < in Attribute Values ]
7758      * The replacement text of any entity referred to directly or
7759      * indirectly in an attribute value (other than "&lt;") must
7760      * not contain a <.
7761      */
7762     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7763 	     (ent != NULL) && (ent->content != NULL) &&
7764 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7765 	     (xmlStrchr(ent->content, '<'))) {
7766 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7767      "'<' in entity '%s' is not allowed in attributes values\n",
7768 			  name);
7769     }
7770 
7771     /*
7772      * Internal check, no parameter entities here ...
7773      */
7774     else {
7775 	switch (ent->etype) {
7776 	    case XML_INTERNAL_PARAMETER_ENTITY:
7777 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7778 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7779 	     "Attempt to reference the parameter entity '%s'\n",
7780 				  name);
7781 	    break;
7782 	    default:
7783 	    break;
7784 	}
7785     }
7786 
7787     /*
7788      * [ WFC: No Recursion ]
7789      * A parsed entity must not contain a recursive reference
7790      * to itself, either directly or indirectly.
7791      * Done somewhere else
7792      */
7793 
7794     xmlFree(name);
7795     *str = ptr;
7796     return(ent);
7797 }
7798 
7799 /**
7800  * xmlParsePEReference:
7801  * @ctxt:  an XML parser context
7802  *
7803  * parse PEReference declarations
7804  * The entity content is handled directly by pushing it's content as
7805  * a new input stream.
7806  *
7807  * [69] PEReference ::= '%' Name ';'
7808  *
7809  * [ WFC: No Recursion ]
7810  * A parsed entity must not contain a recursive
7811  * reference to itself, either directly or indirectly.
7812  *
7813  * [ WFC: Entity Declared ]
7814  * In a document without any DTD, a document with only an internal DTD
7815  * subset which contains no parameter entity references, or a document
7816  * with "standalone='yes'", ...  ... The declaration of a parameter
7817  * entity must precede any reference to it...
7818  *
7819  * [ VC: Entity Declared ]
7820  * In a document with an external subset or external parameter entities
7821  * with "standalone='no'", ...  ... The declaration of a parameter entity
7822  * must precede any reference to it...
7823  *
7824  * [ WFC: In DTD ]
7825  * Parameter-entity references may only appear in the DTD.
7826  * NOTE: misleading but this is handled.
7827  */
7828 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7829 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7830 {
7831     const xmlChar *name;
7832     xmlEntityPtr entity = NULL;
7833     xmlParserInputPtr input;
7834 
7835     if (RAW != '%')
7836         return;
7837     NEXT;
7838     name = xmlParseName(ctxt);
7839     if (name == NULL) {
7840 	xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7841 	return;
7842     }
7843     if (xmlParserDebugEntities)
7844 	xmlGenericError(xmlGenericErrorContext,
7845 		"PEReference: %s\n", name);
7846     if (RAW != ';') {
7847 	xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7848         return;
7849     }
7850 
7851     NEXT;
7852 
7853     /*
7854      * Increate the number of entity references parsed
7855      */
7856     ctxt->nbentities++;
7857 
7858     /*
7859      * Request the entity from SAX
7860      */
7861     if ((ctxt->sax != NULL) &&
7862 	(ctxt->sax->getParameterEntity != NULL))
7863 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7864     if (ctxt->instate == XML_PARSER_EOF)
7865 	return;
7866     if (entity == NULL) {
7867 	/*
7868 	 * [ WFC: Entity Declared ]
7869 	 * In a document without any DTD, a document with only an
7870 	 * internal DTD subset which contains no parameter entity
7871 	 * references, or a document with "standalone='yes'", ...
7872 	 * ... The declaration of a parameter entity must precede
7873 	 * any reference to it...
7874 	 */
7875 	if ((ctxt->standalone == 1) ||
7876 	    ((ctxt->hasExternalSubset == 0) &&
7877 	     (ctxt->hasPErefs == 0))) {
7878 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7879 			      "PEReference: %%%s; not found\n",
7880 			      name);
7881 	} else {
7882 	    /*
7883 	     * [ VC: Entity Declared ]
7884 	     * In a document with an external subset or external
7885 	     * parameter entities with "standalone='no'", ...
7886 	     * ... The declaration of a parameter entity must
7887 	     * precede any reference to it...
7888 	     */
7889             if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7890                 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7891                                  "PEReference: %%%s; not found\n",
7892                                  name, NULL);
7893             } else
7894                 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7895                               "PEReference: %%%s; not found\n",
7896                               name, NULL);
7897             ctxt->valid = 0;
7898 	}
7899 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
7900     } else {
7901 	/*
7902 	 * Internal checking in case the entity quest barfed
7903 	 */
7904 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7905 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7906 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7907 		  "Internal: %%%s; is not a parameter entity\n",
7908 			  name, NULL);
7909 	} else {
7910             xmlChar start[4];
7911             xmlCharEncoding enc;
7912 
7913 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7914 	        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7915 		((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7916 		((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7917 		((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7918 		(ctxt->replaceEntities == 0) &&
7919 		(ctxt->validate == 0))
7920 		return;
7921 
7922 	    input = xmlNewEntityInputStream(ctxt, entity);
7923 	    if (xmlPushInput(ctxt, input) < 0) {
7924                 xmlFreeInputStream(input);
7925 		return;
7926             }
7927 
7928 	    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7929                 /*
7930                  * Get the 4 first bytes and decode the charset
7931                  * if enc != XML_CHAR_ENCODING_NONE
7932                  * plug some encoding conversion routines.
7933                  * Note that, since we may have some non-UTF8
7934                  * encoding (like UTF16, bug 135229), the 'length'
7935                  * is not known, but we can calculate based upon
7936                  * the amount of data in the buffer.
7937                  */
7938                 GROW
7939                 if (ctxt->instate == XML_PARSER_EOF)
7940                     return;
7941                 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7942                     start[0] = RAW;
7943                     start[1] = NXT(1);
7944                     start[2] = NXT(2);
7945                     start[3] = NXT(3);
7946                     enc = xmlDetectCharEncoding(start, 4);
7947                     if (enc != XML_CHAR_ENCODING_NONE) {
7948                         xmlSwitchEncoding(ctxt, enc);
7949                     }
7950                 }
7951 
7952                 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7953                     (IS_BLANK_CH(NXT(5)))) {
7954                     xmlParseTextDecl(ctxt);
7955                 }
7956             }
7957 	}
7958     }
7959     ctxt->hasPErefs = 1;
7960 }
7961 
7962 /**
7963  * xmlLoadEntityContent:
7964  * @ctxt:  an XML parser context
7965  * @entity: an unloaded system entity
7966  *
7967  * Load the original content of the given system entity from the
7968  * ExternalID/SystemID given. This is to be used for Included in Literal
7969  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7970  *
7971  * Returns 0 in case of success and -1 in case of failure
7972  */
7973 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7974 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7975     xmlParserInputPtr input;
7976     xmlBufferPtr buf;
7977     int l, c;
7978     int count = 0;
7979 
7980     if ((ctxt == NULL) || (entity == NULL) ||
7981         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7982 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7983 	(entity->content != NULL)) {
7984 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7985 	            "xmlLoadEntityContent parameter error");
7986         return(-1);
7987     }
7988 
7989     if (xmlParserDebugEntities)
7990 	xmlGenericError(xmlGenericErrorContext,
7991 		"Reading %s entity content input\n", entity->name);
7992 
7993     buf = xmlBufferCreate();
7994     if (buf == NULL) {
7995 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7996 	            "xmlLoadEntityContent parameter error");
7997         return(-1);
7998     }
7999 
8000     input = xmlNewEntityInputStream(ctxt, entity);
8001     if (input == NULL) {
8002 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8003 	            "xmlLoadEntityContent input error");
8004 	xmlBufferFree(buf);
8005         return(-1);
8006     }
8007 
8008     /*
8009      * Push the entity as the current input, read char by char
8010      * saving to the buffer until the end of the entity or an error
8011      */
8012     if (xmlPushInput(ctxt, input) < 0) {
8013         xmlBufferFree(buf);
8014 	return(-1);
8015     }
8016 
8017     GROW;
8018     c = CUR_CHAR(l);
8019     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8020            (IS_CHAR(c))) {
8021         xmlBufferAdd(buf, ctxt->input->cur, l);
8022 	if (count++ > XML_PARSER_CHUNK_SIZE) {
8023 	    count = 0;
8024 	    GROW;
8025             if (ctxt->instate == XML_PARSER_EOF) {
8026                 xmlBufferFree(buf);
8027                 return(-1);
8028             }
8029 	}
8030 	NEXTL(l);
8031 	c = CUR_CHAR(l);
8032 	if (c == 0) {
8033 	    count = 0;
8034 	    GROW;
8035             if (ctxt->instate == XML_PARSER_EOF) {
8036                 xmlBufferFree(buf);
8037                 return(-1);
8038             }
8039 	    c = CUR_CHAR(l);
8040 	}
8041     }
8042 
8043     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8044         xmlPopInput(ctxt);
8045     } else if (!IS_CHAR(c)) {
8046         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8047                           "xmlLoadEntityContent: invalid char value %d\n",
8048 	                  c);
8049 	xmlBufferFree(buf);
8050 	return(-1);
8051     }
8052     entity->content = buf->content;
8053     buf->content = NULL;
8054     xmlBufferFree(buf);
8055 
8056     return(0);
8057 }
8058 
8059 /**
8060  * xmlParseStringPEReference:
8061  * @ctxt:  an XML parser context
8062  * @str:  a pointer to an index in the string
8063  *
8064  * parse PEReference declarations
8065  *
8066  * [69] PEReference ::= '%' Name ';'
8067  *
8068  * [ WFC: No Recursion ]
8069  * A parsed entity must not contain a recursive
8070  * reference to itself, either directly or indirectly.
8071  *
8072  * [ WFC: Entity Declared ]
8073  * In a document without any DTD, a document with only an internal DTD
8074  * subset which contains no parameter entity references, or a document
8075  * with "standalone='yes'", ...  ... The declaration of a parameter
8076  * entity must precede any reference to it...
8077  *
8078  * [ VC: Entity Declared ]
8079  * In a document with an external subset or external parameter entities
8080  * with "standalone='no'", ...  ... The declaration of a parameter entity
8081  * must precede any reference to it...
8082  *
8083  * [ WFC: In DTD ]
8084  * Parameter-entity references may only appear in the DTD.
8085  * NOTE: misleading but this is handled.
8086  *
8087  * Returns the string of the entity content.
8088  *         str is updated to the current value of the index
8089  */
8090 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8091 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8092     const xmlChar *ptr;
8093     xmlChar cur;
8094     xmlChar *name;
8095     xmlEntityPtr entity = NULL;
8096 
8097     if ((str == NULL) || (*str == NULL)) return(NULL);
8098     ptr = *str;
8099     cur = *ptr;
8100     if (cur != '%')
8101         return(NULL);
8102     ptr++;
8103     name = xmlParseStringName(ctxt, &ptr);
8104     if (name == NULL) {
8105 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8106 		       "xmlParseStringPEReference: no name\n");
8107 	*str = ptr;
8108 	return(NULL);
8109     }
8110     cur = *ptr;
8111     if (cur != ';') {
8112 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8113 	xmlFree(name);
8114 	*str = ptr;
8115 	return(NULL);
8116     }
8117     ptr++;
8118 
8119     /*
8120      * Increate the number of entity references parsed
8121      */
8122     ctxt->nbentities++;
8123 
8124     /*
8125      * Request the entity from SAX
8126      */
8127     if ((ctxt->sax != NULL) &&
8128 	(ctxt->sax->getParameterEntity != NULL))
8129 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8130     if (ctxt->instate == XML_PARSER_EOF) {
8131 	xmlFree(name);
8132 	*str = ptr;
8133 	return(NULL);
8134     }
8135     if (entity == NULL) {
8136 	/*
8137 	 * [ WFC: Entity Declared ]
8138 	 * In a document without any DTD, a document with only an
8139 	 * internal DTD subset which contains no parameter entity
8140 	 * references, or a document with "standalone='yes'", ...
8141 	 * ... The declaration of a parameter entity must precede
8142 	 * any reference to it...
8143 	 */
8144 	if ((ctxt->standalone == 1) ||
8145 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8146 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8147 		 "PEReference: %%%s; not found\n", name);
8148 	} else {
8149 	    /*
8150 	     * [ VC: Entity Declared ]
8151 	     * In a document with an external subset or external
8152 	     * parameter entities with "standalone='no'", ...
8153 	     * ... The declaration of a parameter entity must
8154 	     * precede any reference to it...
8155 	     */
8156 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8157 			  "PEReference: %%%s; not found\n",
8158 			  name, NULL);
8159 	    ctxt->valid = 0;
8160 	}
8161 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
8162     } else {
8163 	/*
8164 	 * Internal checking in case the entity quest barfed
8165 	 */
8166 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8167 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8168 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8169 			  "%%%s; is not a parameter entity\n",
8170 			  name, NULL);
8171 	}
8172     }
8173     ctxt->hasPErefs = 1;
8174     xmlFree(name);
8175     *str = ptr;
8176     return(entity);
8177 }
8178 
8179 /**
8180  * xmlParseDocTypeDecl:
8181  * @ctxt:  an XML parser context
8182  *
8183  * parse a DOCTYPE declaration
8184  *
8185  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8186  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8187  *
8188  * [ VC: Root Element Type ]
8189  * The Name in the document type declaration must match the element
8190  * type of the root element.
8191  */
8192 
8193 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8194 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8195     const xmlChar *name = NULL;
8196     xmlChar *ExternalID = NULL;
8197     xmlChar *URI = NULL;
8198 
8199     /*
8200      * We know that '<!DOCTYPE' has been detected.
8201      */
8202     SKIP(9);
8203 
8204     SKIP_BLANKS;
8205 
8206     /*
8207      * Parse the DOCTYPE name.
8208      */
8209     name = xmlParseName(ctxt);
8210     if (name == NULL) {
8211 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8212 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8213     }
8214     ctxt->intSubName = name;
8215 
8216     SKIP_BLANKS;
8217 
8218     /*
8219      * Check for SystemID and ExternalID
8220      */
8221     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8222 
8223     if ((URI != NULL) || (ExternalID != NULL)) {
8224         ctxt->hasExternalSubset = 1;
8225     }
8226     ctxt->extSubURI = URI;
8227     ctxt->extSubSystem = ExternalID;
8228 
8229     SKIP_BLANKS;
8230 
8231     /*
8232      * Create and update the internal subset.
8233      */
8234     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8235 	(!ctxt->disableSAX))
8236 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8237     if (ctxt->instate == XML_PARSER_EOF)
8238 	return;
8239 
8240     /*
8241      * Is there any internal subset declarations ?
8242      * they are handled separately in xmlParseInternalSubset()
8243      */
8244     if (RAW == '[')
8245 	return;
8246 
8247     /*
8248      * We should be at the end of the DOCTYPE declaration.
8249      */
8250     if (RAW != '>') {
8251 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8252     }
8253     NEXT;
8254 }
8255 
8256 /**
8257  * xmlParseInternalSubset:
8258  * @ctxt:  an XML parser context
8259  *
8260  * parse the internal subset declaration
8261  *
8262  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8263  */
8264 
8265 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8266 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8267     /*
8268      * Is there any DTD definition ?
8269      */
8270     if (RAW == '[') {
8271         int baseInputNr = ctxt->inputNr;
8272         ctxt->instate = XML_PARSER_DTD;
8273         NEXT;
8274 	/*
8275 	 * Parse the succession of Markup declarations and
8276 	 * PEReferences.
8277 	 * Subsequence (markupdecl | PEReference | S)*
8278 	 */
8279 	while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8280                (ctxt->instate != XML_PARSER_EOF)) {
8281 	    const xmlChar *check = CUR_PTR;
8282 	    unsigned int cons = ctxt->input->consumed;
8283 
8284 	    SKIP_BLANKS;
8285 	    xmlParseMarkupDecl(ctxt);
8286 	    xmlParsePEReference(ctxt);
8287 
8288 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8289 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8290 	     "xmlParseInternalSubset: error detected in Markup declaration\n");
8291                 if (ctxt->inputNr > baseInputNr)
8292                     xmlPopInput(ctxt);
8293                 else
8294 		    break;
8295 	    }
8296 	}
8297 	if (RAW == ']') {
8298 	    NEXT;
8299 	    SKIP_BLANKS;
8300 	}
8301     }
8302 
8303     /*
8304      * We should be at the end of the DOCTYPE declaration.
8305      */
8306     if (RAW != '>') {
8307 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8308 	return;
8309     }
8310     NEXT;
8311 }
8312 
8313 #ifdef LIBXML_SAX1_ENABLED
8314 /**
8315  * xmlParseAttribute:
8316  * @ctxt:  an XML parser context
8317  * @value:  a xmlChar ** used to store the value of the attribute
8318  *
8319  * parse an attribute
8320  *
8321  * [41] Attribute ::= Name Eq AttValue
8322  *
8323  * [ WFC: No External Entity References ]
8324  * Attribute values cannot contain direct or indirect entity references
8325  * to external entities.
8326  *
8327  * [ WFC: No < in Attribute Values ]
8328  * The replacement text of any entity referred to directly or indirectly in
8329  * an attribute value (other than "&lt;") must not contain a <.
8330  *
8331  * [ VC: Attribute Value Type ]
8332  * The attribute must have been declared; the value must be of the type
8333  * declared for it.
8334  *
8335  * [25] Eq ::= S? '=' S?
8336  *
8337  * With namespace:
8338  *
8339  * [NS 11] Attribute ::= QName Eq AttValue
8340  *
8341  * Also the case QName == xmlns:??? is handled independently as a namespace
8342  * definition.
8343  *
8344  * Returns the attribute name, and the value in *value.
8345  */
8346 
8347 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8348 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8349     const xmlChar *name;
8350     xmlChar *val;
8351 
8352     *value = NULL;
8353     GROW;
8354     name = xmlParseName(ctxt);
8355     if (name == NULL) {
8356 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8357 	               "error parsing attribute name\n");
8358         return(NULL);
8359     }
8360 
8361     /*
8362      * read the value
8363      */
8364     SKIP_BLANKS;
8365     if (RAW == '=') {
8366         NEXT;
8367 	SKIP_BLANKS;
8368 	val = xmlParseAttValue(ctxt);
8369 	ctxt->instate = XML_PARSER_CONTENT;
8370     } else {
8371 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8372 	       "Specification mandates value for attribute %s\n", name);
8373 	return(NULL);
8374     }
8375 
8376     /*
8377      * Check that xml:lang conforms to the specification
8378      * No more registered as an error, just generate a warning now
8379      * since this was deprecated in XML second edition
8380      */
8381     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8382 	if (!xmlCheckLanguageID(val)) {
8383 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8384 		          "Malformed value for xml:lang : %s\n",
8385 			  val, NULL);
8386 	}
8387     }
8388 
8389     /*
8390      * Check that xml:space conforms to the specification
8391      */
8392     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8393 	if (xmlStrEqual(val, BAD_CAST "default"))
8394 	    *(ctxt->space) = 0;
8395 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8396 	    *(ctxt->space) = 1;
8397 	else {
8398 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8399 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8400                                  val, NULL);
8401 	}
8402     }
8403 
8404     *value = val;
8405     return(name);
8406 }
8407 
8408 /**
8409  * xmlParseStartTag:
8410  * @ctxt:  an XML parser context
8411  *
8412  * parse a start of tag either for rule element or
8413  * EmptyElement. In both case we don't parse the tag closing chars.
8414  *
8415  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8416  *
8417  * [ WFC: Unique Att Spec ]
8418  * No attribute name may appear more than once in the same start-tag or
8419  * empty-element tag.
8420  *
8421  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8422  *
8423  * [ WFC: Unique Att Spec ]
8424  * No attribute name may appear more than once in the same start-tag or
8425  * empty-element tag.
8426  *
8427  * With namespace:
8428  *
8429  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8430  *
8431  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8432  *
8433  * Returns the element name parsed
8434  */
8435 
8436 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8437 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8438     const xmlChar *name;
8439     const xmlChar *attname;
8440     xmlChar *attvalue;
8441     const xmlChar **atts = ctxt->atts;
8442     int nbatts = 0;
8443     int maxatts = ctxt->maxatts;
8444     int i;
8445 
8446     if (RAW != '<') return(NULL);
8447     NEXT1;
8448 
8449     name = xmlParseName(ctxt);
8450     if (name == NULL) {
8451 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8452 	     "xmlParseStartTag: invalid element name\n");
8453         return(NULL);
8454     }
8455 
8456     /*
8457      * Now parse the attributes, it ends up with the ending
8458      *
8459      * (S Attribute)* S?
8460      */
8461     SKIP_BLANKS;
8462     GROW;
8463 
8464     while (((RAW != '>') &&
8465 	   ((RAW != '/') || (NXT(1) != '>')) &&
8466 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8467 	const xmlChar *q = CUR_PTR;
8468 	unsigned int cons = ctxt->input->consumed;
8469 
8470 	attname = xmlParseAttribute(ctxt, &attvalue);
8471         if ((attname != NULL) && (attvalue != NULL)) {
8472 	    /*
8473 	     * [ WFC: Unique Att Spec ]
8474 	     * No attribute name may appear more than once in the same
8475 	     * start-tag or empty-element tag.
8476 	     */
8477 	    for (i = 0; i < nbatts;i += 2) {
8478 	        if (xmlStrEqual(atts[i], attname)) {
8479 		    xmlErrAttributeDup(ctxt, NULL, attname);
8480 		    xmlFree(attvalue);
8481 		    goto failed;
8482 		}
8483 	    }
8484 	    /*
8485 	     * Add the pair to atts
8486 	     */
8487 	    if (atts == NULL) {
8488 	        maxatts = 22; /* allow for 10 attrs by default */
8489 	        atts = (const xmlChar **)
8490 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8491 		if (atts == NULL) {
8492 		    xmlErrMemory(ctxt, NULL);
8493 		    if (attvalue != NULL)
8494 			xmlFree(attvalue);
8495 		    goto failed;
8496 		}
8497 		ctxt->atts = atts;
8498 		ctxt->maxatts = maxatts;
8499 	    } else if (nbatts + 4 > maxatts) {
8500 	        const xmlChar **n;
8501 
8502 	        maxatts *= 2;
8503 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8504 					     maxatts * sizeof(const xmlChar *));
8505 		if (n == NULL) {
8506 		    xmlErrMemory(ctxt, NULL);
8507 		    if (attvalue != NULL)
8508 			xmlFree(attvalue);
8509 		    goto failed;
8510 		}
8511 		atts = n;
8512 		ctxt->atts = atts;
8513 		ctxt->maxatts = maxatts;
8514 	    }
8515 	    atts[nbatts++] = attname;
8516 	    atts[nbatts++] = attvalue;
8517 	    atts[nbatts] = NULL;
8518 	    atts[nbatts + 1] = NULL;
8519 	} else {
8520 	    if (attvalue != NULL)
8521 		xmlFree(attvalue);
8522 	}
8523 
8524 failed:
8525 
8526 	GROW
8527 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8528 	    break;
8529 	if (SKIP_BLANKS == 0) {
8530 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8531 			   "attributes construct error\n");
8532 	}
8533         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8534             (attname == NULL) && (attvalue == NULL)) {
8535 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8536 			   "xmlParseStartTag: problem parsing attributes\n");
8537 	    break;
8538 	}
8539 	SHRINK;
8540         GROW;
8541     }
8542 
8543     /*
8544      * SAX: Start of Element !
8545      */
8546     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8547 	(!ctxt->disableSAX)) {
8548 	if (nbatts > 0)
8549 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8550 	else
8551 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8552     }
8553 
8554     if (atts != NULL) {
8555         /* Free only the content strings */
8556         for (i = 1;i < nbatts;i+=2)
8557 	    if (atts[i] != NULL)
8558 	       xmlFree((xmlChar *) atts[i]);
8559     }
8560     return(name);
8561 }
8562 
8563 /**
8564  * xmlParseEndTag1:
8565  * @ctxt:  an XML parser context
8566  * @line:  line of the start tag
8567  * @nsNr:  number of namespaces on the start tag
8568  *
8569  * parse an end of tag
8570  *
8571  * [42] ETag ::= '</' Name S? '>'
8572  *
8573  * With namespace
8574  *
8575  * [NS 9] ETag ::= '</' QName S? '>'
8576  */
8577 
8578 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8579 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8580     const xmlChar *name;
8581 
8582     GROW;
8583     if ((RAW != '<') || (NXT(1) != '/')) {
8584 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8585 		       "xmlParseEndTag: '</' not found\n");
8586 	return;
8587     }
8588     SKIP(2);
8589 
8590     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8591 
8592     /*
8593      * We should definitely be at the ending "S? '>'" part
8594      */
8595     GROW;
8596     SKIP_BLANKS;
8597     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8598 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8599     } else
8600 	NEXT1;
8601 
8602     /*
8603      * [ WFC: Element Type Match ]
8604      * The Name in an element's end-tag must match the element type in the
8605      * start-tag.
8606      *
8607      */
8608     if (name != (xmlChar*)1) {
8609         if (name == NULL) name = BAD_CAST "unparseable";
8610         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8611 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8612 		                ctxt->name, line, name);
8613     }
8614 
8615     /*
8616      * SAX: End of Tag
8617      */
8618     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8619 	(!ctxt->disableSAX))
8620         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8621 
8622     namePop(ctxt);
8623     spacePop(ctxt);
8624     return;
8625 }
8626 
8627 /**
8628  * xmlParseEndTag:
8629  * @ctxt:  an XML parser context
8630  *
8631  * parse an end of tag
8632  *
8633  * [42] ETag ::= '</' Name S? '>'
8634  *
8635  * With namespace
8636  *
8637  * [NS 9] ETag ::= '</' QName S? '>'
8638  */
8639 
8640 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8641 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8642     xmlParseEndTag1(ctxt, 0);
8643 }
8644 #endif /* LIBXML_SAX1_ENABLED */
8645 
8646 /************************************************************************
8647  *									*
8648  *		      SAX 2 specific operations				*
8649  *									*
8650  ************************************************************************/
8651 
8652 /*
8653  * xmlGetNamespace:
8654  * @ctxt:  an XML parser context
8655  * @prefix:  the prefix to lookup
8656  *
8657  * Lookup the namespace name for the @prefix (which ca be NULL)
8658  * The prefix must come from the @ctxt->dict dictionary
8659  *
8660  * Returns the namespace name or NULL if not bound
8661  */
8662 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8663 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8664     int i;
8665 
8666     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8667     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8668         if (ctxt->nsTab[i] == prefix) {
8669 	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8670 	        return(NULL);
8671 	    return(ctxt->nsTab[i + 1]);
8672 	}
8673     return(NULL);
8674 }
8675 
8676 /**
8677  * xmlParseQName:
8678  * @ctxt:  an XML parser context
8679  * @prefix:  pointer to store the prefix part
8680  *
8681  * parse an XML Namespace QName
8682  *
8683  * [6]  QName  ::= (Prefix ':')? LocalPart
8684  * [7]  Prefix  ::= NCName
8685  * [8]  LocalPart  ::= NCName
8686  *
8687  * Returns the Name parsed or NULL
8688  */
8689 
8690 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8691 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8692     const xmlChar *l, *p;
8693 
8694     GROW;
8695 
8696     l = xmlParseNCName(ctxt);
8697     if (l == NULL) {
8698         if (CUR == ':') {
8699 	    l = xmlParseName(ctxt);
8700 	    if (l != NULL) {
8701 	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8702 		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8703 		*prefix = NULL;
8704 		return(l);
8705 	    }
8706 	}
8707         return(NULL);
8708     }
8709     if (CUR == ':') {
8710         NEXT;
8711 	p = l;
8712 	l = xmlParseNCName(ctxt);
8713 	if (l == NULL) {
8714 	    xmlChar *tmp;
8715 
8716             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8717 	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8718 	    l = xmlParseNmtoken(ctxt);
8719 	    if (l == NULL)
8720 		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8721 	    else {
8722 		tmp = xmlBuildQName(l, p, NULL, 0);
8723 		xmlFree((char *)l);
8724 	    }
8725 	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8726 	    if (tmp != NULL) xmlFree(tmp);
8727 	    *prefix = NULL;
8728 	    return(p);
8729 	}
8730 	if (CUR == ':') {
8731 	    xmlChar *tmp;
8732 
8733             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8734 	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8735 	    NEXT;
8736 	    tmp = (xmlChar *) xmlParseName(ctxt);
8737 	    if (tmp != NULL) {
8738 	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8739 		l = xmlDictLookup(ctxt->dict, tmp, -1);
8740 		if (tmp != NULL) xmlFree(tmp);
8741 		*prefix = p;
8742 		return(l);
8743 	    }
8744 	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8745 	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8746 	    if (tmp != NULL) xmlFree(tmp);
8747 	    *prefix = p;
8748 	    return(l);
8749 	}
8750 	*prefix = p;
8751     } else
8752         *prefix = NULL;
8753     return(l);
8754 }
8755 
8756 /**
8757  * xmlParseQNameAndCompare:
8758  * @ctxt:  an XML parser context
8759  * @name:  the localname
8760  * @prefix:  the prefix, if any.
8761  *
8762  * parse an XML name and compares for match
8763  * (specialized for endtag parsing)
8764  *
8765  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8766  * and the name for mismatch
8767  */
8768 
8769 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8770 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8771                         xmlChar const *prefix) {
8772     const xmlChar *cmp;
8773     const xmlChar *in;
8774     const xmlChar *ret;
8775     const xmlChar *prefix2;
8776 
8777     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8778 
8779     GROW;
8780     in = ctxt->input->cur;
8781 
8782     cmp = prefix;
8783     while (*in != 0 && *in == *cmp) {
8784 	++in;
8785 	++cmp;
8786     }
8787     if ((*cmp == 0) && (*in == ':')) {
8788         in++;
8789 	cmp = name;
8790 	while (*in != 0 && *in == *cmp) {
8791 	    ++in;
8792 	    ++cmp;
8793 	}
8794 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8795 	    /* success */
8796 	    ctxt->input->cur = in;
8797 	    return((const xmlChar*) 1);
8798 	}
8799     }
8800     /*
8801      * all strings coms from the dictionary, equality can be done directly
8802      */
8803     ret = xmlParseQName (ctxt, &prefix2);
8804     if ((ret == name) && (prefix == prefix2))
8805 	return((const xmlChar*) 1);
8806     return ret;
8807 }
8808 
8809 /**
8810  * xmlParseAttValueInternal:
8811  * @ctxt:  an XML parser context
8812  * @len:  attribute len result
8813  * @alloc:  whether the attribute was reallocated as a new string
8814  * @normalize:  if 1 then further non-CDATA normalization must be done
8815  *
8816  * parse a value for an attribute.
8817  * NOTE: if no normalization is needed, the routine will return pointers
8818  *       directly from the data buffer.
8819  *
8820  * 3.3.3 Attribute-Value Normalization:
8821  * Before the value of an attribute is passed to the application or
8822  * checked for validity, the XML processor must normalize it as follows:
8823  * - a character reference is processed by appending the referenced
8824  *   character to the attribute value
8825  * - an entity reference is processed by recursively processing the
8826  *   replacement text of the entity
8827  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8828  *   appending #x20 to the normalized value, except that only a single
8829  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8830  *   parsed entity or the literal entity value of an internal parsed entity
8831  * - other characters are processed by appending them to the normalized value
8832  * If the declared value is not CDATA, then the XML processor must further
8833  * process the normalized attribute value by discarding any leading and
8834  * trailing space (#x20) characters, and by replacing sequences of space
8835  * (#x20) characters by a single space (#x20) character.
8836  * All attributes for which no declaration has been read should be treated
8837  * by a non-validating parser as if declared CDATA.
8838  *
8839  * Returns the AttValue parsed or NULL. The value has to be freed by the
8840  *     caller if it was copied, this can be detected by val[*len] == 0.
8841  */
8842 
8843 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8844 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8845                          int normalize)
8846 {
8847     xmlChar limit = 0;
8848     const xmlChar *in = NULL, *start, *end, *last;
8849     xmlChar *ret = NULL;
8850     int line, col;
8851 
8852     GROW;
8853     in = (xmlChar *) CUR_PTR;
8854     line = ctxt->input->line;
8855     col = ctxt->input->col;
8856     if (*in != '"' && *in != '\'') {
8857         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8858         return (NULL);
8859     }
8860     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8861 
8862     /*
8863      * try to handle in this routine the most common case where no
8864      * allocation of a new string is required and where content is
8865      * pure ASCII.
8866      */
8867     limit = *in++;
8868     col++;
8869     end = ctxt->input->end;
8870     start = in;
8871     if (in >= end) {
8872         const xmlChar *oldbase = ctxt->input->base;
8873 	GROW;
8874 	if (oldbase != ctxt->input->base) {
8875 	    long delta = ctxt->input->base - oldbase;
8876 	    start = start + delta;
8877 	    in = in + delta;
8878 	}
8879 	end = ctxt->input->end;
8880     }
8881     if (normalize) {
8882         /*
8883 	 * Skip any leading spaces
8884 	 */
8885 	while ((in < end) && (*in != limit) &&
8886 	       ((*in == 0x20) || (*in == 0x9) ||
8887 	        (*in == 0xA) || (*in == 0xD))) {
8888 	    if (*in == 0xA) {
8889 	        line++; col = 1;
8890 	    } else {
8891 	        col++;
8892 	    }
8893 	    in++;
8894 	    start = in;
8895 	    if (in >= end) {
8896 		const xmlChar *oldbase = ctxt->input->base;
8897 		GROW;
8898                 if (ctxt->instate == XML_PARSER_EOF)
8899                     return(NULL);
8900 		if (oldbase != ctxt->input->base) {
8901 		    long delta = ctxt->input->base - oldbase;
8902 		    start = start + delta;
8903 		    in = in + delta;
8904 		}
8905 		end = ctxt->input->end;
8906                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8907                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8908                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8909                                    "AttValue length too long\n");
8910                     return(NULL);
8911                 }
8912 	    }
8913 	}
8914 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8915 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8916 	    col++;
8917 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
8918 	    if (in >= end) {
8919 		const xmlChar *oldbase = ctxt->input->base;
8920 		GROW;
8921                 if (ctxt->instate == XML_PARSER_EOF)
8922                     return(NULL);
8923 		if (oldbase != ctxt->input->base) {
8924 		    long delta = ctxt->input->base - oldbase;
8925 		    start = start + delta;
8926 		    in = in + delta;
8927 		}
8928 		end = ctxt->input->end;
8929                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8930                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8931                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8932                                    "AttValue length too long\n");
8933                     return(NULL);
8934                 }
8935 	    }
8936 	}
8937 	last = in;
8938 	/*
8939 	 * skip the trailing blanks
8940 	 */
8941 	while ((last[-1] == 0x20) && (last > start)) last--;
8942 	while ((in < end) && (*in != limit) &&
8943 	       ((*in == 0x20) || (*in == 0x9) ||
8944 	        (*in == 0xA) || (*in == 0xD))) {
8945 	    if (*in == 0xA) {
8946 	        line++, col = 1;
8947 	    } else {
8948 	        col++;
8949 	    }
8950 	    in++;
8951 	    if (in >= end) {
8952 		const xmlChar *oldbase = ctxt->input->base;
8953 		GROW;
8954                 if (ctxt->instate == XML_PARSER_EOF)
8955                     return(NULL);
8956 		if (oldbase != ctxt->input->base) {
8957 		    long delta = ctxt->input->base - oldbase;
8958 		    start = start + delta;
8959 		    in = in + delta;
8960 		    last = last + delta;
8961 		}
8962 		end = ctxt->input->end;
8963                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8964                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8965                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8966                                    "AttValue length too long\n");
8967                     return(NULL);
8968                 }
8969 	    }
8970 	}
8971         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8972             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8973             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8974                            "AttValue length too long\n");
8975             return(NULL);
8976         }
8977 	if (*in != limit) goto need_complex;
8978     } else {
8979 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8980 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8981 	    in++;
8982 	    col++;
8983 	    if (in >= end) {
8984 		const xmlChar *oldbase = ctxt->input->base;
8985 		GROW;
8986                 if (ctxt->instate == XML_PARSER_EOF)
8987                     return(NULL);
8988 		if (oldbase != ctxt->input->base) {
8989 		    long delta = ctxt->input->base - oldbase;
8990 		    start = start + delta;
8991 		    in = in + delta;
8992 		}
8993 		end = ctxt->input->end;
8994                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8995                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8996                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8997                                    "AttValue length too long\n");
8998                     return(NULL);
8999                 }
9000 	    }
9001 	}
9002 	last = in;
9003         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9004             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9005             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9006                            "AttValue length too long\n");
9007             return(NULL);
9008         }
9009 	if (*in != limit) goto need_complex;
9010     }
9011     in++;
9012     col++;
9013     if (len != NULL) {
9014         *len = last - start;
9015         ret = (xmlChar *) start;
9016     } else {
9017         if (alloc) *alloc = 1;
9018         ret = xmlStrndup(start, last - start);
9019     }
9020     CUR_PTR = in;
9021     ctxt->input->line = line;
9022     ctxt->input->col = col;
9023     if (alloc) *alloc = 0;
9024     return ret;
9025 need_complex:
9026     if (alloc) *alloc = 1;
9027     return xmlParseAttValueComplex(ctxt, len, normalize);
9028 }
9029 
9030 /**
9031  * xmlParseAttribute2:
9032  * @ctxt:  an XML parser context
9033  * @pref:  the element prefix
9034  * @elem:  the element name
9035  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9036  * @value:  a xmlChar ** used to store the value of the attribute
9037  * @len:  an int * to save the length of the attribute
9038  * @alloc:  an int * to indicate if the attribute was allocated
9039  *
9040  * parse an attribute in the new SAX2 framework.
9041  *
9042  * Returns the attribute name, and the value in *value, .
9043  */
9044 
9045 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9046 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9047                    const xmlChar * pref, const xmlChar * elem,
9048                    const xmlChar ** prefix, xmlChar ** value,
9049                    int *len, int *alloc)
9050 {
9051     const xmlChar *name;
9052     xmlChar *val, *internal_val = NULL;
9053     int normalize = 0;
9054 
9055     *value = NULL;
9056     GROW;
9057     name = xmlParseQName(ctxt, prefix);
9058     if (name == NULL) {
9059         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9060                        "error parsing attribute name\n");
9061         return (NULL);
9062     }
9063 
9064     /*
9065      * get the type if needed
9066      */
9067     if (ctxt->attsSpecial != NULL) {
9068         int type;
9069 
9070         type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9071                                                  pref, elem, *prefix, name);
9072         if (type != 0)
9073             normalize = 1;
9074     }
9075 
9076     /*
9077      * read the value
9078      */
9079     SKIP_BLANKS;
9080     if (RAW == '=') {
9081         NEXT;
9082         SKIP_BLANKS;
9083         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9084 	if (normalize) {
9085 	    /*
9086 	     * Sometimes a second normalisation pass for spaces is needed
9087 	     * but that only happens if charrefs or entities refernces
9088 	     * have been used in the attribute value, i.e. the attribute
9089 	     * value have been extracted in an allocated string already.
9090 	     */
9091 	    if (*alloc) {
9092 	        const xmlChar *val2;
9093 
9094 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9095 		if ((val2 != NULL) && (val2 != val)) {
9096 		    xmlFree(val);
9097 		    val = (xmlChar *) val2;
9098 		}
9099 	    }
9100 	}
9101         ctxt->instate = XML_PARSER_CONTENT;
9102     } else {
9103         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9104                           "Specification mandates value for attribute %s\n",
9105                           name);
9106         return (NULL);
9107     }
9108 
9109     if (*prefix == ctxt->str_xml) {
9110         /*
9111          * Check that xml:lang conforms to the specification
9112          * No more registered as an error, just generate a warning now
9113          * since this was deprecated in XML second edition
9114          */
9115         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9116             internal_val = xmlStrndup(val, *len);
9117             if (!xmlCheckLanguageID(internal_val)) {
9118                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9119                               "Malformed value for xml:lang : %s\n",
9120                               internal_val, NULL);
9121             }
9122         }
9123 
9124         /*
9125          * Check that xml:space conforms to the specification
9126          */
9127         if (xmlStrEqual(name, BAD_CAST "space")) {
9128             internal_val = xmlStrndup(val, *len);
9129             if (xmlStrEqual(internal_val, BAD_CAST "default"))
9130                 *(ctxt->space) = 0;
9131             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9132                 *(ctxt->space) = 1;
9133             else {
9134                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9135                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9136                               internal_val, NULL);
9137             }
9138         }
9139         if (internal_val) {
9140             xmlFree(internal_val);
9141         }
9142     }
9143 
9144     *value = val;
9145     return (name);
9146 }
9147 /**
9148  * xmlParseStartTag2:
9149  * @ctxt:  an XML parser context
9150  *
 * Parse a start tag, either for the element rule or for EmptyElement.
 * In both cases the tag closing characters are not parsed.
9153  * This routine is called when running SAX2 parsing
9154  *
9155  * [40] STag ::= '<' Name (S Attribute)* S? '>'
9156  *
9157  * [ WFC: Unique Att Spec ]
9158  * No attribute name may appear more than once in the same start-tag or
9159  * empty-element tag.
9160  *
9161  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9162  *
9163  * [ WFC: Unique Att Spec ]
9164  * No attribute name may appear more than once in the same start-tag or
9165  * empty-element tag.
9166  *
9167  * With namespace:
9168  *
9169  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9170  *
9171  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9172  *
9173  * Returns the element name parsed
9174  */
9175 
9176 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9177 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9178                   const xmlChar **URI, int *tlen) {
9179     const xmlChar *localname;
9180     const xmlChar *prefix;
9181     const xmlChar *attname;
9182     const xmlChar *aprefix;
9183     const xmlChar *nsname;
9184     xmlChar *attvalue;
9185     const xmlChar **atts = ctxt->atts;
9186     int maxatts = ctxt->maxatts;
9187     int nratts, nbatts, nbdef, inputid;
9188     int i, j, nbNs, attval;
9189     unsigned long cur;
9190     int nsNr = ctxt->nsNr;
9191 
9192     if (RAW != '<') return(NULL);
9193     NEXT1;
9194 
9195     /*
9196      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9197      *       point since the attribute values may be stored as pointers to
9198      *       the buffer and calling SHRINK would destroy them !
9199      *       The Shrinking is only possible once the full set of attribute
9200      *       callbacks have been done.
9201      */
9202     SHRINK;
9203     cur = ctxt->input->cur - ctxt->input->base;
9204     inputid = ctxt->input->id;
9205     nbatts = 0;
9206     nratts = 0;
9207     nbdef = 0;
9208     nbNs = 0;
9209     attval = 0;
9210     /* Forget any namespaces added during an earlier parse of this element. */
9211     ctxt->nsNr = nsNr;
9212 
9213     localname = xmlParseQName(ctxt, &prefix);
9214     if (localname == NULL) {
9215 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9216 		       "StartTag: invalid element name\n");
9217         return(NULL);
9218     }
9219     *tlen = ctxt->input->cur - ctxt->input->base - cur;
9220 
9221     /*
9222      * Now parse the attributes, it ends up with the ending
9223      *
9224      * (S Attribute)* S?
9225      */
9226     SKIP_BLANKS;
9227     GROW;
9228 
9229     while (((RAW != '>') &&
9230 	   ((RAW != '/') || (NXT(1) != '>')) &&
9231 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9232 	const xmlChar *q = CUR_PTR;
9233 	unsigned int cons = ctxt->input->consumed;
9234 	int len = -1, alloc = 0;
9235 
9236 	attname = xmlParseAttribute2(ctxt, prefix, localname,
9237 	                             &aprefix, &attvalue, &len, &alloc);
9238         if ((attname == NULL) || (attvalue == NULL))
9239             goto next_attr;
9240 	if (len < 0) len = xmlStrlen(attvalue);
9241 
9242         if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9243             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9244             xmlURIPtr uri;
9245 
9246             if (URL == NULL) {
9247                 xmlErrMemory(ctxt, "dictionary allocation failure");
9248                 if ((attvalue != NULL) && (alloc != 0))
9249                     xmlFree(attvalue);
9250                 return(NULL);
9251             }
9252             if (*URL != 0) {
9253                 uri = xmlParseURI((const char *) URL);
9254                 if (uri == NULL) {
9255                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9256                              "xmlns: '%s' is not a valid URI\n",
9257                                        URL, NULL, NULL);
9258                 } else {
9259                     if (uri->scheme == NULL) {
9260                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9261                                   "xmlns: URI %s is not absolute\n",
9262                                   URL, NULL, NULL);
9263                     }
9264                     xmlFreeURI(uri);
9265                 }
9266                 if (URL == ctxt->str_xml_ns) {
9267                     if (attname != ctxt->str_xml) {
9268                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9269                      "xml namespace URI cannot be the default namespace\n",
9270                                  NULL, NULL, NULL);
9271                     }
9272                     goto next_attr;
9273                 }
9274                 if ((len == 29) &&
9275                     (xmlStrEqual(URL,
9276                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9277                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9278                          "reuse of the xmlns namespace name is forbidden\n",
9279                              NULL, NULL, NULL);
9280                     goto next_attr;
9281                 }
9282             }
9283             /*
9284              * check that it's not a defined namespace
9285              */
9286             for (j = 1;j <= nbNs;j++)
9287                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9288                     break;
9289             if (j <= nbNs)
9290                 xmlErrAttributeDup(ctxt, NULL, attname);
9291             else
9292                 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9293 
9294         } else if (aprefix == ctxt->str_xmlns) {
9295             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9296             xmlURIPtr uri;
9297 
9298             if (attname == ctxt->str_xml) {
9299                 if (URL != ctxt->str_xml_ns) {
9300                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9301                              "xml namespace prefix mapped to wrong URI\n",
9302                              NULL, NULL, NULL);
9303                 }
9304                 /*
9305                  * Do not keep a namespace definition node
9306                  */
9307                 goto next_attr;
9308             }
9309             if (URL == ctxt->str_xml_ns) {
9310                 if (attname != ctxt->str_xml) {
9311                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9312                              "xml namespace URI mapped to wrong prefix\n",
9313                              NULL, NULL, NULL);
9314                 }
9315                 goto next_attr;
9316             }
9317             if (attname == ctxt->str_xmlns) {
9318                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9319                          "redefinition of the xmlns prefix is forbidden\n",
9320                          NULL, NULL, NULL);
9321                 goto next_attr;
9322             }
9323             if ((len == 29) &&
9324                 (xmlStrEqual(URL,
9325                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9326                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9327                          "reuse of the xmlns namespace name is forbidden\n",
9328                          NULL, NULL, NULL);
9329                 goto next_attr;
9330             }
9331             if ((URL == NULL) || (URL[0] == 0)) {
9332                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9333                          "xmlns:%s: Empty XML namespace is not allowed\n",
9334                               attname, NULL, NULL);
9335                 goto next_attr;
9336             } else {
9337                 uri = xmlParseURI((const char *) URL);
9338                 if (uri == NULL) {
9339                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9340                          "xmlns:%s: '%s' is not a valid URI\n",
9341                                        attname, URL, NULL);
9342                 } else {
9343                     if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9344                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9345                                   "xmlns:%s: URI %s is not absolute\n",
9346                                   attname, URL, NULL);
9347                     }
9348                     xmlFreeURI(uri);
9349                 }
9350             }
9351 
9352             /*
9353              * check that it's not a defined namespace
9354              */
9355             for (j = 1;j <= nbNs;j++)
9356                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9357                     break;
9358             if (j <= nbNs)
9359                 xmlErrAttributeDup(ctxt, aprefix, attname);
9360             else
9361                 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9362 
9363         } else {
9364             /*
9365              * Add the pair to atts
9366              */
9367             if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9368                 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9369                     goto next_attr;
9370                 }
9371                 maxatts = ctxt->maxatts;
9372                 atts = ctxt->atts;
9373             }
9374             ctxt->attallocs[nratts++] = alloc;
9375             atts[nbatts++] = attname;
9376             atts[nbatts++] = aprefix;
9377             /*
9378              * The namespace URI field is used temporarily to point at the
9379              * base of the current input buffer for non-alloced attributes.
9380              * When the input buffer is reallocated, all the pointers become
9381              * invalid, but they can be reconstructed later.
9382              */
9383             if (alloc)
9384                 atts[nbatts++] = NULL;
9385             else
9386                 atts[nbatts++] = ctxt->input->base;
9387             atts[nbatts++] = attvalue;
9388             attvalue += len;
9389             atts[nbatts++] = attvalue;
9390             /*
9391              * tag if some deallocation is needed
9392              */
9393             if (alloc != 0) attval = 1;
9394             attvalue = NULL; /* moved into atts */
9395         }
9396 
9397 next_attr:
9398         if ((attvalue != NULL) && (alloc != 0)) {
9399             xmlFree(attvalue);
9400             attvalue = NULL;
9401         }
9402 
9403 	GROW
9404         if (ctxt->instate == XML_PARSER_EOF)
9405             break;
9406 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9407 	    break;
9408 	if (SKIP_BLANKS == 0) {
9409 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9410 			   "attributes construct error\n");
9411 	    break;
9412 	}
9413         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9414             (attname == NULL) && (attvalue == NULL)) {
9415 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9416 	         "xmlParseStartTag: problem parsing attributes\n");
9417 	    break;
9418 	}
9419         GROW;
9420     }
9421 
9422     if (ctxt->input->id != inputid) {
9423         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9424                     "Unexpected change of input\n");
9425         localname = NULL;
9426         goto done;
9427     }
9428 
9429     /* Reconstruct attribute value pointers. */
9430     for (i = 0, j = 0; j < nratts; i += 5, j++) {
9431         if (atts[i+2] != NULL) {
9432             /*
9433              * Arithmetic on dangling pointers is technically undefined
9434              * behavior, but well...
9435              */
9436             ptrdiff_t offset = ctxt->input->base - atts[i+2];
9437             atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9438             atts[i+3] += offset;  /* value */
9439             atts[i+4] += offset;  /* valuend */
9440         }
9441     }
9442 
9443     /*
9444      * The attributes defaulting
9445      */
9446     if (ctxt->attsDefault != NULL) {
9447         xmlDefAttrsPtr defaults;
9448 
9449 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9450 	if (defaults != NULL) {
9451 	    for (i = 0;i < defaults->nbAttrs;i++) {
9452 	        attname = defaults->values[5 * i];
9453 		aprefix = defaults->values[5 * i + 1];
9454 
9455                 /*
9456 		 * special work for namespaces defaulted defs
9457 		 */
9458 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9459 		    /*
9460 		     * check that it's not a defined namespace
9461 		     */
9462 		    for (j = 1;j <= nbNs;j++)
9463 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9464 			    break;
9465 	            if (j <= nbNs) continue;
9466 
9467 		    nsname = xmlGetNamespace(ctxt, NULL);
9468 		    if (nsname != defaults->values[5 * i + 2]) {
9469 			if (nsPush(ctxt, NULL,
9470 			           defaults->values[5 * i + 2]) > 0)
9471 			    nbNs++;
9472 		    }
9473 		} else if (aprefix == ctxt->str_xmlns) {
9474 		    /*
9475 		     * check that it's not a defined namespace
9476 		     */
9477 		    for (j = 1;j <= nbNs;j++)
9478 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9479 			    break;
9480 	            if (j <= nbNs) continue;
9481 
9482 		    nsname = xmlGetNamespace(ctxt, attname);
9483 		    if (nsname != defaults->values[2]) {
9484 			if (nsPush(ctxt, attname,
9485 			           defaults->values[5 * i + 2]) > 0)
9486 			    nbNs++;
9487 		    }
9488 		} else {
9489 		    /*
9490 		     * check that it's not a defined attribute
9491 		     */
9492 		    for (j = 0;j < nbatts;j+=5) {
9493 			if ((attname == atts[j]) && (aprefix == atts[j+1]))
9494 			    break;
9495 		    }
9496 		    if (j < nbatts) continue;
9497 
9498 		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9499 			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9500 			    return(NULL);
9501 			}
9502 			maxatts = ctxt->maxatts;
9503 			atts = ctxt->atts;
9504 		    }
9505 		    atts[nbatts++] = attname;
9506 		    atts[nbatts++] = aprefix;
9507 		    if (aprefix == NULL)
9508 			atts[nbatts++] = NULL;
9509 		    else
9510 		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9511 		    atts[nbatts++] = defaults->values[5 * i + 2];
9512 		    atts[nbatts++] = defaults->values[5 * i + 3];
9513 		    if ((ctxt->standalone == 1) &&
9514 		        (defaults->values[5 * i + 4] != NULL)) {
9515 			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9516 	  "standalone: attribute %s on %s defaulted from external subset\n",
9517 	                                 attname, localname);
9518 		    }
9519 		    nbdef++;
9520 		}
9521 	    }
9522 	}
9523     }
9524 
9525     /*
9526      * The attributes checkings
9527      */
9528     for (i = 0; i < nbatts;i += 5) {
9529         /*
9530 	* The default namespace does not apply to attribute names.
9531 	*/
9532 	if (atts[i + 1] != NULL) {
9533 	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9534 	    if (nsname == NULL) {
9535 		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9536 		    "Namespace prefix %s for %s on %s is not defined\n",
9537 		    atts[i + 1], atts[i], localname);
9538 	    }
9539 	    atts[i + 2] = nsname;
9540 	} else
9541 	    nsname = NULL;
9542 	/*
9543 	 * [ WFC: Unique Att Spec ]
9544 	 * No attribute name may appear more than once in the same
9545 	 * start-tag or empty-element tag.
9546 	 * As extended by the Namespace in XML REC.
9547 	 */
9548         for (j = 0; j < i;j += 5) {
9549 	    if (atts[i] == atts[j]) {
9550 	        if (atts[i+1] == atts[j+1]) {
9551 		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9552 		    break;
9553 		}
9554 		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9555 		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9556 			     "Namespaced Attribute %s in '%s' redefined\n",
9557 			     atts[i], nsname, NULL);
9558 		    break;
9559 		}
9560 	    }
9561 	}
9562     }
9563 
9564     nsname = xmlGetNamespace(ctxt, prefix);
9565     if ((prefix != NULL) && (nsname == NULL)) {
9566 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9567 	         "Namespace prefix %s on %s is not defined\n",
9568 		 prefix, localname, NULL);
9569     }
9570     *pref = prefix;
9571     *URI = nsname;
9572 
9573     /*
9574      * SAX: Start of Element !
9575      */
9576     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9577 	(!ctxt->disableSAX)) {
9578 	if (nbNs > 0)
9579 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9580 			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9581 			  nbatts / 5, nbdef, atts);
9582 	else
9583 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9584 	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9585     }
9586 
9587 done:
9588     /*
9589      * Free up attribute allocated strings if needed
9590      */
9591     if (attval != 0) {
9592 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9593 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9594 	        xmlFree((xmlChar *) atts[i]);
9595     }
9596 
9597     return(localname);
9598 }
9599 
9600 /**
9601  * xmlParseEndTag2:
9602  * @ctxt:  an XML parser context
9603  * @line:  line of the start tag
9604  * @nsNr:  number of namespaces on the start tag
9605  *
9606  * parse an end of tag
9607  *
9608  * [42] ETag ::= '</' Name S? '>'
9609  *
9610  * With namespace
9611  *
9612  * [NS 9] ETag ::= '</' QName S? '>'
9613  */
9614 
9615 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9616 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9617                 const xmlChar *URI, int line, int nsNr, int tlen) {
9618     const xmlChar *name;
9619     size_t curLength;
9620 
9621     GROW;
9622     if ((RAW != '<') || (NXT(1) != '/')) {
9623 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9624 	return;
9625     }
9626     SKIP(2);
9627 
9628     curLength = ctxt->input->end - ctxt->input->cur;
9629     if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9630         (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9631         if ((curLength >= (size_t)(tlen + 1)) &&
9632 	    (ctxt->input->cur[tlen] == '>')) {
9633 	    ctxt->input->cur += tlen + 1;
9634 	    ctxt->input->col += tlen + 1;
9635 	    goto done;
9636 	}
9637 	ctxt->input->cur += tlen;
9638 	ctxt->input->col += tlen;
9639 	name = (xmlChar*)1;
9640     } else {
9641 	if (prefix == NULL)
9642 	    name = xmlParseNameAndCompare(ctxt, ctxt->name);
9643 	else
9644 	    name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9645     }
9646 
9647     /*
9648      * We should definitely be at the ending "S? '>'" part
9649      */
9650     GROW;
9651     if (ctxt->instate == XML_PARSER_EOF)
9652         return;
9653     SKIP_BLANKS;
9654     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9655 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9656     } else
9657 	NEXT1;
9658 
9659     /*
9660      * [ WFC: Element Type Match ]
9661      * The Name in an element's end-tag must match the element type in the
9662      * start-tag.
9663      *
9664      */
9665     if (name != (xmlChar*)1) {
9666         if (name == NULL) name = BAD_CAST "unparseable";
9667         if ((line == 0) && (ctxt->node != NULL))
9668             line = ctxt->node->line;
9669         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9670 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9671 		                ctxt->name, line, name);
9672     }
9673 
9674     /*
9675      * SAX: End of Tag
9676      */
9677 done:
9678     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9679 	(!ctxt->disableSAX))
9680 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9681 
9682     spacePop(ctxt);
9683     if (nsNr != 0)
9684 	nsPop(ctxt, nsNr);
9685     return;
9686 }
9687 
9688 /**
9689  * xmlParseCDSect:
9690  * @ctxt:  an XML parser context
9691  *
9692  * Parse escaped pure raw content.
9693  *
9694  * [18] CDSect ::= CDStart CData CDEnd
9695  *
9696  * [19] CDStart ::= '<![CDATA['
9697  *
9698  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9699  *
9700  * [21] CDEnd ::= ']]>'
9701  */
9702 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9703 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9704     xmlChar *buf = NULL;
9705     int len = 0;
9706     int size = XML_PARSER_BUFFER_SIZE;
9707     int r, rl;
9708     int	s, sl;
9709     int cur, l;
9710     int count = 0;
9711 
9712     /* Check 2.6.0 was NXT(0) not RAW */
9713     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9714 	SKIP(9);
9715     } else
9716         return;
9717 
9718     ctxt->instate = XML_PARSER_CDATA_SECTION;
9719     r = CUR_CHAR(rl);
9720     if (!IS_CHAR(r)) {
9721 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9722 	ctxt->instate = XML_PARSER_CONTENT;
9723         return;
9724     }
9725     NEXTL(rl);
9726     s = CUR_CHAR(sl);
9727     if (!IS_CHAR(s)) {
9728 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9729 	ctxt->instate = XML_PARSER_CONTENT;
9730         return;
9731     }
9732     NEXTL(sl);
9733     cur = CUR_CHAR(l);
9734     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9735     if (buf == NULL) {
9736 	xmlErrMemory(ctxt, NULL);
9737 	return;
9738     }
9739     while (IS_CHAR(cur) &&
9740            ((r != ']') || (s != ']') || (cur != '>'))) {
9741 	if (len + 5 >= size) {
9742 	    xmlChar *tmp;
9743 
9744             if ((size > XML_MAX_TEXT_LENGTH) &&
9745                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9746                 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9747                              "CData section too big found", NULL);
9748                 xmlFree (buf);
9749                 return;
9750             }
9751 	    tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9752 	    if (tmp == NULL) {
9753 	        xmlFree(buf);
9754 		xmlErrMemory(ctxt, NULL);
9755 		return;
9756 	    }
9757 	    buf = tmp;
9758 	    size *= 2;
9759 	}
9760 	COPY_BUF(rl,buf,len,r);
9761 	r = s;
9762 	rl = sl;
9763 	s = cur;
9764 	sl = l;
9765 	count++;
9766 	if (count > 50) {
9767 	    GROW;
9768             if (ctxt->instate == XML_PARSER_EOF) {
9769 		xmlFree(buf);
9770 		return;
9771             }
9772 	    count = 0;
9773 	}
9774 	NEXTL(l);
9775 	cur = CUR_CHAR(l);
9776     }
9777     buf[len] = 0;
9778     ctxt->instate = XML_PARSER_CONTENT;
9779     if (cur != '>') {
9780 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9781 	                     "CData section not finished\n%.50s\n", buf);
9782 	xmlFree(buf);
9783         return;
9784     }
9785     NEXTL(l);
9786 
9787     /*
9788      * OK the buffer is to be consumed as cdata.
9789      */
9790     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9791 	if (ctxt->sax->cdataBlock != NULL)
9792 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9793 	else if (ctxt->sax->characters != NULL)
9794 	    ctxt->sax->characters(ctxt->userData, buf, len);
9795     }
9796     xmlFree(buf);
9797 }
9798 
9799 /**
9800  * xmlParseContent:
9801  * @ctxt:  an XML parser context
9802  *
9803  * Parse a content:
9804  *
9805  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9806  */
9807 
9808 void
xmlParseContent(xmlParserCtxtPtr ctxt)9809 xmlParseContent(xmlParserCtxtPtr ctxt) {
9810     GROW;
9811     while ((RAW != 0) &&
9812 	   ((RAW != '<') || (NXT(1) != '/')) &&
9813 	   (ctxt->instate != XML_PARSER_EOF)) {
9814 	const xmlChar *test = CUR_PTR;
9815 	unsigned int cons = ctxt->input->consumed;
9816 	const xmlChar *cur = ctxt->input->cur;
9817 
9818 	/*
9819 	 * First case : a Processing Instruction.
9820 	 */
9821 	if ((*cur == '<') && (cur[1] == '?')) {
9822 	    xmlParsePI(ctxt);
9823 	}
9824 
9825 	/*
9826 	 * Second case : a CDSection
9827 	 */
9828 	/* 2.6.0 test was *cur not RAW */
9829 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9830 	    xmlParseCDSect(ctxt);
9831 	}
9832 
9833 	/*
9834 	 * Third case :  a comment
9835 	 */
9836 	else if ((*cur == '<') && (NXT(1) == '!') &&
9837 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9838 	    xmlParseComment(ctxt);
9839 	    ctxt->instate = XML_PARSER_CONTENT;
9840 	}
9841 
9842 	/*
9843 	 * Fourth case :  a sub-element.
9844 	 */
9845 	else if (*cur == '<') {
9846 	    xmlParseElement(ctxt);
9847 	}
9848 
9849 	/*
9850 	 * Fifth case : a reference. If if has not been resolved,
9851 	 *    parsing returns it's Name, create the node
9852 	 */
9853 
9854 	else if (*cur == '&') {
9855 	    xmlParseReference(ctxt);
9856 	}
9857 
9858 	/*
9859 	 * Last case, text. Note that References are handled directly.
9860 	 */
9861 	else {
9862 	    xmlParseCharData(ctxt, 0);
9863 	}
9864 
9865 	GROW;
9866 	SHRINK;
9867 
9868 	if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9869 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9870 	                "detected an error in element content\n");
9871 	    xmlHaltParser(ctxt);
9872             break;
9873 	}
9874     }
9875 }
9876 
9877 /**
9878  * xmlParseElement:
9879  * @ctxt:  an XML parser context
9880  *
9881  * parse an XML element, this is highly recursive
9882  *
9883  * [39] element ::= EmptyElemTag | STag content ETag
9884  *
9885  * [ WFC: Element Type Match ]
9886  * The Name in an element's end-tag must match the element type in the
9887  * start-tag.
9888  *
9889  */
9890 
9891 void
xmlParseElement(xmlParserCtxtPtr ctxt)9892 xmlParseElement(xmlParserCtxtPtr ctxt) {
9893     const xmlChar *name;
9894     const xmlChar *prefix = NULL;
9895     const xmlChar *URI = NULL;
9896     xmlParserNodeInfo node_info;
9897     int line, tlen = 0;
9898     xmlNodePtr ret;
9899     int nsNr = ctxt->nsNr;
9900 
9901     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9902         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9903 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9904 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9905 			  xmlParserMaxDepth);
9906 	xmlHaltParser(ctxt);
9907 	return;
9908     }
9909 
9910     /* Capture start position */
9911     if (ctxt->record_info) {
9912         node_info.begin_pos = ctxt->input->consumed +
9913                           (CUR_PTR - ctxt->input->base);
9914 	node_info.begin_line = ctxt->input->line;
9915     }
9916 
9917     if (ctxt->spaceNr == 0)
9918 	spacePush(ctxt, -1);
9919     else if (*ctxt->space == -2)
9920 	spacePush(ctxt, -1);
9921     else
9922 	spacePush(ctxt, *ctxt->space);
9923 
9924     line = ctxt->input->line;
9925 #ifdef LIBXML_SAX1_ENABLED
9926     if (ctxt->sax2)
9927 #endif /* LIBXML_SAX1_ENABLED */
9928         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9929 #ifdef LIBXML_SAX1_ENABLED
9930     else
9931 	name = xmlParseStartTag(ctxt);
9932 #endif /* LIBXML_SAX1_ENABLED */
9933     if (ctxt->instate == XML_PARSER_EOF)
9934 	return;
9935     if (name == NULL) {
9936 	spacePop(ctxt);
9937         return;
9938     }
9939     namePush(ctxt, name);
9940     ret = ctxt->node;
9941 
9942 #ifdef LIBXML_VALID_ENABLED
9943     /*
9944      * [ VC: Root Element Type ]
9945      * The Name in the document type declaration must match the element
9946      * type of the root element.
9947      */
9948     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9949         ctxt->node && (ctxt->node == ctxt->myDoc->children))
9950         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9951 #endif /* LIBXML_VALID_ENABLED */
9952 
9953     /*
9954      * Check for an Empty Element.
9955      */
9956     if ((RAW == '/') && (NXT(1) == '>')) {
9957         SKIP(2);
9958 	if (ctxt->sax2) {
9959 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9960 		(!ctxt->disableSAX))
9961 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9962 #ifdef LIBXML_SAX1_ENABLED
9963 	} else {
9964 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9965 		(!ctxt->disableSAX))
9966 		ctxt->sax->endElement(ctxt->userData, name);
9967 #endif /* LIBXML_SAX1_ENABLED */
9968 	}
9969 	namePop(ctxt);
9970 	spacePop(ctxt);
9971 	if (nsNr != ctxt->nsNr)
9972 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9973 	if ( ret != NULL && ctxt->record_info ) {
9974 	   node_info.end_pos = ctxt->input->consumed +
9975 			      (CUR_PTR - ctxt->input->base);
9976 	   node_info.end_line = ctxt->input->line;
9977 	   node_info.node = ret;
9978 	   xmlParserAddNodeInfo(ctxt, &node_info);
9979 	}
9980 	return;
9981     }
9982     if (RAW == '>') {
9983         NEXT1;
9984     } else {
9985         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9986 		     "Couldn't find end of Start Tag %s line %d\n",
9987 		                name, line, NULL);
9988 
9989 	/*
9990 	 * end of parsing of this node.
9991 	 */
9992 	nodePop(ctxt);
9993 	namePop(ctxt);
9994 	spacePop(ctxt);
9995 	if (nsNr != ctxt->nsNr)
9996 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9997 
9998 	/*
9999 	 * Capture end position and add node
10000 	 */
10001 	if ( ret != NULL && ctxt->record_info ) {
10002 	   node_info.end_pos = ctxt->input->consumed +
10003 			      (CUR_PTR - ctxt->input->base);
10004 	   node_info.end_line = ctxt->input->line;
10005 	   node_info.node = ret;
10006 	   xmlParserAddNodeInfo(ctxt, &node_info);
10007 	}
10008 	return;
10009     }
10010 
10011     /*
10012      * Parse the content of the element:
10013      */
10014     xmlParseContent(ctxt);
10015     if (ctxt->instate == XML_PARSER_EOF)
10016 	return;
10017     if (!IS_BYTE_CHAR(RAW)) {
10018         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10019 	 "Premature end of data in tag %s line %d\n",
10020 		                name, line, NULL);
10021 
10022 	/*
10023 	 * end of parsing of this node.
10024 	 */
10025 	nodePop(ctxt);
10026 	namePop(ctxt);
10027 	spacePop(ctxt);
10028 	if (nsNr != ctxt->nsNr)
10029 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10030 	return;
10031     }
10032 
10033     /*
10034      * parse the end of tag: '</' should be here.
10035      */
10036     if (ctxt->sax2) {
10037 	xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10038 	namePop(ctxt);
10039     }
10040 #ifdef LIBXML_SAX1_ENABLED
10041       else
10042 	xmlParseEndTag1(ctxt, line);
10043 #endif /* LIBXML_SAX1_ENABLED */
10044 
10045     /*
10046      * Capture end position and add node
10047      */
10048     if ( ret != NULL && ctxt->record_info ) {
10049        node_info.end_pos = ctxt->input->consumed +
10050                           (CUR_PTR - ctxt->input->base);
10051        node_info.end_line = ctxt->input->line;
10052        node_info.node = ret;
10053        xmlParserAddNodeInfo(ctxt, &node_info);
10054     }
10055 }
10056 
10057 /**
10058  * xmlParseVersionNum:
10059  * @ctxt:  an XML parser context
10060  *
10061  * parse the XML version value.
10062  *
10063  * [26] VersionNum ::= '1.' [0-9]+
10064  *
10065  * In practice allow [0-9].[0-9]+ at that level
10066  *
10067  * Returns the string giving the XML version number, or NULL
10068  */
10069 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10070 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10071     xmlChar *buf = NULL;
10072     int len = 0;
10073     int size = 10;
10074     xmlChar cur;
10075 
10076     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10077     if (buf == NULL) {
10078 	xmlErrMemory(ctxt, NULL);
10079 	return(NULL);
10080     }
10081     cur = CUR;
10082     if (!((cur >= '0') && (cur <= '9'))) {
10083 	xmlFree(buf);
10084 	return(NULL);
10085     }
10086     buf[len++] = cur;
10087     NEXT;
10088     cur=CUR;
10089     if (cur != '.') {
10090 	xmlFree(buf);
10091 	return(NULL);
10092     }
10093     buf[len++] = cur;
10094     NEXT;
10095     cur=CUR;
10096     while ((cur >= '0') && (cur <= '9')) {
10097 	if (len + 1 >= size) {
10098 	    xmlChar *tmp;
10099 
10100 	    size *= 2;
10101 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10102 	    if (tmp == NULL) {
10103 	        xmlFree(buf);
10104 		xmlErrMemory(ctxt, NULL);
10105 		return(NULL);
10106 	    }
10107 	    buf = tmp;
10108 	}
10109 	buf[len++] = cur;
10110 	NEXT;
10111 	cur=CUR;
10112     }
10113     buf[len] = 0;
10114     return(buf);
10115 }
10116 
10117 /**
10118  * xmlParseVersionInfo:
10119  * @ctxt:  an XML parser context
10120  *
10121  * parse the XML version.
10122  *
10123  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10124  *
10125  * [25] Eq ::= S? '=' S?
10126  *
10127  * Returns the version string, e.g. "1.0"
10128  */
10129 
10130 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10131 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10132     xmlChar *version = NULL;
10133 
10134     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10135 	SKIP(7);
10136 	SKIP_BLANKS;
10137 	if (RAW != '=') {
10138 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10139 	    return(NULL);
10140         }
10141 	NEXT;
10142 	SKIP_BLANKS;
10143 	if (RAW == '"') {
10144 	    NEXT;
10145 	    version = xmlParseVersionNum(ctxt);
10146 	    if (RAW != '"') {
10147 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10148 	    } else
10149 	        NEXT;
10150 	} else if (RAW == '\''){
10151 	    NEXT;
10152 	    version = xmlParseVersionNum(ctxt);
10153 	    if (RAW != '\'') {
10154 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10155 	    } else
10156 	        NEXT;
10157 	} else {
10158 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10159 	}
10160     }
10161     return(version);
10162 }
10163 
10164 /**
10165  * xmlParseEncName:
10166  * @ctxt:  an XML parser context
10167  *
10168  * parse the XML encoding name
10169  *
10170  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10171  *
10172  * Returns the encoding name value or NULL
10173  */
10174 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10175 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10176     xmlChar *buf = NULL;
10177     int len = 0;
10178     int size = 10;
10179     xmlChar cur;
10180 
10181     cur = CUR;
10182     if (((cur >= 'a') && (cur <= 'z')) ||
10183         ((cur >= 'A') && (cur <= 'Z'))) {
10184 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10185 	if (buf == NULL) {
10186 	    xmlErrMemory(ctxt, NULL);
10187 	    return(NULL);
10188 	}
10189 
10190 	buf[len++] = cur;
10191 	NEXT;
10192 	cur = CUR;
10193 	while (((cur >= 'a') && (cur <= 'z')) ||
10194 	       ((cur >= 'A') && (cur <= 'Z')) ||
10195 	       ((cur >= '0') && (cur <= '9')) ||
10196 	       (cur == '.') || (cur == '_') ||
10197 	       (cur == '-')) {
10198 	    if (len + 1 >= size) {
10199 	        xmlChar *tmp;
10200 
10201 		size *= 2;
10202 		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10203 		if (tmp == NULL) {
10204 		    xmlErrMemory(ctxt, NULL);
10205 		    xmlFree(buf);
10206 		    return(NULL);
10207 		}
10208 		buf = tmp;
10209 	    }
10210 	    buf[len++] = cur;
10211 	    NEXT;
10212 	    cur = CUR;
10213 	    if (cur == 0) {
10214 	        SHRINK;
10215 		GROW;
10216 		cur = CUR;
10217 	    }
10218         }
10219 	buf[len] = 0;
10220     } else {
10221 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10222     }
10223     return(buf);
10224 }
10225 
10226 /**
10227  * xmlParseEncodingDecl:
10228  * @ctxt:  an XML parser context
10229  *
10230  * parse the XML encoding declaration
10231  *
10232  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10233  *
10234  * this setups the conversion filters.
10235  *
10236  * Returns the encoding value or NULL
10237  */
10238 
10239 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10240 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10241     xmlChar *encoding = NULL;
10242 
10243     SKIP_BLANKS;
10244     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10245 	SKIP(8);
10246 	SKIP_BLANKS;
10247 	if (RAW != '=') {
10248 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10249 	    return(NULL);
10250         }
10251 	NEXT;
10252 	SKIP_BLANKS;
10253 	if (RAW == '"') {
10254 	    NEXT;
10255 	    encoding = xmlParseEncName(ctxt);
10256 	    if (RAW != '"') {
10257 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10258 		xmlFree((xmlChar *) encoding);
10259 		return(NULL);
10260 	    } else
10261 	        NEXT;
10262 	} else if (RAW == '\''){
10263 	    NEXT;
10264 	    encoding = xmlParseEncName(ctxt);
10265 	    if (RAW != '\'') {
10266 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10267 		xmlFree((xmlChar *) encoding);
10268 		return(NULL);
10269 	    } else
10270 	        NEXT;
10271 	} else {
10272 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10273 	}
10274 
10275         /*
10276          * Non standard parsing, allowing the user to ignore encoding
10277          */
10278         if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10279 	    xmlFree((xmlChar *) encoding);
10280             return(NULL);
10281 	}
10282 
10283 	/*
10284 	 * UTF-16 encoding stwich has already taken place at this stage,
10285 	 * more over the little-endian/big-endian selection is already done
10286 	 */
10287         if ((encoding != NULL) &&
10288 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10289 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10290 	    /*
10291 	     * If no encoding was passed to the parser, that we are
10292 	     * using UTF-16 and no decoder is present i.e. the
10293 	     * document is apparently UTF-8 compatible, then raise an
10294 	     * encoding mismatch fatal error
10295 	     */
10296 	    if ((ctxt->encoding == NULL) &&
10297 	        (ctxt->input->buf != NULL) &&
10298 	        (ctxt->input->buf->encoder == NULL)) {
10299 		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10300 		  "Document labelled UTF-16 but has UTF-8 content\n");
10301 	    }
10302 	    if (ctxt->encoding != NULL)
10303 		xmlFree((xmlChar *) ctxt->encoding);
10304 	    ctxt->encoding = encoding;
10305 	}
10306 	/*
10307 	 * UTF-8 encoding is handled natively
10308 	 */
10309         else if ((encoding != NULL) &&
10310 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10311 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10312 	    if (ctxt->encoding != NULL)
10313 		xmlFree((xmlChar *) ctxt->encoding);
10314 	    ctxt->encoding = encoding;
10315 	}
10316 	else if (encoding != NULL) {
10317 	    xmlCharEncodingHandlerPtr handler;
10318 
10319 	    if (ctxt->input->encoding != NULL)
10320 		xmlFree((xmlChar *) ctxt->input->encoding);
10321 	    ctxt->input->encoding = encoding;
10322 
10323             handler = xmlFindCharEncodingHandler((const char *) encoding);
10324 	    if (handler != NULL) {
10325 		if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10326 		    /* failed to convert */
10327 		    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10328 		    return(NULL);
10329 		}
10330 	    } else {
10331 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10332 			"Unsupported encoding %s\n", encoding);
10333 		return(NULL);
10334 	    }
10335 	}
10336     }
10337     return(encoding);
10338 }
10339 
10340 /**
10341  * xmlParseSDDecl:
10342  * @ctxt:  an XML parser context
10343  *
10344  * parse the XML standalone declaration
10345  *
10346  * [32] SDDecl ::= S 'standalone' Eq
10347  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10348  *
10349  * [ VC: Standalone Document Declaration ]
10350  * TODO The standalone document declaration must have the value "no"
10351  * if any external markup declarations contain declarations of:
10352  *  - attributes with default values, if elements to which these
10353  *    attributes apply appear in the document without specifications
10354  *    of values for these attributes, or
10355  *  - entities (other than amp, lt, gt, apos, quot), if references
10356  *    to those entities appear in the document, or
10357  *  - attributes with values subject to normalization, where the
10358  *    attribute appears in the document with a value which will change
10359  *    as a result of normalization, or
10360  *  - element types with element content, if white space occurs directly
10361  *    within any instance of those types.
10362  *
10363  * Returns:
10364  *   1 if standalone="yes"
10365  *   0 if standalone="no"
10366  *  -2 if standalone attribute is missing or invalid
10367  *	  (A standalone value of -2 means that the XML declaration was found,
10368  *	   but no value was specified for the standalone attribute).
10369  */
10370 
10371 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10372 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10373     int standalone = -2;
10374 
10375     SKIP_BLANKS;
10376     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10377 	SKIP(10);
10378         SKIP_BLANKS;
10379 	if (RAW != '=') {
10380 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10381 	    return(standalone);
10382         }
10383 	NEXT;
10384 	SKIP_BLANKS;
10385         if (RAW == '\''){
10386 	    NEXT;
10387 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10388 	        standalone = 0;
10389                 SKIP(2);
10390 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10391 	               (NXT(2) == 's')) {
10392 	        standalone = 1;
10393 		SKIP(3);
10394             } else {
10395 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10396 	    }
10397 	    if (RAW != '\'') {
10398 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10399 	    } else
10400 	        NEXT;
10401 	} else if (RAW == '"'){
10402 	    NEXT;
10403 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10404 	        standalone = 0;
10405 		SKIP(2);
10406 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10407 	               (NXT(2) == 's')) {
10408 	        standalone = 1;
10409                 SKIP(3);
10410             } else {
10411 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10412 	    }
10413 	    if (RAW != '"') {
10414 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10415 	    } else
10416 	        NEXT;
10417 	} else {
10418 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10419         }
10420     }
10421     return(standalone);
10422 }
10423 
10424 /**
10425  * xmlParseXMLDecl:
10426  * @ctxt:  an XML parser context
10427  *
10428  * parse an XML declaration header
10429  *
10430  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10431  */
10432 
10433 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10434 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10435     xmlChar *version;
10436 
10437     /*
10438      * This value for standalone indicates that the document has an
10439      * XML declaration but it does not have a standalone attribute.
10440      * It will be overwritten later if a standalone attribute is found.
10441      */
10442     ctxt->input->standalone = -2;
10443 
10444     /*
10445      * We know that '<?xml' is here.
10446      */
10447     SKIP(5);
10448 
10449     if (!IS_BLANK_CH(RAW)) {
10450 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10451 	               "Blank needed after '<?xml'\n");
10452     }
10453     SKIP_BLANKS;
10454 
10455     /*
10456      * We must have the VersionInfo here.
10457      */
10458     version = xmlParseVersionInfo(ctxt);
10459     if (version == NULL) {
10460 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10461     } else {
10462 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10463 	    /*
10464 	     * Changed here for XML-1.0 5th edition
10465 	     */
10466 	    if (ctxt->options & XML_PARSE_OLD10) {
10467 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10468 			          "Unsupported version '%s'\n",
10469 			          version);
10470 	    } else {
10471 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10472 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10473 		                  "Unsupported version '%s'\n",
10474 				  version, NULL);
10475 		} else {
10476 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10477 				      "Unsupported version '%s'\n",
10478 				      version);
10479 		}
10480 	    }
10481 	}
10482 	if (ctxt->version != NULL)
10483 	    xmlFree((void *) ctxt->version);
10484 	ctxt->version = version;
10485     }
10486 
10487     /*
10488      * We may have the encoding declaration
10489      */
10490     if (!IS_BLANK_CH(RAW)) {
10491         if ((RAW == '?') && (NXT(1) == '>')) {
10492 	    SKIP(2);
10493 	    return;
10494 	}
10495 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10496     }
10497     xmlParseEncodingDecl(ctxt);
10498     if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10499          (ctxt->instate == XML_PARSER_EOF)) {
10500 	/*
10501 	 * The XML REC instructs us to stop parsing right here
10502 	 */
10503         return;
10504     }
10505 
10506     /*
10507      * We may have the standalone status.
10508      */
10509     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10510         if ((RAW == '?') && (NXT(1) == '>')) {
10511 	    SKIP(2);
10512 	    return;
10513 	}
10514 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10515     }
10516 
10517     /*
10518      * We can grow the input buffer freely at that point
10519      */
10520     GROW;
10521 
10522     SKIP_BLANKS;
10523     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10524 
10525     SKIP_BLANKS;
10526     if ((RAW == '?') && (NXT(1) == '>')) {
10527         SKIP(2);
10528     } else if (RAW == '>') {
10529         /* Deprecated old WD ... */
10530 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10531 	NEXT;
10532     } else {
10533 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10534 	MOVETO_ENDTAG(CUR_PTR);
10535 	NEXT;
10536     }
10537 }
10538 
10539 /**
10540  * xmlParseMisc:
10541  * @ctxt:  an XML parser context
10542  *
10543  * parse an XML Misc* optional field.
10544  *
10545  * [27] Misc ::= Comment | PI |  S
10546  */
10547 
10548 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10549 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10550     while ((ctxt->instate != XML_PARSER_EOF) &&
10551            (((RAW == '<') && (NXT(1) == '?')) ||
10552             (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10553             IS_BLANK_CH(CUR))) {
10554         if ((RAW == '<') && (NXT(1) == '?')) {
10555 	    xmlParsePI(ctxt);
10556 	} else if (IS_BLANK_CH(CUR)) {
10557 	    NEXT;
10558 	} else
10559 	    xmlParseComment(ctxt);
10560     }
10561 }
10562 
10563 /**
10564  * xmlParseDocument:
10565  * @ctxt:  an XML parser context
10566  *
10567  * parse an XML document (and build a tree if using the standard SAX
10568  * interface).
10569  *
10570  * [1] document ::= prolog element Misc*
10571  *
10572  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10573  *
10574  * Returns 0, -1 in case of error. the parser context is augmented
10575  *                as a result of the parsing.
10576  */
10577 
10578 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10579 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10580     xmlChar start[4];
10581     xmlCharEncoding enc;
10582 
10583     xmlInitParser();
10584 
10585     if ((ctxt == NULL) || (ctxt->input == NULL))
10586         return(-1);
10587 
10588     GROW;
10589 
10590     /*
10591      * SAX: detecting the level.
10592      */
10593     xmlDetectSAX2(ctxt);
10594 
10595     /*
10596      * SAX: beginning of the document processing.
10597      */
10598     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10599         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10600     if (ctxt->instate == XML_PARSER_EOF)
10601 	return(-1);
10602 
10603     if ((ctxt->encoding == NULL) &&
10604         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10605 	/*
10606 	 * Get the 4 first bytes and decode the charset
10607 	 * if enc != XML_CHAR_ENCODING_NONE
10608 	 * plug some encoding conversion routines.
10609 	 */
10610 	start[0] = RAW;
10611 	start[1] = NXT(1);
10612 	start[2] = NXT(2);
10613 	start[3] = NXT(3);
10614 	enc = xmlDetectCharEncoding(&start[0], 4);
10615 	if (enc != XML_CHAR_ENCODING_NONE) {
10616 	    xmlSwitchEncoding(ctxt, enc);
10617 	}
10618     }
10619 
10620 
10621     if (CUR == 0) {
10622 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10623 	return(-1);
10624     }
10625 
10626     /*
10627      * Check for the XMLDecl in the Prolog.
10628      * do not GROW here to avoid the detected encoder to decode more
10629      * than just the first line, unless the amount of data is really
10630      * too small to hold "<?xml version="1.0" encoding="foo"
10631      */
10632     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10633        GROW;
10634     }
10635     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10636 
10637 	/*
10638 	 * Note that we will switch encoding on the fly.
10639 	 */
10640 	xmlParseXMLDecl(ctxt);
10641 	if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10642 	    (ctxt->instate == XML_PARSER_EOF)) {
10643 	    /*
10644 	     * The XML REC instructs us to stop parsing right here
10645 	     */
10646 	    return(-1);
10647 	}
10648 	ctxt->standalone = ctxt->input->standalone;
10649 	SKIP_BLANKS;
10650     } else {
10651 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10652     }
10653     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10654         ctxt->sax->startDocument(ctxt->userData);
10655     if (ctxt->instate == XML_PARSER_EOF)
10656 	return(-1);
10657     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10658         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10659 	ctxt->myDoc->compression = ctxt->input->buf->compressed;
10660     }
10661 
10662     /*
10663      * The Misc part of the Prolog
10664      */
10665     GROW;
10666     xmlParseMisc(ctxt);
10667 
10668     /*
10669      * Then possibly doc type declaration(s) and more Misc
10670      * (doctypedecl Misc*)?
10671      */
10672     GROW;
10673     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10674 
10675 	ctxt->inSubset = 1;
10676 	xmlParseDocTypeDecl(ctxt);
10677 	if (RAW == '[') {
10678 	    ctxt->instate = XML_PARSER_DTD;
10679 	    xmlParseInternalSubset(ctxt);
10680 	    if (ctxt->instate == XML_PARSER_EOF)
10681 		return(-1);
10682 	}
10683 
10684 	/*
10685 	 * Create and update the external subset.
10686 	 */
10687 	ctxt->inSubset = 2;
10688 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10689 	    (!ctxt->disableSAX))
10690 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10691 	                              ctxt->extSubSystem, ctxt->extSubURI);
10692 	if (ctxt->instate == XML_PARSER_EOF)
10693 	    return(-1);
10694 	ctxt->inSubset = 0;
10695 
10696         xmlCleanSpecialAttr(ctxt);
10697 
10698 	ctxt->instate = XML_PARSER_PROLOG;
10699 	xmlParseMisc(ctxt);
10700     }
10701 
10702     /*
10703      * Time to start parsing the tree itself
10704      */
10705     GROW;
10706     if (RAW != '<') {
10707 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10708 		       "Start tag expected, '<' not found\n");
10709     } else {
10710 	ctxt->instate = XML_PARSER_CONTENT;
10711 	xmlParseElement(ctxt);
10712 	ctxt->instate = XML_PARSER_EPILOG;
10713 
10714 
10715 	/*
10716 	 * The Misc part at the end
10717 	 */
10718 	xmlParseMisc(ctxt);
10719 
10720 	if (RAW != 0) {
10721 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10722 	}
10723 	ctxt->instate = XML_PARSER_EOF;
10724     }
10725 
10726     /*
10727      * SAX: end of the document processing.
10728      */
10729     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10730         ctxt->sax->endDocument(ctxt->userData);
10731 
10732     /*
10733      * Remove locally kept entity definitions if the tree was not built
10734      */
10735     if ((ctxt->myDoc != NULL) &&
10736 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10737 	xmlFreeDoc(ctxt->myDoc);
10738 	ctxt->myDoc = NULL;
10739     }
10740 
10741     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10742         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10743 	if (ctxt->valid)
10744 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10745 	if (ctxt->nsWellFormed)
10746 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
10747 	if (ctxt->options & XML_PARSE_OLD10)
10748 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
10749     }
10750     if (! ctxt->wellFormed) {
10751 	ctxt->valid = 0;
10752 	return(-1);
10753     }
10754     return(0);
10755 }
10756 
10757 /**
10758  * xmlParseExtParsedEnt:
10759  * @ctxt:  an XML parser context
10760  *
10761  * parse a general parsed entity
10762  * An external general parsed entity is well-formed if it matches the
10763  * production labeled extParsedEnt.
10764  *
10765  * [78] extParsedEnt ::= TextDecl? content
10766  *
10767  * Returns 0, -1 in case of error. the parser context is augmented
10768  *                as a result of the parsing.
10769  */
10770 
10771 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10772 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10773     xmlChar start[4];
10774     xmlCharEncoding enc;
10775 
10776     if ((ctxt == NULL) || (ctxt->input == NULL))
10777         return(-1);
10778 
10779     xmlDefaultSAXHandlerInit();
10780 
10781     xmlDetectSAX2(ctxt);
10782 
10783     GROW;
10784 
10785     /*
10786      * SAX: beginning of the document processing.
10787      */
10788     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10789         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10790 
10791     /*
10792      * Get the 4 first bytes and decode the charset
10793      * if enc != XML_CHAR_ENCODING_NONE
10794      * plug some encoding conversion routines.
10795      */
10796     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10797 	start[0] = RAW;
10798 	start[1] = NXT(1);
10799 	start[2] = NXT(2);
10800 	start[3] = NXT(3);
10801 	enc = xmlDetectCharEncoding(start, 4);
10802 	if (enc != XML_CHAR_ENCODING_NONE) {
10803 	    xmlSwitchEncoding(ctxt, enc);
10804 	}
10805     }
10806 
10807 
10808     if (CUR == 0) {
10809 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10810     }
10811 
10812     /*
10813      * Check for the XMLDecl in the Prolog.
10814      */
10815     GROW;
10816     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10817 
10818 	/*
10819 	 * Note that we will switch encoding on the fly.
10820 	 */
10821 	xmlParseXMLDecl(ctxt);
10822 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10823 	    /*
10824 	     * The XML REC instructs us to stop parsing right here
10825 	     */
10826 	    return(-1);
10827 	}
10828 	SKIP_BLANKS;
10829     } else {
10830 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10831     }
10832     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10833         ctxt->sax->startDocument(ctxt->userData);
10834     if (ctxt->instate == XML_PARSER_EOF)
10835 	return(-1);
10836 
10837     /*
10838      * Doing validity checking on chunk doesn't make sense
10839      */
10840     ctxt->instate = XML_PARSER_CONTENT;
10841     ctxt->validate = 0;
10842     ctxt->loadsubset = 0;
10843     ctxt->depth = 0;
10844 
10845     xmlParseContent(ctxt);
10846     if (ctxt->instate == XML_PARSER_EOF)
10847 	return(-1);
10848 
10849     if ((RAW == '<') && (NXT(1) == '/')) {
10850 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10851     } else if (RAW != 0) {
10852 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10853     }
10854 
10855     /*
10856      * SAX: end of the document processing.
10857      */
10858     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10859         ctxt->sax->endDocument(ctxt->userData);
10860 
10861     if (! ctxt->wellFormed) return(-1);
10862     return(0);
10863 }
10864 
10865 #ifdef LIBXML_PUSH_ENABLED
10866 /************************************************************************
10867  *									*
10868  *		Progressive parsing interfaces				*
10869  *									*
10870  ************************************************************************/
10871 
10872 /**
10873  * xmlParseLookupSequence:
10874  * @ctxt:  an XML parser context
10875  * @first:  the first char to lookup
10876  * @next:  the next char to lookup or zero
10877  * @third:  the next char to lookup or zero
10878  *
10879  * Try to find if a sequence (first, next, third) or  just (first next) or
10880  * (first) is available in the input stream.
10881  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10882  * to avoid rescanning sequences of bytes, it DOES change the state of the
10883  * parser, do not use liberally.
10884  *
10885  * Returns the index to the current parsing point if the full sequence
10886  *      is available, -1 otherwise.
10887  */
10888 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)10889 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10890                        xmlChar next, xmlChar third) {
10891     int base, len;
10892     xmlParserInputPtr in;
10893     const xmlChar *buf;
10894 
10895     in = ctxt->input;
10896     if (in == NULL) return(-1);
10897     base = in->cur - in->base;
10898     if (base < 0) return(-1);
10899     if (ctxt->checkIndex > base)
10900         base = ctxt->checkIndex;
10901     if (in->buf == NULL) {
10902 	buf = in->base;
10903 	len = in->length;
10904     } else {
10905 	buf = xmlBufContent(in->buf->buffer);
10906 	len = xmlBufUse(in->buf->buffer);
10907     }
10908     /* take into account the sequence length */
10909     if (third) len -= 2;
10910     else if (next) len --;
10911     for (;base < len;base++) {
10912         if (buf[base] == first) {
10913 	    if (third != 0) {
10914 		if ((buf[base + 1] != next) ||
10915 		    (buf[base + 2] != third)) continue;
10916 	    } else if (next != 0) {
10917 		if (buf[base + 1] != next) continue;
10918 	    }
10919 	    ctxt->checkIndex = 0;
10920 #ifdef DEBUG_PUSH
10921 	    if (next == 0)
10922 		xmlGenericError(xmlGenericErrorContext,
10923 			"PP: lookup '%c' found at %d\n",
10924 			first, base);
10925 	    else if (third == 0)
10926 		xmlGenericError(xmlGenericErrorContext,
10927 			"PP: lookup '%c%c' found at %d\n",
10928 			first, next, base);
10929 	    else
10930 		xmlGenericError(xmlGenericErrorContext,
10931 			"PP: lookup '%c%c%c' found at %d\n",
10932 			first, next, third, base);
10933 #endif
10934 	    return(base - (in->cur - in->base));
10935 	}
10936     }
10937     ctxt->checkIndex = base;
10938 #ifdef DEBUG_PUSH
10939     if (next == 0)
10940 	xmlGenericError(xmlGenericErrorContext,
10941 		"PP: lookup '%c' failed\n", first);
10942     else if (third == 0)
10943 	xmlGenericError(xmlGenericErrorContext,
10944 		"PP: lookup '%c%c' failed\n", first, next);
10945     else
10946 	xmlGenericError(xmlGenericErrorContext,
10947 		"PP: lookup '%c%c%c' failed\n", first, next, third);
10948 #endif
10949     return(-1);
10950 }
10951 
10952 /**
10953  * xmlParseGetLasts:
10954  * @ctxt:  an XML parser context
10955  * @lastlt:  pointer to store the last '<' from the input
10956  * @lastgt:  pointer to store the last '>' from the input
10957  *
10958  * Lookup the last < and > in the current chunk
10959  */
10960 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)10961 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10962                  const xmlChar **lastgt) {
10963     const xmlChar *tmp;
10964 
10965     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10966 	xmlGenericError(xmlGenericErrorContext,
10967 		    "Internal error: xmlParseGetLasts\n");
10968 	return;
10969     }
10970     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10971         tmp = ctxt->input->end;
10972 	tmp--;
10973 	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10974 	if (tmp < ctxt->input->base) {
10975 	    *lastlt = NULL;
10976 	    *lastgt = NULL;
10977 	} else {
10978 	    *lastlt = tmp;
10979 	    tmp++;
10980 	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10981 	        if (*tmp == '\'') {
10982 		    tmp++;
10983 		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10984 		    if (tmp < ctxt->input->end) tmp++;
10985 		} else if (*tmp == '"') {
10986 		    tmp++;
10987 		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10988 		    if (tmp < ctxt->input->end) tmp++;
10989 		} else
10990 		    tmp++;
10991 	    }
10992 	    if (tmp < ctxt->input->end)
10993 	        *lastgt = tmp;
10994 	    else {
10995 	        tmp = *lastlt;
10996 		tmp--;
10997 		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10998 		if (tmp >= ctxt->input->base)
10999 		    *lastgt = tmp;
11000 		else
11001 		    *lastgt = NULL;
11002 	    }
11003 	}
11004     } else {
11005         *lastlt = NULL;
11006 	*lastgt = NULL;
11007     }
11008 }
11009 /**
11010  * xmlCheckCdataPush:
11011  * @cur: pointer to the block of characters
11012  * @len: length of the block in bytes
11013  * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11014  *
11015  * Check that the block of characters is okay as SCdata content [20]
11016  *
11017  * Returns the number of bytes to pass if okay, a negative index where an
11018  *         UTF-8 error occurred otherwise
11019  */
11020 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11021 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11022     int ix;
11023     unsigned char c;
11024     int codepoint;
11025 
11026     if ((utf == NULL) || (len <= 0))
11027         return(0);
11028 
11029     for (ix = 0; ix < len;) {      /* string is 0-terminated */
11030         c = utf[ix];
11031         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
11032 	    if (c >= 0x20)
11033 		ix++;
11034 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11035 	        ix++;
11036 	    else
11037 	        return(-ix);
11038 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11039 	    if (ix + 2 > len) return(complete ? -ix : ix);
11040 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
11041 	        return(-ix);
11042 	    codepoint = (utf[ix] & 0x1f) << 6;
11043 	    codepoint |= utf[ix+1] & 0x3f;
11044 	    if (!xmlIsCharQ(codepoint))
11045 	        return(-ix);
11046 	    ix += 2;
11047 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11048 	    if (ix + 3 > len) return(complete ? -ix : ix);
11049 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11050 	        ((utf[ix+2] & 0xc0) != 0x80))
11051 		    return(-ix);
11052 	    codepoint = (utf[ix] & 0xf) << 12;
11053 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
11054 	    codepoint |= utf[ix+2] & 0x3f;
11055 	    if (!xmlIsCharQ(codepoint))
11056 	        return(-ix);
11057 	    ix += 3;
11058 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11059 	    if (ix + 4 > len) return(complete ? -ix : ix);
11060 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11061 	        ((utf[ix+2] & 0xc0) != 0x80) ||
11062 		((utf[ix+3] & 0xc0) != 0x80))
11063 		    return(-ix);
11064 	    codepoint = (utf[ix] & 0x7) << 18;
11065 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
11066 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
11067 	    codepoint |= utf[ix+3] & 0x3f;
11068 	    if (!xmlIsCharQ(codepoint))
11069 	        return(-ix);
11070 	    ix += 4;
11071 	} else				/* unknown encoding */
11072 	    return(-ix);
11073       }
11074       return(ix);
11075 }
11076 
11077 /**
11078  * xmlParseTryOrFinish:
11079  * @ctxt:  an XML parser context
11080  * @terminate:  last chunk indicator
11081  *
11082  * Try to progress on parsing
11083  *
11084  * Returns zero if no parsing was possible
11085  */
11086 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11087 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11088     int ret = 0;
11089     int avail, tlen;
11090     xmlChar cur, next;
11091     const xmlChar *lastlt, *lastgt;
11092 
11093     if (ctxt->input == NULL)
11094         return(0);
11095 
11096 #ifdef DEBUG_PUSH
11097     switch (ctxt->instate) {
11098 	case XML_PARSER_EOF:
11099 	    xmlGenericError(xmlGenericErrorContext,
11100 		    "PP: try EOF\n"); break;
11101 	case XML_PARSER_START:
11102 	    xmlGenericError(xmlGenericErrorContext,
11103 		    "PP: try START\n"); break;
11104 	case XML_PARSER_MISC:
11105 	    xmlGenericError(xmlGenericErrorContext,
11106 		    "PP: try MISC\n");break;
11107 	case XML_PARSER_COMMENT:
11108 	    xmlGenericError(xmlGenericErrorContext,
11109 		    "PP: try COMMENT\n");break;
11110 	case XML_PARSER_PROLOG:
11111 	    xmlGenericError(xmlGenericErrorContext,
11112 		    "PP: try PROLOG\n");break;
11113 	case XML_PARSER_START_TAG:
11114 	    xmlGenericError(xmlGenericErrorContext,
11115 		    "PP: try START_TAG\n");break;
11116 	case XML_PARSER_CONTENT:
11117 	    xmlGenericError(xmlGenericErrorContext,
11118 		    "PP: try CONTENT\n");break;
11119 	case XML_PARSER_CDATA_SECTION:
11120 	    xmlGenericError(xmlGenericErrorContext,
11121 		    "PP: try CDATA_SECTION\n");break;
11122 	case XML_PARSER_END_TAG:
11123 	    xmlGenericError(xmlGenericErrorContext,
11124 		    "PP: try END_TAG\n");break;
11125 	case XML_PARSER_ENTITY_DECL:
11126 	    xmlGenericError(xmlGenericErrorContext,
11127 		    "PP: try ENTITY_DECL\n");break;
11128 	case XML_PARSER_ENTITY_VALUE:
11129 	    xmlGenericError(xmlGenericErrorContext,
11130 		    "PP: try ENTITY_VALUE\n");break;
11131 	case XML_PARSER_ATTRIBUTE_VALUE:
11132 	    xmlGenericError(xmlGenericErrorContext,
11133 		    "PP: try ATTRIBUTE_VALUE\n");break;
11134 	case XML_PARSER_DTD:
11135 	    xmlGenericError(xmlGenericErrorContext,
11136 		    "PP: try DTD\n");break;
11137 	case XML_PARSER_EPILOG:
11138 	    xmlGenericError(xmlGenericErrorContext,
11139 		    "PP: try EPILOG\n");break;
11140 	case XML_PARSER_PI:
11141 	    xmlGenericError(xmlGenericErrorContext,
11142 		    "PP: try PI\n");break;
11143         case XML_PARSER_IGNORE:
11144             xmlGenericError(xmlGenericErrorContext,
11145 		    "PP: try IGNORE\n");break;
11146     }
11147 #endif
11148 
11149     if ((ctxt->input != NULL) &&
11150         (ctxt->input->cur - ctxt->input->base > 4096)) {
11151 	xmlSHRINK(ctxt);
11152 	ctxt->checkIndex = 0;
11153     }
11154     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11155 
11156     while (ctxt->instate != XML_PARSER_EOF) {
11157 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11158 	    return(0);
11159 
11160 	if (ctxt->input == NULL) break;
11161 	if (ctxt->input->buf == NULL)
11162 	    avail = ctxt->input->length -
11163 	            (ctxt->input->cur - ctxt->input->base);
11164 	else {
11165 	    /*
11166 	     * If we are operating on converted input, try to flush
11167 	     * remainng chars to avoid them stalling in the non-converted
11168 	     * buffer. But do not do this in document start where
11169 	     * encoding="..." may not have been read and we work on a
11170 	     * guessed encoding.
11171 	     */
11172 	    if ((ctxt->instate != XML_PARSER_START) &&
11173 	        (ctxt->input->buf->raw != NULL) &&
11174 		(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11175                 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11176                                                  ctxt->input);
11177 		size_t current = ctxt->input->cur - ctxt->input->base;
11178 
11179 		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11180                 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11181                                       base, current);
11182 	    }
11183 	    avail = xmlBufUse(ctxt->input->buf->buffer) -
11184 		    (ctxt->input->cur - ctxt->input->base);
11185 	}
11186         if (avail < 1)
11187 	    goto done;
11188         switch (ctxt->instate) {
11189             case XML_PARSER_EOF:
11190 	        /*
11191 		 * Document parsing is done !
11192 		 */
11193 	        goto done;
11194             case XML_PARSER_START:
11195 		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11196 		    xmlChar start[4];
11197 		    xmlCharEncoding enc;
11198 
11199 		    /*
11200 		     * Very first chars read from the document flow.
11201 		     */
11202 		    if (avail < 4)
11203 			goto done;
11204 
11205 		    /*
11206 		     * Get the 4 first bytes and decode the charset
11207 		     * if enc != XML_CHAR_ENCODING_NONE
11208 		     * plug some encoding conversion routines,
11209 		     * else xmlSwitchEncoding will set to (default)
11210 		     * UTF8.
11211 		     */
11212 		    start[0] = RAW;
11213 		    start[1] = NXT(1);
11214 		    start[2] = NXT(2);
11215 		    start[3] = NXT(3);
11216 		    enc = xmlDetectCharEncoding(start, 4);
11217 		    xmlSwitchEncoding(ctxt, enc);
11218 		    break;
11219 		}
11220 
11221 		if (avail < 2)
11222 		    goto done;
11223 		cur = ctxt->input->cur[0];
11224 		next = ctxt->input->cur[1];
11225 		if (cur == 0) {
11226 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11227 			ctxt->sax->setDocumentLocator(ctxt->userData,
11228 						      &xmlDefaultSAXLocator);
11229 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11230 		    xmlHaltParser(ctxt);
11231 #ifdef DEBUG_PUSH
11232 		    xmlGenericError(xmlGenericErrorContext,
11233 			    "PP: entering EOF\n");
11234 #endif
11235 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11236 			ctxt->sax->endDocument(ctxt->userData);
11237 		    goto done;
11238 		}
11239 	        if ((cur == '<') && (next == '?')) {
11240 		    /* PI or XML decl */
11241 		    if (avail < 5) return(ret);
11242 		    if ((!terminate) &&
11243 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11244 			return(ret);
11245 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11246 			ctxt->sax->setDocumentLocator(ctxt->userData,
11247 						      &xmlDefaultSAXLocator);
11248 		    if ((ctxt->input->cur[2] == 'x') &&
11249 			(ctxt->input->cur[3] == 'm') &&
11250 			(ctxt->input->cur[4] == 'l') &&
11251 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11252 			ret += 5;
11253 #ifdef DEBUG_PUSH
11254 			xmlGenericError(xmlGenericErrorContext,
11255 				"PP: Parsing XML Decl\n");
11256 #endif
11257 			xmlParseXMLDecl(ctxt);
11258 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11259 			    /*
11260 			     * The XML REC instructs us to stop parsing right
11261 			     * here
11262 			     */
11263 			    xmlHaltParser(ctxt);
11264 			    return(0);
11265 			}
11266 			ctxt->standalone = ctxt->input->standalone;
11267 			if ((ctxt->encoding == NULL) &&
11268 			    (ctxt->input->encoding != NULL))
11269 			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11270 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11271 			    (!ctxt->disableSAX))
11272 			    ctxt->sax->startDocument(ctxt->userData);
11273 			ctxt->instate = XML_PARSER_MISC;
11274 #ifdef DEBUG_PUSH
11275 			xmlGenericError(xmlGenericErrorContext,
11276 				"PP: entering MISC\n");
11277 #endif
11278 		    } else {
11279 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11280 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11281 			    (!ctxt->disableSAX))
11282 			    ctxt->sax->startDocument(ctxt->userData);
11283 			ctxt->instate = XML_PARSER_MISC;
11284 #ifdef DEBUG_PUSH
11285 			xmlGenericError(xmlGenericErrorContext,
11286 				"PP: entering MISC\n");
11287 #endif
11288 		    }
11289 		} else {
11290 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11291 			ctxt->sax->setDocumentLocator(ctxt->userData,
11292 						      &xmlDefaultSAXLocator);
11293 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11294 		    if (ctxt->version == NULL) {
11295 		        xmlErrMemory(ctxt, NULL);
11296 			break;
11297 		    }
11298 		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11299 		        (!ctxt->disableSAX))
11300 			ctxt->sax->startDocument(ctxt->userData);
11301 		    ctxt->instate = XML_PARSER_MISC;
11302 #ifdef DEBUG_PUSH
11303 		    xmlGenericError(xmlGenericErrorContext,
11304 			    "PP: entering MISC\n");
11305 #endif
11306 		}
11307 		break;
11308             case XML_PARSER_START_TAG: {
11309 	        const xmlChar *name;
11310 		const xmlChar *prefix = NULL;
11311 		const xmlChar *URI = NULL;
11312 		int nsNr = ctxt->nsNr;
11313 
11314 		if ((avail < 2) && (ctxt->inputNr == 1))
11315 		    goto done;
11316 		cur = ctxt->input->cur[0];
11317 	        if (cur != '<') {
11318 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11319 		    xmlHaltParser(ctxt);
11320 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11321 			ctxt->sax->endDocument(ctxt->userData);
11322 		    goto done;
11323 		}
11324 		if (!terminate) {
11325 		    if (ctxt->progressive) {
11326 		        /* > can be found unescaped in attribute values */
11327 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11328 			    goto done;
11329 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11330 			goto done;
11331 		    }
11332 		}
11333 		if (ctxt->spaceNr == 0)
11334 		    spacePush(ctxt, -1);
11335 		else if (*ctxt->space == -2)
11336 		    spacePush(ctxt, -1);
11337 		else
11338 		    spacePush(ctxt, *ctxt->space);
11339 #ifdef LIBXML_SAX1_ENABLED
11340 		if (ctxt->sax2)
11341 #endif /* LIBXML_SAX1_ENABLED */
11342 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11343 #ifdef LIBXML_SAX1_ENABLED
11344 		else
11345 		    name = xmlParseStartTag(ctxt);
11346 #endif /* LIBXML_SAX1_ENABLED */
11347 		if (ctxt->instate == XML_PARSER_EOF)
11348 		    goto done;
11349 		if (name == NULL) {
11350 		    spacePop(ctxt);
11351 		    xmlHaltParser(ctxt);
11352 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11353 			ctxt->sax->endDocument(ctxt->userData);
11354 		    goto done;
11355 		}
11356 #ifdef LIBXML_VALID_ENABLED
11357 		/*
11358 		 * [ VC: Root Element Type ]
11359 		 * The Name in the document type declaration must match
11360 		 * the element type of the root element.
11361 		 */
11362 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11363 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11364 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11365 #endif /* LIBXML_VALID_ENABLED */
11366 
11367 		/*
11368 		 * Check for an Empty Element.
11369 		 */
11370 		if ((RAW == '/') && (NXT(1) == '>')) {
11371 		    SKIP(2);
11372 
11373 		    if (ctxt->sax2) {
11374 			if ((ctxt->sax != NULL) &&
11375 			    (ctxt->sax->endElementNs != NULL) &&
11376 			    (!ctxt->disableSAX))
11377 			    ctxt->sax->endElementNs(ctxt->userData, name,
11378 			                            prefix, URI);
11379 			if (ctxt->nsNr - nsNr > 0)
11380 			    nsPop(ctxt, ctxt->nsNr - nsNr);
11381 #ifdef LIBXML_SAX1_ENABLED
11382 		    } else {
11383 			if ((ctxt->sax != NULL) &&
11384 			    (ctxt->sax->endElement != NULL) &&
11385 			    (!ctxt->disableSAX))
11386 			    ctxt->sax->endElement(ctxt->userData, name);
11387 #endif /* LIBXML_SAX1_ENABLED */
11388 		    }
11389 		    if (ctxt->instate == XML_PARSER_EOF)
11390 			goto done;
11391 		    spacePop(ctxt);
11392 		    if (ctxt->nameNr == 0) {
11393 			ctxt->instate = XML_PARSER_EPILOG;
11394 		    } else {
11395 			ctxt->instate = XML_PARSER_CONTENT;
11396 		    }
11397                     ctxt->progressive = 1;
11398 		    break;
11399 		}
11400 		if (RAW == '>') {
11401 		    NEXT;
11402 		} else {
11403 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11404 					 "Couldn't find end of Start Tag %s\n",
11405 					 name);
11406 		    nodePop(ctxt);
11407 		    spacePop(ctxt);
11408 		}
11409 		if (ctxt->sax2)
11410 		    nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11411 #ifdef LIBXML_SAX1_ENABLED
11412 		else
11413 		    namePush(ctxt, name);
11414 #endif /* LIBXML_SAX1_ENABLED */
11415 
11416 		ctxt->instate = XML_PARSER_CONTENT;
11417                 ctxt->progressive = 1;
11418                 break;
11419 	    }
11420             case XML_PARSER_CONTENT: {
11421 		const xmlChar *test;
11422 		unsigned int cons;
11423 		if ((avail < 2) && (ctxt->inputNr == 1))
11424 		    goto done;
11425 		cur = ctxt->input->cur[0];
11426 		next = ctxt->input->cur[1];
11427 
11428 		test = CUR_PTR;
11429 	        cons = ctxt->input->consumed;
11430 		if ((cur == '<') && (next == '/')) {
11431 		    ctxt->instate = XML_PARSER_END_TAG;
11432 		    break;
11433 	        } else if ((cur == '<') && (next == '?')) {
11434 		    if ((!terminate) &&
11435 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11436                         ctxt->progressive = XML_PARSER_PI;
11437 			goto done;
11438                     }
11439 		    xmlParsePI(ctxt);
11440 		    ctxt->instate = XML_PARSER_CONTENT;
11441                     ctxt->progressive = 1;
11442 		} else if ((cur == '<') && (next != '!')) {
11443 		    ctxt->instate = XML_PARSER_START_TAG;
11444 		    break;
11445 		} else if ((cur == '<') && (next == '!') &&
11446 		           (ctxt->input->cur[2] == '-') &&
11447 			   (ctxt->input->cur[3] == '-')) {
11448 		    int term;
11449 
11450 	            if (avail < 4)
11451 		        goto done;
11452 		    ctxt->input->cur += 4;
11453 		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11454 		    ctxt->input->cur -= 4;
11455 		    if ((!terminate) && (term < 0)) {
11456                         ctxt->progressive = XML_PARSER_COMMENT;
11457 			goto done;
11458                     }
11459 		    xmlParseComment(ctxt);
11460 		    ctxt->instate = XML_PARSER_CONTENT;
11461                     ctxt->progressive = 1;
11462 		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11463 		    (ctxt->input->cur[2] == '[') &&
11464 		    (ctxt->input->cur[3] == 'C') &&
11465 		    (ctxt->input->cur[4] == 'D') &&
11466 		    (ctxt->input->cur[5] == 'A') &&
11467 		    (ctxt->input->cur[6] == 'T') &&
11468 		    (ctxt->input->cur[7] == 'A') &&
11469 		    (ctxt->input->cur[8] == '[')) {
11470 		    SKIP(9);
11471 		    ctxt->instate = XML_PARSER_CDATA_SECTION;
11472 		    break;
11473 		} else if ((cur == '<') && (next == '!') &&
11474 		           (avail < 9)) {
11475 		    goto done;
11476 		} else if (cur == '&') {
11477 		    if ((!terminate) &&
11478 		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11479 			goto done;
11480 		    xmlParseReference(ctxt);
11481 		} else {
11482 		    /* TODO Avoid the extra copy, handle directly !!! */
11483 		    /*
11484 		     * Goal of the following test is:
11485 		     *  - minimize calls to the SAX 'character' callback
11486 		     *    when they are mergeable
11487 		     *  - handle an problem for isBlank when we only parse
11488 		     *    a sequence of blank chars and the next one is
11489 		     *    not available to check against '<' presence.
11490 		     *  - tries to homogenize the differences in SAX
11491 		     *    callbacks between the push and pull versions
11492 		     *    of the parser.
11493 		     */
11494 		    if ((ctxt->inputNr == 1) &&
11495 		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11496 			if (!terminate) {
11497 			    if (ctxt->progressive) {
11498 				if ((lastlt == NULL) ||
11499 				    (ctxt->input->cur > lastlt))
11500 				    goto done;
11501 			    } else if (xmlParseLookupSequence(ctxt,
11502 			                                      '<', 0, 0) < 0) {
11503 				goto done;
11504 			    }
11505 			}
11506                     }
11507 		    ctxt->checkIndex = 0;
11508 		    xmlParseCharData(ctxt, 0);
11509 		}
11510 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11511 		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11512 		                "detected an error in element content\n");
11513 		    xmlHaltParser(ctxt);
11514 		    break;
11515 		}
11516 		break;
11517 	    }
11518             case XML_PARSER_END_TAG:
11519 		if (avail < 2)
11520 		    goto done;
11521 		if (!terminate) {
11522 		    if (ctxt->progressive) {
11523 		        /* > can be found unescaped in attribute values */
11524 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11525 			    goto done;
11526 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11527 			goto done;
11528 		    }
11529 		}
11530 		if (ctxt->sax2) {
11531 		    xmlParseEndTag2(ctxt,
11532 		            (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11533 		            (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11534 		            (int) (ptrdiff_t)
11535                                 ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11536 		    nameNsPop(ctxt);
11537 		}
11538 #ifdef LIBXML_SAX1_ENABLED
11539 		  else
11540 		    xmlParseEndTag1(ctxt, 0);
11541 #endif /* LIBXML_SAX1_ENABLED */
11542 		if (ctxt->instate == XML_PARSER_EOF) {
11543 		    /* Nothing */
11544 		} else if (ctxt->nameNr == 0) {
11545 		    ctxt->instate = XML_PARSER_EPILOG;
11546 		} else {
11547 		    ctxt->instate = XML_PARSER_CONTENT;
11548 		}
11549 		break;
11550             case XML_PARSER_CDATA_SECTION: {
11551 	        /*
11552 		 * The Push mode need to have the SAX callback for
11553 		 * cdataBlock merge back contiguous callbacks.
11554 		 */
11555 		int base;
11556 
11557 		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11558 		if (base < 0) {
11559 		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11560 		        int tmp;
11561 
11562 			tmp = xmlCheckCdataPush(ctxt->input->cur,
11563 			                        XML_PARSER_BIG_BUFFER_SIZE, 0);
11564 			if (tmp < 0) {
11565 			    tmp = -tmp;
11566 			    ctxt->input->cur += tmp;
11567 			    goto encoding_error;
11568 			}
11569 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11570 			    if (ctxt->sax->cdataBlock != NULL)
11571 				ctxt->sax->cdataBlock(ctxt->userData,
11572 				                      ctxt->input->cur, tmp);
11573 			    else if (ctxt->sax->characters != NULL)
11574 				ctxt->sax->characters(ctxt->userData,
11575 				                      ctxt->input->cur, tmp);
11576 			}
11577 			if (ctxt->instate == XML_PARSER_EOF)
11578 			    goto done;
11579 			SKIPL(tmp);
11580 			ctxt->checkIndex = 0;
11581 		    }
11582 		    goto done;
11583 		} else {
11584 		    int tmp;
11585 
11586 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11587 		    if ((tmp < 0) || (tmp != base)) {
11588 			tmp = -tmp;
11589 			ctxt->input->cur += tmp;
11590 			goto encoding_error;
11591 		    }
11592 		    if ((ctxt->sax != NULL) && (base == 0) &&
11593 		        (ctxt->sax->cdataBlock != NULL) &&
11594 		        (!ctxt->disableSAX)) {
11595 			/*
11596 			 * Special case to provide identical behaviour
11597 			 * between pull and push parsers on enpty CDATA
11598 			 * sections
11599 			 */
11600 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11601 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11602 			               "<![CDATA[", 9)))
11603 			     ctxt->sax->cdataBlock(ctxt->userData,
11604 			                           BAD_CAST "", 0);
11605 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11606 			(!ctxt->disableSAX)) {
11607 			if (ctxt->sax->cdataBlock != NULL)
11608 			    ctxt->sax->cdataBlock(ctxt->userData,
11609 						  ctxt->input->cur, base);
11610 			else if (ctxt->sax->characters != NULL)
11611 			    ctxt->sax->characters(ctxt->userData,
11612 						  ctxt->input->cur, base);
11613 		    }
11614 		    if (ctxt->instate == XML_PARSER_EOF)
11615 			goto done;
11616 		    SKIPL(base + 3);
11617 		    ctxt->checkIndex = 0;
11618 		    ctxt->instate = XML_PARSER_CONTENT;
11619 #ifdef DEBUG_PUSH
11620 		    xmlGenericError(xmlGenericErrorContext,
11621 			    "PP: entering CONTENT\n");
11622 #endif
11623 		}
11624 		break;
11625 	    }
11626             case XML_PARSER_MISC:
11627 		SKIP_BLANKS;
11628 		if (ctxt->input->buf == NULL)
11629 		    avail = ctxt->input->length -
11630 		            (ctxt->input->cur - ctxt->input->base);
11631 		else
11632 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11633 		            (ctxt->input->cur - ctxt->input->base);
11634 		if (avail < 2)
11635 		    goto done;
11636 		cur = ctxt->input->cur[0];
11637 		next = ctxt->input->cur[1];
11638 	        if ((cur == '<') && (next == '?')) {
11639 		    if ((!terminate) &&
11640 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11641                         ctxt->progressive = XML_PARSER_PI;
11642 			goto done;
11643                     }
11644 #ifdef DEBUG_PUSH
11645 		    xmlGenericError(xmlGenericErrorContext,
11646 			    "PP: Parsing PI\n");
11647 #endif
11648 		    xmlParsePI(ctxt);
11649 		    if (ctxt->instate == XML_PARSER_EOF)
11650 			goto done;
11651 		    ctxt->instate = XML_PARSER_MISC;
11652                     ctxt->progressive = 1;
11653 		    ctxt->checkIndex = 0;
11654 		} else if ((cur == '<') && (next == '!') &&
11655 		    (ctxt->input->cur[2] == '-') &&
11656 		    (ctxt->input->cur[3] == '-')) {
11657 		    if ((!terminate) &&
11658 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11659                         ctxt->progressive = XML_PARSER_COMMENT;
11660 			goto done;
11661                     }
11662 #ifdef DEBUG_PUSH
11663 		    xmlGenericError(xmlGenericErrorContext,
11664 			    "PP: Parsing Comment\n");
11665 #endif
11666 		    xmlParseComment(ctxt);
11667 		    if (ctxt->instate == XML_PARSER_EOF)
11668 			goto done;
11669 		    ctxt->instate = XML_PARSER_MISC;
11670                     ctxt->progressive = 1;
11671 		    ctxt->checkIndex = 0;
11672 		} else if ((cur == '<') && (next == '!') &&
11673 		    (ctxt->input->cur[2] == 'D') &&
11674 		    (ctxt->input->cur[3] == 'O') &&
11675 		    (ctxt->input->cur[4] == 'C') &&
11676 		    (ctxt->input->cur[5] == 'T') &&
11677 		    (ctxt->input->cur[6] == 'Y') &&
11678 		    (ctxt->input->cur[7] == 'P') &&
11679 		    (ctxt->input->cur[8] == 'E')) {
11680 		    if ((!terminate) &&
11681 		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11682                         ctxt->progressive = XML_PARSER_DTD;
11683 			goto done;
11684                     }
11685 #ifdef DEBUG_PUSH
11686 		    xmlGenericError(xmlGenericErrorContext,
11687 			    "PP: Parsing internal subset\n");
11688 #endif
11689 		    ctxt->inSubset = 1;
11690                     ctxt->progressive = 0;
11691 		    ctxt->checkIndex = 0;
11692 		    xmlParseDocTypeDecl(ctxt);
11693 		    if (ctxt->instate == XML_PARSER_EOF)
11694 			goto done;
11695 		    if (RAW == '[') {
11696 			ctxt->instate = XML_PARSER_DTD;
11697 #ifdef DEBUG_PUSH
11698 			xmlGenericError(xmlGenericErrorContext,
11699 				"PP: entering DTD\n");
11700 #endif
11701 		    } else {
11702 			/*
11703 			 * Create and update the external subset.
11704 			 */
11705 			ctxt->inSubset = 2;
11706 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11707 			    (ctxt->sax->externalSubset != NULL))
11708 			    ctxt->sax->externalSubset(ctxt->userData,
11709 				    ctxt->intSubName, ctxt->extSubSystem,
11710 				    ctxt->extSubURI);
11711 			ctxt->inSubset = 0;
11712 			xmlCleanSpecialAttr(ctxt);
11713 			ctxt->instate = XML_PARSER_PROLOG;
11714 #ifdef DEBUG_PUSH
11715 			xmlGenericError(xmlGenericErrorContext,
11716 				"PP: entering PROLOG\n");
11717 #endif
11718 		    }
11719 		} else if ((cur == '<') && (next == '!') &&
11720 		           (avail < 9)) {
11721 		    goto done;
11722 		} else {
11723 		    ctxt->instate = XML_PARSER_START_TAG;
11724 		    ctxt->progressive = XML_PARSER_START_TAG;
11725 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11726 #ifdef DEBUG_PUSH
11727 		    xmlGenericError(xmlGenericErrorContext,
11728 			    "PP: entering START_TAG\n");
11729 #endif
11730 		}
11731 		break;
11732             case XML_PARSER_PROLOG:
11733 		SKIP_BLANKS;
11734 		if (ctxt->input->buf == NULL)
11735 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11736 		else
11737 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11738                             (ctxt->input->cur - ctxt->input->base);
11739 		if (avail < 2)
11740 		    goto done;
11741 		cur = ctxt->input->cur[0];
11742 		next = ctxt->input->cur[1];
11743 	        if ((cur == '<') && (next == '?')) {
11744 		    if ((!terminate) &&
11745 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11746                         ctxt->progressive = XML_PARSER_PI;
11747 			goto done;
11748                     }
11749 #ifdef DEBUG_PUSH
11750 		    xmlGenericError(xmlGenericErrorContext,
11751 			    "PP: Parsing PI\n");
11752 #endif
11753 		    xmlParsePI(ctxt);
11754 		    if (ctxt->instate == XML_PARSER_EOF)
11755 			goto done;
11756 		    ctxt->instate = XML_PARSER_PROLOG;
11757                     ctxt->progressive = 1;
11758 		} else if ((cur == '<') && (next == '!') &&
11759 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11760 		    if ((!terminate) &&
11761 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11762                         ctxt->progressive = XML_PARSER_COMMENT;
11763 			goto done;
11764                     }
11765 #ifdef DEBUG_PUSH
11766 		    xmlGenericError(xmlGenericErrorContext,
11767 			    "PP: Parsing Comment\n");
11768 #endif
11769 		    xmlParseComment(ctxt);
11770 		    if (ctxt->instate == XML_PARSER_EOF)
11771 			goto done;
11772 		    ctxt->instate = XML_PARSER_PROLOG;
11773                     ctxt->progressive = 1;
11774 		} else if ((cur == '<') && (next == '!') &&
11775 		           (avail < 4)) {
11776 		    goto done;
11777 		} else {
11778 		    ctxt->instate = XML_PARSER_START_TAG;
11779 		    if (ctxt->progressive == 0)
11780 			ctxt->progressive = XML_PARSER_START_TAG;
11781 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11782 #ifdef DEBUG_PUSH
11783 		    xmlGenericError(xmlGenericErrorContext,
11784 			    "PP: entering START_TAG\n");
11785 #endif
11786 		}
11787 		break;
11788             case XML_PARSER_EPILOG:
11789 		SKIP_BLANKS;
11790 		if (ctxt->input->buf == NULL)
11791 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11792 		else
11793 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11794                             (ctxt->input->cur - ctxt->input->base);
11795 		if (avail < 2)
11796 		    goto done;
11797 		cur = ctxt->input->cur[0];
11798 		next = ctxt->input->cur[1];
11799 	        if ((cur == '<') && (next == '?')) {
11800 		    if ((!terminate) &&
11801 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11802                         ctxt->progressive = XML_PARSER_PI;
11803 			goto done;
11804                     }
11805 #ifdef DEBUG_PUSH
11806 		    xmlGenericError(xmlGenericErrorContext,
11807 			    "PP: Parsing PI\n");
11808 #endif
11809 		    xmlParsePI(ctxt);
11810 		    if (ctxt->instate == XML_PARSER_EOF)
11811 			goto done;
11812 		    ctxt->instate = XML_PARSER_EPILOG;
11813                     ctxt->progressive = 1;
11814 		} else if ((cur == '<') && (next == '!') &&
11815 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11816 		    if ((!terminate) &&
11817 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11818                         ctxt->progressive = XML_PARSER_COMMENT;
11819 			goto done;
11820                     }
11821 #ifdef DEBUG_PUSH
11822 		    xmlGenericError(xmlGenericErrorContext,
11823 			    "PP: Parsing Comment\n");
11824 #endif
11825 		    xmlParseComment(ctxt);
11826 		    if (ctxt->instate == XML_PARSER_EOF)
11827 			goto done;
11828 		    ctxt->instate = XML_PARSER_EPILOG;
11829                     ctxt->progressive = 1;
11830 		} else if ((cur == '<') && (next == '!') &&
11831 		           (avail < 4)) {
11832 		    goto done;
11833 		} else {
11834 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11835 		    xmlHaltParser(ctxt);
11836 #ifdef DEBUG_PUSH
11837 		    xmlGenericError(xmlGenericErrorContext,
11838 			    "PP: entering EOF\n");
11839 #endif
11840 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11841 			ctxt->sax->endDocument(ctxt->userData);
11842 		    goto done;
11843 		}
11844 		break;
11845             case XML_PARSER_DTD: {
11846 	        /*
11847 		 * Sorry but progressive parsing of the internal subset
11848 		 * is not expected to be supported. We first check that
11849 		 * the full content of the internal subset is available and
11850 		 * the parsing is launched only at that point.
11851 		 * Internal subset ends up with "']' S? '>'" in an unescaped
11852 		 * section and not in a ']]>' sequence which are conditional
11853 		 * sections (whoever argued to keep that crap in XML deserve
11854 		 * a place in hell !).
11855 		 */
11856 		int base, i;
11857 		xmlChar *buf;
11858 	        xmlChar quote = 0;
11859                 size_t use;
11860 
11861 		base = ctxt->input->cur - ctxt->input->base;
11862 		if (base < 0) return(0);
11863 		if (ctxt->checkIndex > base)
11864 		    base = ctxt->checkIndex;
11865 		buf = xmlBufContent(ctxt->input->buf->buffer);
11866                 use = xmlBufUse(ctxt->input->buf->buffer);
11867 		for (;(unsigned int) base < use; base++) {
11868 		    if (quote != 0) {
11869 		        if (buf[base] == quote)
11870 			    quote = 0;
11871 			continue;
11872 		    }
11873 		    if ((quote == 0) && (buf[base] == '<')) {
11874 		        int found  = 0;
11875 			/* special handling of comments */
11876 		        if (((unsigned int) base + 4 < use) &&
11877 			    (buf[base + 1] == '!') &&
11878 			    (buf[base + 2] == '-') &&
11879 			    (buf[base + 3] == '-')) {
11880 			    for (;(unsigned int) base + 3 < use; base++) {
11881 				if ((buf[base] == '-') &&
11882 				    (buf[base + 1] == '-') &&
11883 				    (buf[base + 2] == '>')) {
11884 				    found = 1;
11885 				    base += 2;
11886 				    break;
11887 				}
11888 		            }
11889 			    if (!found) {
11890 #if 0
11891 			        fprintf(stderr, "unfinished comment\n");
11892 #endif
11893 			        break; /* for */
11894 		            }
11895 		            continue;
11896 			}
11897 		    }
11898 		    if (buf[base] == '"') {
11899 		        quote = '"';
11900 			continue;
11901 		    }
11902 		    if (buf[base] == '\'') {
11903 		        quote = '\'';
11904 			continue;
11905 		    }
11906 		    if (buf[base] == ']') {
11907 #if 0
11908 		        fprintf(stderr, "%c%c%c%c: ", buf[base],
11909 			        buf[base + 1], buf[base + 2], buf[base + 3]);
11910 #endif
11911 		        if ((unsigned int) base +1 >= use)
11912 			    break;
11913 			if (buf[base + 1] == ']') {
11914 			    /* conditional crap, skip both ']' ! */
11915 			    base++;
11916 			    continue;
11917 			}
11918 		        for (i = 1; (unsigned int) base + i < use; i++) {
11919 			    if (buf[base + i] == '>') {
11920 #if 0
11921 			        fprintf(stderr, "found\n");
11922 #endif
11923 			        goto found_end_int_subset;
11924 			    }
11925 			    if (!IS_BLANK_CH(buf[base + i])) {
11926 #if 0
11927 			        fprintf(stderr, "not found\n");
11928 #endif
11929 			        goto not_end_of_int_subset;
11930 			    }
11931 			}
11932 #if 0
11933 			fprintf(stderr, "end of stream\n");
11934 #endif
11935 		        break;
11936 
11937 		    }
11938 not_end_of_int_subset:
11939                     continue; /* for */
11940 		}
11941 		/*
11942 		 * We didn't found the end of the Internal subset
11943 		 */
11944                 if (quote == 0)
11945                     ctxt->checkIndex = base;
11946                 else
11947                     ctxt->checkIndex = 0;
11948 #ifdef DEBUG_PUSH
11949 		if (next == 0)
11950 		    xmlGenericError(xmlGenericErrorContext,
11951 			    "PP: lookup of int subset end filed\n");
11952 #endif
11953 	        goto done;
11954 
11955 found_end_int_subset:
11956                 ctxt->checkIndex = 0;
11957 		xmlParseInternalSubset(ctxt);
11958 		if (ctxt->instate == XML_PARSER_EOF)
11959 		    goto done;
11960 		ctxt->inSubset = 2;
11961 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11962 		    (ctxt->sax->externalSubset != NULL))
11963 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11964 			    ctxt->extSubSystem, ctxt->extSubURI);
11965 		ctxt->inSubset = 0;
11966 		xmlCleanSpecialAttr(ctxt);
11967 		if (ctxt->instate == XML_PARSER_EOF)
11968 		    goto done;
11969 		ctxt->instate = XML_PARSER_PROLOG;
11970 		ctxt->checkIndex = 0;
11971 #ifdef DEBUG_PUSH
11972 		xmlGenericError(xmlGenericErrorContext,
11973 			"PP: entering PROLOG\n");
11974 #endif
11975                 break;
11976 	    }
11977             case XML_PARSER_COMMENT:
11978 		xmlGenericError(xmlGenericErrorContext,
11979 			"PP: internal error, state == COMMENT\n");
11980 		ctxt->instate = XML_PARSER_CONTENT;
11981 #ifdef DEBUG_PUSH
11982 		xmlGenericError(xmlGenericErrorContext,
11983 			"PP: entering CONTENT\n");
11984 #endif
11985 		break;
11986             case XML_PARSER_IGNORE:
11987 		xmlGenericError(xmlGenericErrorContext,
11988 			"PP: internal error, state == IGNORE");
11989 	        ctxt->instate = XML_PARSER_DTD;
11990 #ifdef DEBUG_PUSH
11991 		xmlGenericError(xmlGenericErrorContext,
11992 			"PP: entering DTD\n");
11993 #endif
11994 	        break;
11995             case XML_PARSER_PI:
11996 		xmlGenericError(xmlGenericErrorContext,
11997 			"PP: internal error, state == PI\n");
11998 		ctxt->instate = XML_PARSER_CONTENT;
11999 #ifdef DEBUG_PUSH
12000 		xmlGenericError(xmlGenericErrorContext,
12001 			"PP: entering CONTENT\n");
12002 #endif
12003 		break;
12004             case XML_PARSER_ENTITY_DECL:
12005 		xmlGenericError(xmlGenericErrorContext,
12006 			"PP: internal error, state == ENTITY_DECL\n");
12007 		ctxt->instate = XML_PARSER_DTD;
12008 #ifdef DEBUG_PUSH
12009 		xmlGenericError(xmlGenericErrorContext,
12010 			"PP: entering DTD\n");
12011 #endif
12012 		break;
12013             case XML_PARSER_ENTITY_VALUE:
12014 		xmlGenericError(xmlGenericErrorContext,
12015 			"PP: internal error, state == ENTITY_VALUE\n");
12016 		ctxt->instate = XML_PARSER_CONTENT;
12017 #ifdef DEBUG_PUSH
12018 		xmlGenericError(xmlGenericErrorContext,
12019 			"PP: entering DTD\n");
12020 #endif
12021 		break;
12022             case XML_PARSER_ATTRIBUTE_VALUE:
12023 		xmlGenericError(xmlGenericErrorContext,
12024 			"PP: internal error, state == ATTRIBUTE_VALUE\n");
12025 		ctxt->instate = XML_PARSER_START_TAG;
12026 #ifdef DEBUG_PUSH
12027 		xmlGenericError(xmlGenericErrorContext,
12028 			"PP: entering START_TAG\n");
12029 #endif
12030 		break;
12031             case XML_PARSER_SYSTEM_LITERAL:
12032 		xmlGenericError(xmlGenericErrorContext,
12033 			"PP: internal error, state == SYSTEM_LITERAL\n");
12034 		ctxt->instate = XML_PARSER_START_TAG;
12035 #ifdef DEBUG_PUSH
12036 		xmlGenericError(xmlGenericErrorContext,
12037 			"PP: entering START_TAG\n");
12038 #endif
12039 		break;
12040             case XML_PARSER_PUBLIC_LITERAL:
12041 		xmlGenericError(xmlGenericErrorContext,
12042 			"PP: internal error, state == PUBLIC_LITERAL\n");
12043 		ctxt->instate = XML_PARSER_START_TAG;
12044 #ifdef DEBUG_PUSH
12045 		xmlGenericError(xmlGenericErrorContext,
12046 			"PP: entering START_TAG\n");
12047 #endif
12048 		break;
12049 	}
12050     }
12051 done:
12052 #ifdef DEBUG_PUSH
12053     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12054 #endif
12055     return(ret);
12056 encoding_error:
12057     {
12058         char buffer[150];
12059 
12060 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12061 			ctxt->input->cur[0], ctxt->input->cur[1],
12062 			ctxt->input->cur[2], ctxt->input->cur[3]);
12063 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12064 		     "Input is not proper UTF-8, indicate encoding !\n%s",
12065 		     BAD_CAST buffer, NULL);
12066     }
12067     return(0);
12068 }
12069 
12070 /**
12071  * xmlParseCheckTransition:
12072  * @ctxt:  an XML parser context
12073  * @chunk:  a char array
12074  * @size:  the size in byte of the chunk
12075  *
12076  * Check depending on the current parser state if the chunk given must be
12077  * processed immediately or one need more data to advance on parsing.
12078  *
12079  * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12080  */
12081 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12082 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12083     if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12084         return(-1);
12085     if (ctxt->instate == XML_PARSER_START_TAG) {
12086         if (memchr(chunk, '>', size) != NULL)
12087             return(1);
12088         return(0);
12089     }
12090     if (ctxt->progressive == XML_PARSER_COMMENT) {
12091         if (memchr(chunk, '>', size) != NULL)
12092             return(1);
12093         return(0);
12094     }
12095     if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12096         if (memchr(chunk, '>', size) != NULL)
12097             return(1);
12098         return(0);
12099     }
12100     if (ctxt->progressive == XML_PARSER_PI) {
12101         if (memchr(chunk, '>', size) != NULL)
12102             return(1);
12103         return(0);
12104     }
12105     if (ctxt->instate == XML_PARSER_END_TAG) {
12106         if (memchr(chunk, '>', size) != NULL)
12107             return(1);
12108         return(0);
12109     }
12110     if ((ctxt->progressive == XML_PARSER_DTD) ||
12111         (ctxt->instate == XML_PARSER_DTD)) {
12112         if (memchr(chunk, '>', size) != NULL)
12113             return(1);
12114         return(0);
12115     }
12116     return(1);
12117 }
12118 
12119 /**
12120  * xmlParseChunk:
12121  * @ctxt:  an XML parser context
 * @chunk:  a char array
12123  * @size:  the size in byte of the chunk
12124  * @terminate:  last chunk indicator
12125  *
12126  * Parse a Chunk of memory
12127  *
12128  * Returns zero if no error, the xmlParserErrors otherwise.
12129  */
12130 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12131 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12132               int terminate) {
12133     int end_in_lf = 0;
12134     int remain = 0;
12135     size_t old_avail = 0;
12136     size_t avail = 0;
12137 
12138     if (ctxt == NULL)
12139         return(XML_ERR_INTERNAL_ERROR);
12140     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12141         return(ctxt->errNo);
12142     if (ctxt->instate == XML_PARSER_EOF)
12143         return(-1);
12144     if (ctxt->instate == XML_PARSER_START)
12145         xmlDetectSAX2(ctxt);
12146     if ((size > 0) && (chunk != NULL) && (!terminate) &&
12147         (chunk[size - 1] == '\r')) {
12148 	end_in_lf = 1;
12149 	size--;
12150     }
12151 
12152 xmldecl_done:
12153 
12154     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12155         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12156 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12157 	size_t cur = ctxt->input->cur - ctxt->input->base;
12158 	int res;
12159 
12160         old_avail = xmlBufUse(ctxt->input->buf->buffer);
12161         /*
12162          * Specific handling if we autodetected an encoding, we should not
12163          * push more than the first line ... which depend on the encoding
12164          * And only push the rest once the final encoding was detected
12165          */
12166         if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12167             (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12168             unsigned int len = 45;
12169 
12170             if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12171                                BAD_CAST "UTF-16")) ||
12172                 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12173                                BAD_CAST "UTF16")))
12174                 len = 90;
12175             else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12176                                     BAD_CAST "UCS-4")) ||
12177                      (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12178                                     BAD_CAST "UCS4")))
12179                 len = 180;
12180 
12181             if (ctxt->input->buf->rawconsumed < len)
12182                 len -= ctxt->input->buf->rawconsumed;
12183 
12184             /*
12185              * Change size for reading the initial declaration only
12186              * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12187              * will blindly copy extra bytes from memory.
12188              */
12189             if ((unsigned int) size > len) {
12190                 remain = size - len;
12191                 size = len;
12192             } else {
12193                 remain = 0;
12194             }
12195         }
12196 	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12197 	if (res < 0) {
12198 	    ctxt->errNo = XML_PARSER_EOF;
12199 	    xmlHaltParser(ctxt);
12200 	    return (XML_PARSER_EOF);
12201 	}
12202         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12203 #ifdef DEBUG_PUSH
12204 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12205 #endif
12206 
12207     } else if (ctxt->instate != XML_PARSER_EOF) {
12208 	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12209 	    xmlParserInputBufferPtr in = ctxt->input->buf;
12210 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
12211 		    (in->raw != NULL)) {
12212 		int nbchars;
12213 		size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12214 		size_t current = ctxt->input->cur - ctxt->input->base;
12215 
12216 		nbchars = xmlCharEncInput(in, terminate);
12217 		if (nbchars < 0) {
12218 		    /* TODO 2.6.0 */
12219 		    xmlGenericError(xmlGenericErrorContext,
12220 				    "xmlParseChunk: encoder error\n");
12221                     xmlHaltParser(ctxt);
12222 		    return(XML_ERR_INVALID_ENCODING);
12223 		}
12224 		xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12225 	    }
12226 	}
12227     }
12228     if (remain != 0) {
12229         xmlParseTryOrFinish(ctxt, 0);
12230     } else {
12231         if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12232             avail = xmlBufUse(ctxt->input->buf->buffer);
12233         /*
12234          * Depending on the current state it may not be such
12235          * a good idea to try parsing if there is nothing in the chunk
12236          * which would be worth doing a parser state transition and we
12237          * need to wait for more data
12238          */
12239         if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12240             (old_avail == 0) || (avail == 0) ||
12241             (xmlParseCheckTransition(ctxt,
12242                        (const char *)&ctxt->input->base[old_avail],
12243                                      avail - old_avail)))
12244             xmlParseTryOrFinish(ctxt, terminate);
12245     }
12246     if (ctxt->instate == XML_PARSER_EOF)
12247         return(ctxt->errNo);
12248 
12249     if ((ctxt->input != NULL) &&
12250          (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12251          ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12252         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12253         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12254         xmlHaltParser(ctxt);
12255     }
12256     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12257         return(ctxt->errNo);
12258 
12259     if (remain != 0) {
12260         chunk += size;
12261         size = remain;
12262         remain = 0;
12263         goto xmldecl_done;
12264     }
12265     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12266         (ctxt->input->buf != NULL)) {
12267 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12268 					 ctxt->input);
12269 	size_t current = ctxt->input->cur - ctxt->input->base;
12270 
12271 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12272 
12273 	xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12274 			      base, current);
12275     }
12276     if (terminate) {
12277 	/*
12278 	 * Check for termination
12279 	 */
12280 	int cur_avail = 0;
12281 
12282 	if (ctxt->input != NULL) {
12283 	    if (ctxt->input->buf == NULL)
12284 		cur_avail = ctxt->input->length -
12285 			    (ctxt->input->cur - ctxt->input->base);
12286 	    else
12287 		cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12288 			              (ctxt->input->cur - ctxt->input->base);
12289 	}
12290 
12291 	if ((ctxt->instate != XML_PARSER_EOF) &&
12292 	    (ctxt->instate != XML_PARSER_EPILOG)) {
12293 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12294 	}
12295 	if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12296 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12297 	}
12298 	if (ctxt->instate != XML_PARSER_EOF) {
12299 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12300 		ctxt->sax->endDocument(ctxt->userData);
12301 	}
12302 	ctxt->instate = XML_PARSER_EOF;
12303     }
12304     if (ctxt->wellFormed == 0)
12305 	return((xmlParserErrors) ctxt->errNo);
12306     else
12307         return(0);
12308 }
12309 
12310 /************************************************************************
12311  *									*
12312  *		I/O front end functions to the parser			*
12313  *									*
12314  ************************************************************************/
12315 
12316 /**
12317  * xmlCreatePushParserCtxt:
12318  * @sax:  a SAX handler
12319  * @user_data:  The user data returned on SAX callbacks
12320  * @chunk:  a pointer to an array of chars
12321  * @size:  number of chars in the array
12322  * @filename:  an optional file name or URI
12323  *
12324  * Create a parser context for using the XML parser in push mode.
12325  * If @buffer and @size are non-NULL, the data is used to detect
12326  * the encoding.  The remaining characters will be parsed so they
12327  * don't need to be fed in again through xmlParseChunk.
12328  * To allow content encoding detection, @size should be >= 4
12329  * The value of @filename is used for fetching external entities
12330  * and error/warning reports.
12331  *
12332  * Returns the new parser context or NULL
12333  */
12334 
12335 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12336 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12337                         const char *chunk, int size, const char *filename) {
12338     xmlParserCtxtPtr ctxt;
12339     xmlParserInputPtr inputStream;
12340     xmlParserInputBufferPtr buf;
12341     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12342 
12343     /*
12344      * plug some encoding conversion routines
12345      */
12346     if ((chunk != NULL) && (size >= 4))
12347 	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12348 
12349     buf = xmlAllocParserInputBuffer(enc);
12350     if (buf == NULL) return(NULL);
12351 
12352     ctxt = xmlNewParserCtxt();
12353     if (ctxt == NULL) {
12354         xmlErrMemory(NULL, "creating parser: out of memory\n");
12355 	xmlFreeParserInputBuffer(buf);
12356 	return(NULL);
12357     }
12358     ctxt->dictNames = 1;
12359     ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12360     if (ctxt->pushTab == NULL) {
12361         xmlErrMemory(ctxt, NULL);
12362 	xmlFreeParserInputBuffer(buf);
12363 	xmlFreeParserCtxt(ctxt);
12364 	return(NULL);
12365     }
12366     if (sax != NULL) {
12367 #ifdef LIBXML_SAX1_ENABLED
12368 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12369 #endif /* LIBXML_SAX1_ENABLED */
12370 	    xmlFree(ctxt->sax);
12371 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12372 	if (ctxt->sax == NULL) {
12373 	    xmlErrMemory(ctxt, NULL);
12374 	    xmlFreeParserInputBuffer(buf);
12375 	    xmlFreeParserCtxt(ctxt);
12376 	    return(NULL);
12377 	}
12378 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12379 	if (sax->initialized == XML_SAX2_MAGIC)
12380 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12381 	else
12382 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12383 	if (user_data != NULL)
12384 	    ctxt->userData = user_data;
12385     }
12386     if (filename == NULL) {
12387 	ctxt->directory = NULL;
12388     } else {
12389         ctxt->directory = xmlParserGetDirectory(filename);
12390     }
12391 
12392     inputStream = xmlNewInputStream(ctxt);
12393     if (inputStream == NULL) {
12394 	xmlFreeParserCtxt(ctxt);
12395 	xmlFreeParserInputBuffer(buf);
12396 	return(NULL);
12397     }
12398 
12399     if (filename == NULL)
12400 	inputStream->filename = NULL;
12401     else {
12402 	inputStream->filename = (char *)
12403 	    xmlCanonicPath((const xmlChar *) filename);
12404 	if (inputStream->filename == NULL) {
12405 	    xmlFreeParserCtxt(ctxt);
12406 	    xmlFreeParserInputBuffer(buf);
12407 	    return(NULL);
12408 	}
12409     }
12410     inputStream->buf = buf;
12411     xmlBufResetInput(inputStream->buf->buffer, inputStream);
12412     inputPush(ctxt, inputStream);
12413 
12414     /*
12415      * If the caller didn't provide an initial 'chunk' for determining
12416      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12417      * that it can be automatically determined later
12418      */
12419     if ((size == 0) || (chunk == NULL)) {
12420 	ctxt->charset = XML_CHAR_ENCODING_NONE;
12421     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12422 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12423 	size_t cur = ctxt->input->cur - ctxt->input->base;
12424 
12425 	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12426 
12427         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12428 #ifdef DEBUG_PUSH
12429 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12430 #endif
12431     }
12432 
12433     if (enc != XML_CHAR_ENCODING_NONE) {
12434         xmlSwitchEncoding(ctxt, enc);
12435     }
12436 
12437     return(ctxt);
12438 }
12439 #endif /* LIBXML_PUSH_ENABLED */
12440 
12441 /**
12442  * xmlHaltParser:
12443  * @ctxt:  an XML parser context
12444  *
12445  * Blocks further parser processing don't override error
12446  * for internal use
12447  */
12448 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12449 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12450     if (ctxt == NULL)
12451         return;
12452     ctxt->instate = XML_PARSER_EOF;
12453     ctxt->disableSAX = 1;
12454     while (ctxt->inputNr > 1)
12455         xmlFreeInputStream(inputPop(ctxt));
12456     if (ctxt->input != NULL) {
12457         /*
12458 	 * in case there was a specific allocation deallocate before
12459 	 * overriding base
12460 	 */
12461         if (ctxt->input->free != NULL) {
12462 	    ctxt->input->free((xmlChar *) ctxt->input->base);
12463 	    ctxt->input->free = NULL;
12464 	}
12465         if (ctxt->input->buf != NULL) {
12466             xmlFreeParserInputBuffer(ctxt->input->buf);
12467             ctxt->input->buf = NULL;
12468         }
12469 	ctxt->input->cur = BAD_CAST"";
12470         ctxt->input->length = 0;
12471 	ctxt->input->base = ctxt->input->cur;
12472         ctxt->input->end = ctxt->input->cur;
12473     }
12474 }
12475 
12476 /**
12477  * xmlStopParser:
12478  * @ctxt:  an XML parser context
12479  *
12480  * Blocks further parser processing
12481  */
12482 void
xmlStopParser(xmlParserCtxtPtr ctxt)12483 xmlStopParser(xmlParserCtxtPtr ctxt) {
12484     if (ctxt == NULL)
12485         return;
12486     xmlHaltParser(ctxt);
12487     ctxt->errNo = XML_ERR_USER_STOP;
12488 }
12489 
12490 /**
12491  * xmlCreateIOParserCtxt:
12492  * @sax:  a SAX handler
12493  * @user_data:  The user data returned on SAX callbacks
12494  * @ioread:  an I/O read function
12495  * @ioclose:  an I/O close function
12496  * @ioctx:  an I/O handler
12497  * @enc:  the charset encoding if known
12498  *
12499  * Create a parser context for using the XML parser with an existing
12500  * I/O stream
12501  *
12502  * Returns the new parser context or NULL
12503  */
12504 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12505 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12506 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12507 	void *ioctx, xmlCharEncoding enc) {
12508     xmlParserCtxtPtr ctxt;
12509     xmlParserInputPtr inputStream;
12510     xmlParserInputBufferPtr buf;
12511 
12512     if (ioread == NULL) return(NULL);
12513 
12514     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12515     if (buf == NULL) {
12516         if (ioclose != NULL)
12517             ioclose(ioctx);
12518         return (NULL);
12519     }
12520 
12521     ctxt = xmlNewParserCtxt();
12522     if (ctxt == NULL) {
12523 	xmlFreeParserInputBuffer(buf);
12524 	return(NULL);
12525     }
12526     if (sax != NULL) {
12527 #ifdef LIBXML_SAX1_ENABLED
12528 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12529 #endif /* LIBXML_SAX1_ENABLED */
12530 	    xmlFree(ctxt->sax);
12531 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12532 	if (ctxt->sax == NULL) {
12533 	    xmlErrMemory(ctxt, NULL);
12534 	    xmlFreeParserCtxt(ctxt);
12535 	    return(NULL);
12536 	}
12537 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12538 	if (sax->initialized == XML_SAX2_MAGIC)
12539 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12540 	else
12541 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12542 	if (user_data != NULL)
12543 	    ctxt->userData = user_data;
12544     }
12545 
12546     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12547     if (inputStream == NULL) {
12548 	xmlFreeParserCtxt(ctxt);
12549 	return(NULL);
12550     }
12551     inputPush(ctxt, inputStream);
12552 
12553     return(ctxt);
12554 }
12555 
12556 #ifdef LIBXML_VALID_ENABLED
12557 /************************************************************************
12558  *									*
12559  *		Front ends when parsing a DTD				*
12560  *									*
12561  ************************************************************************/
12562 
12563 /**
12564  * xmlIOParseDTD:
12565  * @sax:  the SAX handler block or NULL
12566  * @input:  an Input Buffer
12567  * @enc:  the charset encoding if known
12568  *
12569  * Load and parse a DTD
12570  *
12571  * Returns the resulting xmlDtdPtr or NULL in case of error.
12572  * @input will be freed by the function in any case.
12573  */
12574 
12575 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12576 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12577 	      xmlCharEncoding enc) {
12578     xmlDtdPtr ret = NULL;
12579     xmlParserCtxtPtr ctxt;
12580     xmlParserInputPtr pinput = NULL;
12581     xmlChar start[4];
12582 
12583     if (input == NULL)
12584 	return(NULL);
12585 
12586     ctxt = xmlNewParserCtxt();
12587     if (ctxt == NULL) {
12588         xmlFreeParserInputBuffer(input);
12589 	return(NULL);
12590     }
12591 
12592     /* We are loading a DTD */
12593     ctxt->options |= XML_PARSE_DTDLOAD;
12594 
12595     /*
12596      * Set-up the SAX context
12597      */
12598     if (sax != NULL) {
12599 	if (ctxt->sax != NULL)
12600 	    xmlFree(ctxt->sax);
12601         ctxt->sax = sax;
12602         ctxt->userData = ctxt;
12603     }
12604     xmlDetectSAX2(ctxt);
12605 
12606     /*
12607      * generate a parser input from the I/O handler
12608      */
12609 
12610     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12611     if (pinput == NULL) {
12612         if (sax != NULL) ctxt->sax = NULL;
12613         xmlFreeParserInputBuffer(input);
12614 	xmlFreeParserCtxt(ctxt);
12615 	return(NULL);
12616     }
12617 
12618     /*
12619      * plug some encoding conversion routines here.
12620      */
12621     if (xmlPushInput(ctxt, pinput) < 0) {
12622         if (sax != NULL) ctxt->sax = NULL;
12623 	xmlFreeParserCtxt(ctxt);
12624 	return(NULL);
12625     }
12626     if (enc != XML_CHAR_ENCODING_NONE) {
12627         xmlSwitchEncoding(ctxt, enc);
12628     }
12629 
12630     pinput->filename = NULL;
12631     pinput->line = 1;
12632     pinput->col = 1;
12633     pinput->base = ctxt->input->cur;
12634     pinput->cur = ctxt->input->cur;
12635     pinput->free = NULL;
12636 
12637     /*
12638      * let's parse that entity knowing it's an external subset.
12639      */
12640     ctxt->inSubset = 2;
12641     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12642     if (ctxt->myDoc == NULL) {
12643 	xmlErrMemory(ctxt, "New Doc failed");
12644 	return(NULL);
12645     }
12646     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12647     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12648 	                               BAD_CAST "none", BAD_CAST "none");
12649 
12650     if ((enc == XML_CHAR_ENCODING_NONE) &&
12651         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12652 	/*
12653 	 * Get the 4 first bytes and decode the charset
12654 	 * if enc != XML_CHAR_ENCODING_NONE
12655 	 * plug some encoding conversion routines.
12656 	 */
12657 	start[0] = RAW;
12658 	start[1] = NXT(1);
12659 	start[2] = NXT(2);
12660 	start[3] = NXT(3);
12661 	enc = xmlDetectCharEncoding(start, 4);
12662 	if (enc != XML_CHAR_ENCODING_NONE) {
12663 	    xmlSwitchEncoding(ctxt, enc);
12664 	}
12665     }
12666 
12667     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12668 
12669     if (ctxt->myDoc != NULL) {
12670 	if (ctxt->wellFormed) {
12671 	    ret = ctxt->myDoc->extSubset;
12672 	    ctxt->myDoc->extSubset = NULL;
12673 	    if (ret != NULL) {
12674 		xmlNodePtr tmp;
12675 
12676 		ret->doc = NULL;
12677 		tmp = ret->children;
12678 		while (tmp != NULL) {
12679 		    tmp->doc = NULL;
12680 		    tmp = tmp->next;
12681 		}
12682 	    }
12683 	} else {
12684 	    ret = NULL;
12685 	}
12686         xmlFreeDoc(ctxt->myDoc);
12687         ctxt->myDoc = NULL;
12688     }
12689     if (sax != NULL) ctxt->sax = NULL;
12690     xmlFreeParserCtxt(ctxt);
12691 
12692     return(ret);
12693 }
12694 
12695 /**
12696  * xmlSAXParseDTD:
12697  * @sax:  the SAX handler block
12698  * @ExternalID:  a NAME* containing the External ID of the DTD
12699  * @SystemID:  a NAME* containing the URL to the DTD
12700  *
12701  * Load and parse an external subset.
12702  *
12703  * Returns the resulting xmlDtdPtr or NULL in case of error.
12704  */
12705 
12706 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12707 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12708                           const xmlChar *SystemID) {
12709     xmlDtdPtr ret = NULL;
12710     xmlParserCtxtPtr ctxt;
12711     xmlParserInputPtr input = NULL;
12712     xmlCharEncoding enc;
12713     xmlChar* systemIdCanonic;
12714 
12715     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12716 
12717     ctxt = xmlNewParserCtxt();
12718     if (ctxt == NULL) {
12719 	return(NULL);
12720     }
12721 
12722     /* We are loading a DTD */
12723     ctxt->options |= XML_PARSE_DTDLOAD;
12724 
12725     /*
12726      * Set-up the SAX context
12727      */
12728     if (sax != NULL) {
12729 	if (ctxt->sax != NULL)
12730 	    xmlFree(ctxt->sax);
12731         ctxt->sax = sax;
12732         ctxt->userData = ctxt;
12733     }
12734 
12735     /*
12736      * Canonicalise the system ID
12737      */
12738     systemIdCanonic = xmlCanonicPath(SystemID);
12739     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12740 	xmlFreeParserCtxt(ctxt);
12741 	return(NULL);
12742     }
12743 
12744     /*
12745      * Ask the Entity resolver to load the damn thing
12746      */
12747 
12748     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12749 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12750 	                                 systemIdCanonic);
12751     if (input == NULL) {
12752         if (sax != NULL) ctxt->sax = NULL;
12753 	xmlFreeParserCtxt(ctxt);
12754 	if (systemIdCanonic != NULL)
12755 	    xmlFree(systemIdCanonic);
12756 	return(NULL);
12757     }
12758 
12759     /*
12760      * plug some encoding conversion routines here.
12761      */
12762     if (xmlPushInput(ctxt, input) < 0) {
12763         if (sax != NULL) ctxt->sax = NULL;
12764 	xmlFreeParserCtxt(ctxt);
12765 	if (systemIdCanonic != NULL)
12766 	    xmlFree(systemIdCanonic);
12767 	return(NULL);
12768     }
12769     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12770 	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12771 	xmlSwitchEncoding(ctxt, enc);
12772     }
12773 
12774     if (input->filename == NULL)
12775 	input->filename = (char *) systemIdCanonic;
12776     else
12777 	xmlFree(systemIdCanonic);
12778     input->line = 1;
12779     input->col = 1;
12780     input->base = ctxt->input->cur;
12781     input->cur = ctxt->input->cur;
12782     input->free = NULL;
12783 
12784     /*
12785      * let's parse that entity knowing it's an external subset.
12786      */
12787     ctxt->inSubset = 2;
12788     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12789     if (ctxt->myDoc == NULL) {
12790 	xmlErrMemory(ctxt, "New Doc failed");
12791         if (sax != NULL) ctxt->sax = NULL;
12792 	xmlFreeParserCtxt(ctxt);
12793 	return(NULL);
12794     }
12795     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12796     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12797 	                               ExternalID, SystemID);
12798     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12799 
12800     if (ctxt->myDoc != NULL) {
12801 	if (ctxt->wellFormed) {
12802 	    ret = ctxt->myDoc->extSubset;
12803 	    ctxt->myDoc->extSubset = NULL;
12804 	    if (ret != NULL) {
12805 		xmlNodePtr tmp;
12806 
12807 		ret->doc = NULL;
12808 		tmp = ret->children;
12809 		while (tmp != NULL) {
12810 		    tmp->doc = NULL;
12811 		    tmp = tmp->next;
12812 		}
12813 	    }
12814 	} else {
12815 	    ret = NULL;
12816 	}
12817         xmlFreeDoc(ctxt->myDoc);
12818         ctxt->myDoc = NULL;
12819     }
12820     if (sax != NULL) ctxt->sax = NULL;
12821     xmlFreeParserCtxt(ctxt);
12822 
12823     return(ret);
12824 }
12825 
12826 
12827 /**
12828  * xmlParseDTD:
12829  * @ExternalID:  a NAME* containing the External ID of the DTD
12830  * @SystemID:  a NAME* containing the URL to the DTD
12831  *
12832  * Load and parse an external subset.
12833  *
12834  * Returns the resulting xmlDtdPtr or NULL in case of error.
12835  */
12836 
12837 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12838 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12839     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12840 }
12841 #endif /* LIBXML_VALID_ENABLED */
12842 
12843 /************************************************************************
12844  *									*
12845  *		Front ends when parsing an Entity			*
12846  *									*
12847  ************************************************************************/
12848 
12849 /**
12850  * xmlParseCtxtExternalEntity:
12851  * @ctx:  the existing parsing context
12852  * @URL:  the URL for the entity to load
12853  * @ID:  the System ID for the entity to load
12854  * @lst:  the return value for the set of parsed nodes
12855  *
12856  * Parse an external general entity within an existing parsing context
12857  * An external general parsed entity is well-formed if it matches the
12858  * production labeled extParsedEnt.
12859  *
12860  * [78] extParsedEnt ::= TextDecl? content
12861  *
12862  * Returns 0 if the entity is well formed, -1 in case of args problem and
12863  *    the parser error code otherwise
12864  */
12865 
12866 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12867 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12868 	               const xmlChar *ID, xmlNodePtr *lst) {
12869     xmlParserCtxtPtr ctxt;
12870     xmlDocPtr newDoc;
12871     xmlNodePtr newRoot;
12872     xmlSAXHandlerPtr oldsax = NULL;
12873     int ret = 0;
12874     xmlChar start[4];
12875     xmlCharEncoding enc;
12876 
12877     if (ctx == NULL) return(-1);
12878 
12879     if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12880         (ctx->depth > 1024)) {
12881 	return(XML_ERR_ENTITY_LOOP);
12882     }
12883 
12884     if (lst != NULL)
12885         *lst = NULL;
12886     if ((URL == NULL) && (ID == NULL))
12887 	return(-1);
12888     if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12889 	return(-1);
12890 
12891     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12892     if (ctxt == NULL) {
12893 	return(-1);
12894     }
12895 
12896     oldsax = ctxt->sax;
12897     ctxt->sax = ctx->sax;
12898     xmlDetectSAX2(ctxt);
12899     newDoc = xmlNewDoc(BAD_CAST "1.0");
12900     if (newDoc == NULL) {
12901 	xmlFreeParserCtxt(ctxt);
12902 	return(-1);
12903     }
12904     newDoc->properties = XML_DOC_INTERNAL;
12905     if (ctx->myDoc->dict) {
12906 	newDoc->dict = ctx->myDoc->dict;
12907 	xmlDictReference(newDoc->dict);
12908     }
12909     if (ctx->myDoc != NULL) {
12910 	newDoc->intSubset = ctx->myDoc->intSubset;
12911 	newDoc->extSubset = ctx->myDoc->extSubset;
12912     }
12913     if (ctx->myDoc->URL != NULL) {
12914 	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12915     }
12916     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12917     if (newRoot == NULL) {
12918 	ctxt->sax = oldsax;
12919 	xmlFreeParserCtxt(ctxt);
12920 	newDoc->intSubset = NULL;
12921 	newDoc->extSubset = NULL;
12922         xmlFreeDoc(newDoc);
12923 	return(-1);
12924     }
12925     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12926     nodePush(ctxt, newDoc->children);
12927     if (ctx->myDoc == NULL) {
12928 	ctxt->myDoc = newDoc;
12929     } else {
12930 	ctxt->myDoc = ctx->myDoc;
12931 	newDoc->children->doc = ctx->myDoc;
12932     }
12933 
12934     /*
12935      * Get the 4 first bytes and decode the charset
12936      * if enc != XML_CHAR_ENCODING_NONE
12937      * plug some encoding conversion routines.
12938      */
12939     GROW
12940     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12941 	start[0] = RAW;
12942 	start[1] = NXT(1);
12943 	start[2] = NXT(2);
12944 	start[3] = NXT(3);
12945 	enc = xmlDetectCharEncoding(start, 4);
12946 	if (enc != XML_CHAR_ENCODING_NONE) {
12947 	    xmlSwitchEncoding(ctxt, enc);
12948 	}
12949     }
12950 
12951     /*
12952      * Parse a possible text declaration first
12953      */
12954     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12955 	xmlParseTextDecl(ctxt);
12956 	/*
12957 	 * An XML-1.0 document can't reference an entity not XML-1.0
12958 	 */
12959 	if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12960 	    (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12961 	    xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12962 	                   "Version mismatch between document and entity\n");
12963 	}
12964     }
12965 
12966     /*
12967      * If the user provided its own SAX callbacks then reuse the
12968      * useData callback field, otherwise the expected setup in a
12969      * DOM builder is to have userData == ctxt
12970      */
12971     if (ctx->userData == ctx)
12972         ctxt->userData = ctxt;
12973     else
12974         ctxt->userData = ctx->userData;
12975 
12976     /*
12977      * Doing validity checking on chunk doesn't make sense
12978      */
12979     ctxt->instate = XML_PARSER_CONTENT;
12980     ctxt->validate = ctx->validate;
12981     ctxt->valid = ctx->valid;
12982     ctxt->loadsubset = ctx->loadsubset;
12983     ctxt->depth = ctx->depth + 1;
12984     ctxt->replaceEntities = ctx->replaceEntities;
12985     if (ctxt->validate) {
12986 	ctxt->vctxt.error = ctx->vctxt.error;
12987 	ctxt->vctxt.warning = ctx->vctxt.warning;
12988     } else {
12989 	ctxt->vctxt.error = NULL;
12990 	ctxt->vctxt.warning = NULL;
12991     }
12992     ctxt->vctxt.nodeTab = NULL;
12993     ctxt->vctxt.nodeNr = 0;
12994     ctxt->vctxt.nodeMax = 0;
12995     ctxt->vctxt.node = NULL;
12996     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12997     ctxt->dict = ctx->dict;
12998     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12999     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13000     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13001     ctxt->dictNames = ctx->dictNames;
13002     ctxt->attsDefault = ctx->attsDefault;
13003     ctxt->attsSpecial = ctx->attsSpecial;
13004     ctxt->linenumbers = ctx->linenumbers;
13005 
13006     xmlParseContent(ctxt);
13007 
13008     ctx->validate = ctxt->validate;
13009     ctx->valid = ctxt->valid;
13010     if ((RAW == '<') && (NXT(1) == '/')) {
13011 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13012     } else if (RAW != 0) {
13013 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13014     }
13015     if (ctxt->node != newDoc->children) {
13016 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13017     }
13018 
13019     if (!ctxt->wellFormed) {
13020         if (ctxt->errNo == 0)
13021 	    ret = 1;
13022 	else
13023 	    ret = ctxt->errNo;
13024     } else {
13025 	if (lst != NULL) {
13026 	    xmlNodePtr cur;
13027 
13028 	    /*
13029 	     * Return the newly created nodeset after unlinking it from
13030 	     * they pseudo parent.
13031 	     */
13032 	    cur = newDoc->children->children;
13033 	    *lst = cur;
13034 	    while (cur != NULL) {
13035 		cur->parent = NULL;
13036 		cur = cur->next;
13037 	    }
13038             newDoc->children->children = NULL;
13039 	}
13040 	ret = 0;
13041     }
13042     ctxt->sax = oldsax;
13043     ctxt->dict = NULL;
13044     ctxt->attsDefault = NULL;
13045     ctxt->attsSpecial = NULL;
13046     xmlFreeParserCtxt(ctxt);
13047     newDoc->intSubset = NULL;
13048     newDoc->extSubset = NULL;
13049     xmlFreeDoc(newDoc);
13050 
13051     return(ret);
13052 }
13053 
13054 /**
13055  * xmlParseExternalEntityPrivate:
13056  * @doc:  the document the chunk pertains to
13057  * @oldctxt:  the previous parser context if available
13058  * @sax:  the SAX handler bloc (possibly NULL)
13059  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13060  * @depth:  Used for loop detection, use 0
13061  * @URL:  the URL for the entity to load
13062  * @ID:  the System ID for the entity to load
13063  * @list:  the return value for the set of parsed nodes
13064  *
13065  * Private version of xmlParseExternalEntity()
13066  *
13067  * Returns 0 if the entity is well formed, -1 in case of args problem and
13068  *    the parser error code otherwise
13069  */
13070 
13071 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13072 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13073 	              xmlSAXHandlerPtr sax,
13074 		      void *user_data, int depth, const xmlChar *URL,
13075 		      const xmlChar *ID, xmlNodePtr *list) {
13076     xmlParserCtxtPtr ctxt;
13077     xmlDocPtr newDoc;
13078     xmlNodePtr newRoot;
13079     xmlSAXHandlerPtr oldsax = NULL;
13080     xmlParserErrors ret = XML_ERR_OK;
13081     xmlChar start[4];
13082     xmlCharEncoding enc;
13083 
13084     if (((depth > 40) &&
13085 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13086 	(depth > 1024)) {
13087 	return(XML_ERR_ENTITY_LOOP);
13088     }
13089 
13090     if (list != NULL)
13091         *list = NULL;
13092     if ((URL == NULL) && (ID == NULL))
13093 	return(XML_ERR_INTERNAL_ERROR);
13094     if (doc == NULL)
13095 	return(XML_ERR_INTERNAL_ERROR);
13096 
13097 
13098     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13099     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13100     ctxt->userData = ctxt;
13101     if (oldctxt != NULL) {
13102 	ctxt->_private = oldctxt->_private;
13103 	ctxt->loadsubset = oldctxt->loadsubset;
13104 	ctxt->validate = oldctxt->validate;
13105 	ctxt->external = oldctxt->external;
13106 	ctxt->record_info = oldctxt->record_info;
13107 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13108 	ctxt->node_seq.length = oldctxt->node_seq.length;
13109 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13110     } else {
13111 	/*
13112 	 * Doing validity checking on chunk without context
13113 	 * doesn't make sense
13114 	 */
13115 	ctxt->_private = NULL;
13116 	ctxt->validate = 0;
13117 	ctxt->external = 2;
13118 	ctxt->loadsubset = 0;
13119     }
13120     if (sax != NULL) {
13121 	oldsax = ctxt->sax;
13122         ctxt->sax = sax;
13123 	if (user_data != NULL)
13124 	    ctxt->userData = user_data;
13125     }
13126     xmlDetectSAX2(ctxt);
13127     newDoc = xmlNewDoc(BAD_CAST "1.0");
13128     if (newDoc == NULL) {
13129 	ctxt->node_seq.maximum = 0;
13130 	ctxt->node_seq.length = 0;
13131 	ctxt->node_seq.buffer = NULL;
13132 	xmlFreeParserCtxt(ctxt);
13133 	return(XML_ERR_INTERNAL_ERROR);
13134     }
13135     newDoc->properties = XML_DOC_INTERNAL;
13136     newDoc->intSubset = doc->intSubset;
13137     newDoc->extSubset = doc->extSubset;
13138     newDoc->dict = doc->dict;
13139     xmlDictReference(newDoc->dict);
13140 
13141     if (doc->URL != NULL) {
13142 	newDoc->URL = xmlStrdup(doc->URL);
13143     }
13144     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13145     if (newRoot == NULL) {
13146 	if (sax != NULL)
13147 	    ctxt->sax = oldsax;
13148 	ctxt->node_seq.maximum = 0;
13149 	ctxt->node_seq.length = 0;
13150 	ctxt->node_seq.buffer = NULL;
13151 	xmlFreeParserCtxt(ctxt);
13152 	newDoc->intSubset = NULL;
13153 	newDoc->extSubset = NULL;
13154         xmlFreeDoc(newDoc);
13155 	return(XML_ERR_INTERNAL_ERROR);
13156     }
13157     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13158     nodePush(ctxt, newDoc->children);
13159     ctxt->myDoc = doc;
13160     newRoot->doc = doc;
13161 
13162     /*
13163      * Get the 4 first bytes and decode the charset
13164      * if enc != XML_CHAR_ENCODING_NONE
13165      * plug some encoding conversion routines.
13166      */
13167     GROW;
13168     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13169 	start[0] = RAW;
13170 	start[1] = NXT(1);
13171 	start[2] = NXT(2);
13172 	start[3] = NXT(3);
13173 	enc = xmlDetectCharEncoding(start, 4);
13174 	if (enc != XML_CHAR_ENCODING_NONE) {
13175 	    xmlSwitchEncoding(ctxt, enc);
13176 	}
13177     }
13178 
13179     /*
13180      * Parse a possible text declaration first
13181      */
13182     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13183 	xmlParseTextDecl(ctxt);
13184     }
13185 
13186     ctxt->instate = XML_PARSER_CONTENT;
13187     ctxt->depth = depth;
13188 
13189     xmlParseContent(ctxt);
13190 
13191     if ((RAW == '<') && (NXT(1) == '/')) {
13192 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13193     } else if (RAW != 0) {
13194 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13195     }
13196     if (ctxt->node != newDoc->children) {
13197 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13198     }
13199 
13200     if (!ctxt->wellFormed) {
13201         if (ctxt->errNo == 0)
13202 	    ret = XML_ERR_INTERNAL_ERROR;
13203 	else
13204 	    ret = (xmlParserErrors)ctxt->errNo;
13205     } else {
13206 	if (list != NULL) {
13207 	    xmlNodePtr cur;
13208 
13209 	    /*
13210 	     * Return the newly created nodeset after unlinking it from
13211 	     * they pseudo parent.
13212 	     */
13213 	    cur = newDoc->children->children;
13214 	    *list = cur;
13215 	    while (cur != NULL) {
13216 		cur->parent = NULL;
13217 		cur = cur->next;
13218 	    }
13219             newDoc->children->children = NULL;
13220 	}
13221 	ret = XML_ERR_OK;
13222     }
13223 
13224     /*
13225      * Record in the parent context the number of entities replacement
13226      * done when parsing that reference.
13227      */
13228     if (oldctxt != NULL)
13229         oldctxt->nbentities += ctxt->nbentities;
13230 
13231     /*
13232      * Also record the size of the entity parsed
13233      */
13234     if (ctxt->input != NULL && oldctxt != NULL) {
13235 	oldctxt->sizeentities += ctxt->input->consumed;
13236 	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13237     }
13238     /*
13239      * And record the last error if any
13240      */
13241     if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13242         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13243 
13244     if (sax != NULL)
13245 	ctxt->sax = oldsax;
13246     if (oldctxt != NULL) {
13247         oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13248         oldctxt->node_seq.length = ctxt->node_seq.length;
13249         oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13250     }
13251     ctxt->node_seq.maximum = 0;
13252     ctxt->node_seq.length = 0;
13253     ctxt->node_seq.buffer = NULL;
13254     xmlFreeParserCtxt(ctxt);
13255     newDoc->intSubset = NULL;
13256     newDoc->extSubset = NULL;
13257     xmlFreeDoc(newDoc);
13258 
13259     return(ret);
13260 }
13261 
13262 #ifdef LIBXML_SAX1_ENABLED
13263 /**
13264  * xmlParseExternalEntity:
13265  * @doc:  the document the chunk pertains to
13266  * @sax:  the SAX handler bloc (possibly NULL)
13267  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13268  * @depth:  Used for loop detection, use 0
13269  * @URL:  the URL for the entity to load
13270  * @ID:  the System ID for the entity to load
13271  * @lst:  the return value for the set of parsed nodes
13272  *
13273  * Parse an external general entity
13274  * An external general parsed entity is well-formed if it matches the
13275  * production labeled extParsedEnt.
13276  *
13277  * [78] extParsedEnt ::= TextDecl? content
13278  *
13279  * Returns 0 if the entity is well formed, -1 in case of args problem and
13280  *    the parser error code otherwise
13281  */
13282 
13283 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13284 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13285 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13286     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13287 		                       ID, lst));
13288 }
13289 
13290 /**
13291  * xmlParseBalancedChunkMemory:
13292  * @doc:  the document the chunk pertains to
13293  * @sax:  the SAX handler bloc (possibly NULL)
13294  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13295  * @depth:  Used for loop detection, use 0
13296  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13297  * @lst:  the return value for the set of parsed nodes
13298  *
13299  * Parse a well-balanced chunk of an XML document
13300  * called by the parser
13301  * The allowed sequence for the Well Balanced Chunk is the one defined by
13302  * the content production in the XML grammar:
13303  *
13304  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13305  *
13306  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13307  *    the parser error code otherwise
13308  */
13309 
13310 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13311 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13312      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13313     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13314                                                 depth, string, lst, 0 );
13315 }
13316 #endif /* LIBXML_SAX1_ENABLED */
13317 
13318 /**
13319  * xmlParseBalancedChunkMemoryInternal:
13320  * @oldctxt:  the existing parsing context
13321  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13322  * @user_data:  the user data field for the parser context
13323  * @lst:  the return value for the set of parsed nodes
13324  *
13325  *
13326  * Parse a well-balanced chunk of an XML document
13327  * called by the parser
13328  * The allowed sequence for the Well Balanced Chunk is the one defined by
13329  * the content production in the XML grammar:
13330  *
13331  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13332  *
13333  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13334  * error code otherwise
13335  *
13336  * In case recover is set to 1, the nodelist will not be empty even if
13337  * the parsed chunk is not well balanced.
13338  */
13339 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13340 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13341 	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13342     xmlParserCtxtPtr ctxt;
13343     xmlDocPtr newDoc = NULL;
13344     xmlNodePtr newRoot;
13345     xmlSAXHandlerPtr oldsax = NULL;
13346     xmlNodePtr content = NULL;
13347     xmlNodePtr last = NULL;
13348     int size;
13349     xmlParserErrors ret = XML_ERR_OK;
13350 #ifdef SAX2
13351     int i;
13352 #endif
13353 
13354     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13355         (oldctxt->depth >  1024)) {
13356 	return(XML_ERR_ENTITY_LOOP);
13357     }
13358 
13359 
13360     if (lst != NULL)
13361         *lst = NULL;
13362     if (string == NULL)
13363         return(XML_ERR_INTERNAL_ERROR);
13364 
13365     size = xmlStrlen(string);
13366 
13367     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13368     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13369     if (user_data != NULL)
13370 	ctxt->userData = user_data;
13371     else
13372 	ctxt->userData = ctxt;
13373     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13374     ctxt->dict = oldctxt->dict;
13375     ctxt->input_id = oldctxt->input_id + 1;
13376     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13377     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13378     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13379 
13380 #ifdef SAX2
13381     /* propagate namespaces down the entity */
13382     for (i = 0;i < oldctxt->nsNr;i += 2) {
13383         nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13384     }
13385 #endif
13386 
13387     oldsax = ctxt->sax;
13388     ctxt->sax = oldctxt->sax;
13389     xmlDetectSAX2(ctxt);
13390     ctxt->replaceEntities = oldctxt->replaceEntities;
13391     ctxt->options = oldctxt->options;
13392 
13393     ctxt->_private = oldctxt->_private;
13394     if (oldctxt->myDoc == NULL) {
13395 	newDoc = xmlNewDoc(BAD_CAST "1.0");
13396 	if (newDoc == NULL) {
13397 	    ctxt->sax = oldsax;
13398 	    ctxt->dict = NULL;
13399 	    xmlFreeParserCtxt(ctxt);
13400 	    return(XML_ERR_INTERNAL_ERROR);
13401 	}
13402 	newDoc->properties = XML_DOC_INTERNAL;
13403 	newDoc->dict = ctxt->dict;
13404 	xmlDictReference(newDoc->dict);
13405 	ctxt->myDoc = newDoc;
13406     } else {
13407 	ctxt->myDoc = oldctxt->myDoc;
13408         content = ctxt->myDoc->children;
13409 	last = ctxt->myDoc->last;
13410     }
13411     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13412     if (newRoot == NULL) {
13413 	ctxt->sax = oldsax;
13414 	ctxt->dict = NULL;
13415 	xmlFreeParserCtxt(ctxt);
13416 	if (newDoc != NULL) {
13417 	    xmlFreeDoc(newDoc);
13418 	}
13419 	return(XML_ERR_INTERNAL_ERROR);
13420     }
13421     ctxt->myDoc->children = NULL;
13422     ctxt->myDoc->last = NULL;
13423     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13424     nodePush(ctxt, ctxt->myDoc->children);
13425     ctxt->instate = XML_PARSER_CONTENT;
13426     ctxt->depth = oldctxt->depth + 1;
13427 
13428     ctxt->validate = 0;
13429     ctxt->loadsubset = oldctxt->loadsubset;
13430     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13431 	/*
13432 	 * ID/IDREF registration will be done in xmlValidateElement below
13433 	 */
13434 	ctxt->loadsubset |= XML_SKIP_IDS;
13435     }
13436     ctxt->dictNames = oldctxt->dictNames;
13437     ctxt->attsDefault = oldctxt->attsDefault;
13438     ctxt->attsSpecial = oldctxt->attsSpecial;
13439 
13440     xmlParseContent(ctxt);
13441     if ((RAW == '<') && (NXT(1) == '/')) {
13442 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13443     } else if (RAW != 0) {
13444 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13445     }
13446     if (ctxt->node != ctxt->myDoc->children) {
13447 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13448     }
13449 
13450     if (!ctxt->wellFormed) {
13451         if (ctxt->errNo == 0)
13452 	    ret = XML_ERR_INTERNAL_ERROR;
13453 	else
13454 	    ret = (xmlParserErrors)ctxt->errNo;
13455     } else {
13456       ret = XML_ERR_OK;
13457     }
13458 
13459     if ((lst != NULL) && (ret == XML_ERR_OK)) {
13460 	xmlNodePtr cur;
13461 
13462 	/*
13463 	 * Return the newly created nodeset after unlinking it from
13464 	 * they pseudo parent.
13465 	 */
13466 	cur = ctxt->myDoc->children->children;
13467 	*lst = cur;
13468 	while (cur != NULL) {
13469 #ifdef LIBXML_VALID_ENABLED
13470 	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13471 		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13472 		(cur->type == XML_ELEMENT_NODE)) {
13473 		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13474 			oldctxt->myDoc, cur);
13475 	    }
13476 #endif /* LIBXML_VALID_ENABLED */
13477 	    cur->parent = NULL;
13478 	    cur = cur->next;
13479 	}
13480 	ctxt->myDoc->children->children = NULL;
13481     }
13482     if (ctxt->myDoc != NULL) {
13483 	xmlFreeNode(ctxt->myDoc->children);
13484         ctxt->myDoc->children = content;
13485         ctxt->myDoc->last = last;
13486     }
13487 
13488     /*
13489      * Record in the parent context the number of entities replacement
13490      * done when parsing that reference.
13491      */
13492     if (oldctxt != NULL)
13493         oldctxt->nbentities += ctxt->nbentities;
13494 
13495     /*
13496      * Also record the last error if any
13497      */
13498     if (ctxt->lastError.code != XML_ERR_OK)
13499         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13500 
13501     ctxt->sax = oldsax;
13502     ctxt->dict = NULL;
13503     ctxt->attsDefault = NULL;
13504     ctxt->attsSpecial = NULL;
13505     xmlFreeParserCtxt(ctxt);
13506     if (newDoc != NULL) {
13507 	xmlFreeDoc(newDoc);
13508     }
13509 
13510     return(ret);
13511 }
13512 
13513 /**
13514  * xmlParseInNodeContext:
13515  * @node:  the context node
13516  * @data:  the input string
13517  * @datalen:  the input string length in bytes
13518  * @options:  a combination of xmlParserOption
13519  * @lst:  the return value for the set of parsed nodes
13520  *
13521  * Parse a well-balanced chunk of an XML document
13522  * within the context (DTD, namespaces, etc ...) of the given node.
13523  *
13524  * The allowed sequence for the data is a Well Balanced Chunk defined by
13525  * the content production in the XML grammar:
13526  *
13527  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13528  *
13529  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13530  * error code otherwise
13531  */
13532 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13533 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13534                       int options, xmlNodePtr *lst) {
13535 #ifdef SAX2
13536     xmlParserCtxtPtr ctxt;
13537     xmlDocPtr doc = NULL;
13538     xmlNodePtr fake, cur;
13539     int nsnr = 0;
13540 
13541     xmlParserErrors ret = XML_ERR_OK;
13542 
13543     /*
13544      * check all input parameters, grab the document
13545      */
13546     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13547         return(XML_ERR_INTERNAL_ERROR);
13548     switch (node->type) {
13549         case XML_ELEMENT_NODE:
13550         case XML_ATTRIBUTE_NODE:
13551         case XML_TEXT_NODE:
13552         case XML_CDATA_SECTION_NODE:
13553         case XML_ENTITY_REF_NODE:
13554         case XML_PI_NODE:
13555         case XML_COMMENT_NODE:
13556         case XML_DOCUMENT_NODE:
13557         case XML_HTML_DOCUMENT_NODE:
13558 	    break;
13559 	default:
13560 	    return(XML_ERR_INTERNAL_ERROR);
13561 
13562     }
13563     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13564            (node->type != XML_DOCUMENT_NODE) &&
13565 	   (node->type != XML_HTML_DOCUMENT_NODE))
13566 	node = node->parent;
13567     if (node == NULL)
13568 	return(XML_ERR_INTERNAL_ERROR);
13569     if (node->type == XML_ELEMENT_NODE)
13570 	doc = node->doc;
13571     else
13572         doc = (xmlDocPtr) node;
13573     if (doc == NULL)
13574 	return(XML_ERR_INTERNAL_ERROR);
13575 
13576     /*
13577      * allocate a context and set-up everything not related to the
13578      * node position in the tree
13579      */
13580     if (doc->type == XML_DOCUMENT_NODE)
13581 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13582 #ifdef LIBXML_HTML_ENABLED
13583     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13584 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13585         /*
13586          * When parsing in context, it makes no sense to add implied
13587          * elements like html/body/etc...
13588          */
13589         options |= HTML_PARSE_NOIMPLIED;
13590     }
13591 #endif
13592     else
13593         return(XML_ERR_INTERNAL_ERROR);
13594 
13595     if (ctxt == NULL)
13596         return(XML_ERR_NO_MEMORY);
13597 
13598     /*
13599      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13600      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13601      * we must wait until the last moment to free the original one.
13602      */
13603     if (doc->dict != NULL) {
13604         if (ctxt->dict != NULL)
13605 	    xmlDictFree(ctxt->dict);
13606 	ctxt->dict = doc->dict;
13607     } else
13608         options |= XML_PARSE_NODICT;
13609 
13610     if (doc->encoding != NULL) {
13611         xmlCharEncodingHandlerPtr hdlr;
13612 
13613         if (ctxt->encoding != NULL)
13614 	    xmlFree((xmlChar *) ctxt->encoding);
13615         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13616 
13617         hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13618         if (hdlr != NULL) {
13619             xmlSwitchToEncoding(ctxt, hdlr);
13620 	} else {
13621             return(XML_ERR_UNSUPPORTED_ENCODING);
13622         }
13623     }
13624 
13625     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13626     xmlDetectSAX2(ctxt);
13627     ctxt->myDoc = doc;
13628     /* parsing in context, i.e. as within existing content */
13629     ctxt->input_id = 2;
13630     ctxt->instate = XML_PARSER_CONTENT;
13631 
13632     fake = xmlNewComment(NULL);
13633     if (fake == NULL) {
13634         xmlFreeParserCtxt(ctxt);
13635 	return(XML_ERR_NO_MEMORY);
13636     }
13637     xmlAddChild(node, fake);
13638 
13639     if (node->type == XML_ELEMENT_NODE) {
13640 	nodePush(ctxt, node);
13641 	/*
13642 	 * initialize the SAX2 namespaces stack
13643 	 */
13644 	cur = node;
13645 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13646 	    xmlNsPtr ns = cur->nsDef;
13647 	    const xmlChar *iprefix, *ihref;
13648 
13649 	    while (ns != NULL) {
13650 		if (ctxt->dict) {
13651 		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13652 		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13653 		} else {
13654 		    iprefix = ns->prefix;
13655 		    ihref = ns->href;
13656 		}
13657 
13658 	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13659 		    nsPush(ctxt, iprefix, ihref);
13660 		    nsnr++;
13661 		}
13662 		ns = ns->next;
13663 	    }
13664 	    cur = cur->parent;
13665 	}
13666     }
13667 
13668     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13669 	/*
13670 	 * ID/IDREF registration will be done in xmlValidateElement below
13671 	 */
13672 	ctxt->loadsubset |= XML_SKIP_IDS;
13673     }
13674 
13675 #ifdef LIBXML_HTML_ENABLED
13676     if (doc->type == XML_HTML_DOCUMENT_NODE)
13677         __htmlParseContent(ctxt);
13678     else
13679 #endif
13680 	xmlParseContent(ctxt);
13681 
13682     nsPop(ctxt, nsnr);
13683     if ((RAW == '<') && (NXT(1) == '/')) {
13684 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13685     } else if (RAW != 0) {
13686 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13687     }
13688     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13689 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13690 	ctxt->wellFormed = 0;
13691     }
13692 
13693     if (!ctxt->wellFormed) {
13694         if (ctxt->errNo == 0)
13695 	    ret = XML_ERR_INTERNAL_ERROR;
13696 	else
13697 	    ret = (xmlParserErrors)ctxt->errNo;
13698     } else {
13699         ret = XML_ERR_OK;
13700     }
13701 
13702     /*
13703      * Return the newly created nodeset after unlinking it from
13704      * the pseudo sibling.
13705      */
13706 
13707     cur = fake->next;
13708     fake->next = NULL;
13709     node->last = fake;
13710 
13711     if (cur != NULL) {
13712 	cur->prev = NULL;
13713     }
13714 
13715     *lst = cur;
13716 
13717     while (cur != NULL) {
13718 	cur->parent = NULL;
13719 	cur = cur->next;
13720     }
13721 
13722     xmlUnlinkNode(fake);
13723     xmlFreeNode(fake);
13724 
13725 
13726     if (ret != XML_ERR_OK) {
13727         xmlFreeNodeList(*lst);
13728 	*lst = NULL;
13729     }
13730 
13731     if (doc->dict != NULL)
13732         ctxt->dict = NULL;
13733     xmlFreeParserCtxt(ctxt);
13734 
13735     return(ret);
13736 #else /* !SAX2 */
13737     return(XML_ERR_INTERNAL_ERROR);
13738 #endif
13739 }
13740 
13741 #ifdef LIBXML_SAX1_ENABLED
13742 /**
13743  * xmlParseBalancedChunkMemoryRecover:
13744  * @doc:  the document the chunk pertains to
13745  * @sax:  the SAX handler bloc (possibly NULL)
13746  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13747  * @depth:  Used for loop detection, use 0
13748  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13749  * @lst:  the return value for the set of parsed nodes
13750  * @recover: return nodes even if the data is broken (use 0)
13751  *
13752  *
13753  * Parse a well-balanced chunk of an XML document
13754  * called by the parser
13755  * The allowed sequence for the Well Balanced Chunk is the one defined by
13756  * the content production in the XML grammar:
13757  *
13758  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13759  *
13760  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13761  *    the parser error code otherwise
13762  *
13763  * In case recover is set to 1, the nodelist will not be empty even if
13764  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13765  * some extent.
13766  */
13767 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13768 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13769      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13770      int recover) {
13771     xmlParserCtxtPtr ctxt;
13772     xmlDocPtr newDoc;
13773     xmlSAXHandlerPtr oldsax = NULL;
13774     xmlNodePtr content, newRoot;
13775     int size;
13776     int ret = 0;
13777 
13778     if (depth > 40) {
13779 	return(XML_ERR_ENTITY_LOOP);
13780     }
13781 
13782 
13783     if (lst != NULL)
13784         *lst = NULL;
13785     if (string == NULL)
13786         return(-1);
13787 
13788     size = xmlStrlen(string);
13789 
13790     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13791     if (ctxt == NULL) return(-1);
13792     ctxt->userData = ctxt;
13793     if (sax != NULL) {
13794 	oldsax = ctxt->sax;
13795         ctxt->sax = sax;
13796 	if (user_data != NULL)
13797 	    ctxt->userData = user_data;
13798     }
13799     newDoc = xmlNewDoc(BAD_CAST "1.0");
13800     if (newDoc == NULL) {
13801 	xmlFreeParserCtxt(ctxt);
13802 	return(-1);
13803     }
13804     newDoc->properties = XML_DOC_INTERNAL;
13805     if ((doc != NULL) && (doc->dict != NULL)) {
13806         xmlDictFree(ctxt->dict);
13807 	ctxt->dict = doc->dict;
13808 	xmlDictReference(ctxt->dict);
13809 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13810 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13811 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13812 	ctxt->dictNames = 1;
13813     } else {
13814 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13815     }
13816     if (doc != NULL) {
13817 	newDoc->intSubset = doc->intSubset;
13818 	newDoc->extSubset = doc->extSubset;
13819     }
13820     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13821     if (newRoot == NULL) {
13822 	if (sax != NULL)
13823 	    ctxt->sax = oldsax;
13824 	xmlFreeParserCtxt(ctxt);
13825 	newDoc->intSubset = NULL;
13826 	newDoc->extSubset = NULL;
13827         xmlFreeDoc(newDoc);
13828 	return(-1);
13829     }
13830     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13831     nodePush(ctxt, newRoot);
13832     if (doc == NULL) {
13833 	ctxt->myDoc = newDoc;
13834     } else {
13835 	ctxt->myDoc = newDoc;
13836 	newDoc->children->doc = doc;
13837 	/* Ensure that doc has XML spec namespace */
13838 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13839 	newDoc->oldNs = doc->oldNs;
13840     }
13841     ctxt->instate = XML_PARSER_CONTENT;
13842     ctxt->input_id = 2;
13843     ctxt->depth = depth;
13844 
13845     /*
13846      * Doing validity checking on chunk doesn't make sense
13847      */
13848     ctxt->validate = 0;
13849     ctxt->loadsubset = 0;
13850     xmlDetectSAX2(ctxt);
13851 
13852     if ( doc != NULL ){
13853         content = doc->children;
13854         doc->children = NULL;
13855         xmlParseContent(ctxt);
13856         doc->children = content;
13857     }
13858     else {
13859         xmlParseContent(ctxt);
13860     }
13861     if ((RAW == '<') && (NXT(1) == '/')) {
13862 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13863     } else if (RAW != 0) {
13864 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13865     }
13866     if (ctxt->node != newDoc->children) {
13867 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13868     }
13869 
13870     if (!ctxt->wellFormed) {
13871         if (ctxt->errNo == 0)
13872 	    ret = 1;
13873 	else
13874 	    ret = ctxt->errNo;
13875     } else {
13876       ret = 0;
13877     }
13878 
13879     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13880 	xmlNodePtr cur;
13881 
13882 	/*
13883 	 * Return the newly created nodeset after unlinking it from
13884 	 * they pseudo parent.
13885 	 */
13886 	cur = newDoc->children->children;
13887 	*lst = cur;
13888 	while (cur != NULL) {
13889 	    xmlSetTreeDoc(cur, doc);
13890 	    cur->parent = NULL;
13891 	    cur = cur->next;
13892 	}
13893 	newDoc->children->children = NULL;
13894     }
13895 
13896     if (sax != NULL)
13897 	ctxt->sax = oldsax;
13898     xmlFreeParserCtxt(ctxt);
13899     newDoc->intSubset = NULL;
13900     newDoc->extSubset = NULL;
13901     newDoc->oldNs = NULL;
13902     xmlFreeDoc(newDoc);
13903 
13904     return(ret);
13905 }
13906 
13907 /**
13908  * xmlSAXParseEntity:
13909  * @sax:  the SAX handler block
13910  * @filename:  the filename
13911  *
13912  * parse an XML external entity out of context and build a tree.
13913  * It use the given SAX function block to handle the parsing callback.
13914  * If sax is NULL, fallback to the default DOM tree building routines.
13915  *
13916  * [78] extParsedEnt ::= TextDecl? content
13917  *
13918  * This correspond to a "Well Balanced" chunk
13919  *
13920  * Returns the resulting document tree
13921  */
13922 
13923 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13924 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13925     xmlDocPtr ret;
13926     xmlParserCtxtPtr ctxt;
13927 
13928     ctxt = xmlCreateFileParserCtxt(filename);
13929     if (ctxt == NULL) {
13930 	return(NULL);
13931     }
13932     if (sax != NULL) {
13933 	if (ctxt->sax != NULL)
13934 	    xmlFree(ctxt->sax);
13935         ctxt->sax = sax;
13936         ctxt->userData = NULL;
13937     }
13938 
13939     xmlParseExtParsedEnt(ctxt);
13940 
13941     if (ctxt->wellFormed)
13942 	ret = ctxt->myDoc;
13943     else {
13944         ret = NULL;
13945         xmlFreeDoc(ctxt->myDoc);
13946         ctxt->myDoc = NULL;
13947     }
13948     if (sax != NULL)
13949         ctxt->sax = NULL;
13950     xmlFreeParserCtxt(ctxt);
13951 
13952     return(ret);
13953 }
13954 
13955 /**
13956  * xmlParseEntity:
13957  * @filename:  the filename
13958  *
13959  * parse an XML external entity out of context and build a tree.
13960  *
13961  * [78] extParsedEnt ::= TextDecl? content
13962  *
13963  * This correspond to a "Well Balanced" chunk
13964  *
13965  * Returns the resulting document tree
13966  */
13967 
13968 xmlDocPtr
xmlParseEntity(const char * filename)13969 xmlParseEntity(const char *filename) {
13970     return(xmlSAXParseEntity(NULL, filename));
13971 }
13972 #endif /* LIBXML_SAX1_ENABLED */
13973 
13974 /**
13975  * xmlCreateEntityParserCtxtInternal:
13976  * @URL:  the entity URL
13977  * @ID:  the entity PUBLIC ID
13978  * @base:  a possible base for the target URI
13979  * @pctx:  parser context used to set options on new context
13980  *
13981  * Create a parser context for an external entity
13982  * Automatic support for ZLIB/Compress compressed document is provided
13983  * by default if found at compile-time.
13984  *
13985  * Returns the new parser context or NULL
13986  */
13987 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13988 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13989 	                  const xmlChar *base, xmlParserCtxtPtr pctx) {
13990     xmlParserCtxtPtr ctxt;
13991     xmlParserInputPtr inputStream;
13992     char *directory = NULL;
13993     xmlChar *uri;
13994 
13995     ctxt = xmlNewParserCtxt();
13996     if (ctxt == NULL) {
13997 	return(NULL);
13998     }
13999 
14000     if (pctx != NULL) {
14001         ctxt->options = pctx->options;
14002         ctxt->_private = pctx->_private;
14003 	/*
14004 	 * this is a subparser of pctx, so the input_id should be
14005 	 * incremented to distinguish from main entity
14006 	 */
14007 	ctxt->input_id = pctx->input_id + 1;
14008     }
14009 
14010     uri = xmlBuildURI(URL, base);
14011 
14012     if (uri == NULL) {
14013 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14014 	if (inputStream == NULL) {
14015 	    xmlFreeParserCtxt(ctxt);
14016 	    return(NULL);
14017 	}
14018 
14019 	inputPush(ctxt, inputStream);
14020 
14021 	if ((ctxt->directory == NULL) && (directory == NULL))
14022 	    directory = xmlParserGetDirectory((char *)URL);
14023 	if ((ctxt->directory == NULL) && (directory != NULL))
14024 	    ctxt->directory = directory;
14025     } else {
14026 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14027 	if (inputStream == NULL) {
14028 	    xmlFree(uri);
14029 	    xmlFreeParserCtxt(ctxt);
14030 	    return(NULL);
14031 	}
14032 
14033 	inputPush(ctxt, inputStream);
14034 
14035 	if ((ctxt->directory == NULL) && (directory == NULL))
14036 	    directory = xmlParserGetDirectory((char *)uri);
14037 	if ((ctxt->directory == NULL) && (directory != NULL))
14038 	    ctxt->directory = directory;
14039 	xmlFree(uri);
14040     }
14041     return(ctxt);
14042 }
14043 
14044 /**
14045  * xmlCreateEntityParserCtxt:
14046  * @URL:  the entity URL
14047  * @ID:  the entity PUBLIC ID
14048  * @base:  a possible base for the target URI
14049  *
14050  * Create a parser context for an external entity
14051  * Automatic support for ZLIB/Compress compressed document is provided
14052  * by default if found at compile-time.
14053  *
14054  * Returns the new parser context or NULL
14055  */
14056 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14057 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14058 	                  const xmlChar *base) {
14059     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14060 
14061 }
14062 
14063 /************************************************************************
14064  *									*
14065  *		Front ends when parsing from a file			*
14066  *									*
14067  ************************************************************************/
14068 
14069 /**
14070  * xmlCreateURLParserCtxt:
14071  * @filename:  the filename or URL
14072  * @options:  a combination of xmlParserOption
14073  *
14074  * Create a parser context for a file or URL content.
14075  * Automatic support for ZLIB/Compress compressed document is provided
14076  * by default if found at compile-time and for file accesses
14077  *
14078  * Returns the new parser context or NULL
14079  */
14080 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14081 xmlCreateURLParserCtxt(const char *filename, int options)
14082 {
14083     xmlParserCtxtPtr ctxt;
14084     xmlParserInputPtr inputStream;
14085     char *directory = NULL;
14086 
14087     ctxt = xmlNewParserCtxt();
14088     if (ctxt == NULL) {
14089 	xmlErrMemory(NULL, "cannot allocate parser context");
14090 	return(NULL);
14091     }
14092 
14093     if (options)
14094 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14095     ctxt->linenumbers = 1;
14096 
14097     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14098     if (inputStream == NULL) {
14099 	xmlFreeParserCtxt(ctxt);
14100 	return(NULL);
14101     }
14102 
14103     inputPush(ctxt, inputStream);
14104     if ((ctxt->directory == NULL) && (directory == NULL))
14105         directory = xmlParserGetDirectory(filename);
14106     if ((ctxt->directory == NULL) && (directory != NULL))
14107         ctxt->directory = directory;
14108 
14109     return(ctxt);
14110 }
14111 
14112 /**
14113  * xmlCreateFileParserCtxt:
14114  * @filename:  the filename
14115  *
14116  * Create a parser context for a file content.
14117  * Automatic support for ZLIB/Compress compressed document is provided
14118  * by default if found at compile-time.
14119  *
14120  * Returns the new parser context or NULL
14121  */
14122 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14123 xmlCreateFileParserCtxt(const char *filename)
14124 {
14125     return(xmlCreateURLParserCtxt(filename, 0));
14126 }
14127 
14128 #ifdef LIBXML_SAX1_ENABLED
14129 /**
14130  * xmlSAXParseFileWithData:
14131  * @sax:  the SAX handler block
14132  * @filename:  the filename
14133  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14134  *             documents
14135  * @data:  the userdata
14136  *
14137  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14138  * compressed document is provided by default if found at compile-time.
14139  * It use the given SAX function block to handle the parsing callback.
14140  * If sax is NULL, fallback to the default DOM tree building routines.
14141  *
14142  * User data (void *) is stored within the parser context in the
14143  * context's _private member, so it is available nearly everywhere in libxml
14144  *
14145  * Returns the resulting document tree
14146  */
14147 
14148 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14149 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14150                         int recovery, void *data) {
14151     xmlDocPtr ret;
14152     xmlParserCtxtPtr ctxt;
14153 
14154     xmlInitParser();
14155 
14156     ctxt = xmlCreateFileParserCtxt(filename);
14157     if (ctxt == NULL) {
14158 	return(NULL);
14159     }
14160     if (sax != NULL) {
14161 	if (ctxt->sax != NULL)
14162 	    xmlFree(ctxt->sax);
14163         ctxt->sax = sax;
14164     }
14165     xmlDetectSAX2(ctxt);
14166     if (data!=NULL) {
14167 	ctxt->_private = data;
14168     }
14169 
14170     if (ctxt->directory == NULL)
14171         ctxt->directory = xmlParserGetDirectory(filename);
14172 
14173     ctxt->recovery = recovery;
14174 
14175     xmlParseDocument(ctxt);
14176 
14177     if ((ctxt->wellFormed) || recovery) {
14178         ret = ctxt->myDoc;
14179 	if (ret != NULL) {
14180 	    if (ctxt->input->buf->compressed > 0)
14181 		ret->compression = 9;
14182 	    else
14183 		ret->compression = ctxt->input->buf->compressed;
14184 	}
14185     }
14186     else {
14187        ret = NULL;
14188        xmlFreeDoc(ctxt->myDoc);
14189        ctxt->myDoc = NULL;
14190     }
14191     if (sax != NULL)
14192         ctxt->sax = NULL;
14193     xmlFreeParserCtxt(ctxt);
14194 
14195     return(ret);
14196 }
14197 
14198 /**
14199  * xmlSAXParseFile:
14200  * @sax:  the SAX handler block
14201  * @filename:  the filename
14202  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14203  *             documents
14204  *
14205  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14206  * compressed document is provided by default if found at compile-time.
14207  * It use the given SAX function block to handle the parsing callback.
14208  * If sax is NULL, fallback to the default DOM tree building routines.
14209  *
14210  * Returns the resulting document tree
14211  */
14212 
14213 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14214 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14215                           int recovery) {
14216     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14217 }
14218 
14219 /**
14220  * xmlRecoverDoc:
14221  * @cur:  a pointer to an array of xmlChar
14222  *
14223  * parse an XML in-memory document and build a tree.
14224  * In the case the document is not Well Formed, a attempt to build a
14225  * tree is tried anyway
14226  *
14227  * Returns the resulting document tree or NULL in case of failure
14228  */
14229 
14230 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14231 xmlRecoverDoc(const xmlChar *cur) {
14232     return(xmlSAXParseDoc(NULL, cur, 1));
14233 }
14234 
14235 /**
14236  * xmlParseFile:
14237  * @filename:  the filename
14238  *
14239  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14240  * compressed document is provided by default if found at compile-time.
14241  *
14242  * Returns the resulting document tree if the file was wellformed,
14243  * NULL otherwise.
14244  */
14245 
14246 xmlDocPtr
xmlParseFile(const char * filename)14247 xmlParseFile(const char *filename) {
14248     return(xmlSAXParseFile(NULL, filename, 0));
14249 }
14250 
14251 /**
14252  * xmlRecoverFile:
14253  * @filename:  the filename
14254  *
14255  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14256  * compressed document is provided by default if found at compile-time.
14257  * In the case the document is not Well Formed, it attempts to build
14258  * a tree anyway
14259  *
14260  * Returns the resulting document tree or NULL in case of failure
14261  */
14262 
14263 xmlDocPtr
xmlRecoverFile(const char * filename)14264 xmlRecoverFile(const char *filename) {
14265     return(xmlSAXParseFile(NULL, filename, 1));
14266 }
14267 
14268 
14269 /**
14270  * xmlSetupParserForBuffer:
14271  * @ctxt:  an XML parser context
14272  * @buffer:  a xmlChar * buffer
14273  * @filename:  a file name
14274  *
14275  * Setup the parser context to parse a new buffer; Clears any prior
14276  * contents from the parser context. The buffer parameter must not be
14277  * NULL, but the filename parameter can be
14278  */
14279 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14280 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14281                              const char* filename)
14282 {
14283     xmlParserInputPtr input;
14284 
14285     if ((ctxt == NULL) || (buffer == NULL))
14286         return;
14287 
14288     input = xmlNewInputStream(ctxt);
14289     if (input == NULL) {
14290         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14291         xmlClearParserCtxt(ctxt);
14292         return;
14293     }
14294 
14295     xmlClearParserCtxt(ctxt);
14296     if (filename != NULL)
14297         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14298     input->base = buffer;
14299     input->cur = buffer;
14300     input->end = &buffer[xmlStrlen(buffer)];
14301     inputPush(ctxt, input);
14302 }
14303 
14304 /**
14305  * xmlSAXUserParseFile:
14306  * @sax:  a SAX handler
14307  * @user_data:  The user data returned on SAX callbacks
14308  * @filename:  a file name
14309  *
14310  * parse an XML file and call the given SAX handler routines.
14311  * Automatic support for ZLIB/Compress compressed document is provided
14312  *
14313  * Returns 0 in case of success or a error number otherwise
14314  */
14315 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14316 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14317                     const char *filename) {
14318     int ret = 0;
14319     xmlParserCtxtPtr ctxt;
14320 
14321     ctxt = xmlCreateFileParserCtxt(filename);
14322     if (ctxt == NULL) return -1;
14323     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14324 	xmlFree(ctxt->sax);
14325     ctxt->sax = sax;
14326     xmlDetectSAX2(ctxt);
14327 
14328     if (user_data != NULL)
14329 	ctxt->userData = user_data;
14330 
14331     xmlParseDocument(ctxt);
14332 
14333     if (ctxt->wellFormed)
14334 	ret = 0;
14335     else {
14336         if (ctxt->errNo != 0)
14337 	    ret = ctxt->errNo;
14338 	else
14339 	    ret = -1;
14340     }
14341     if (sax != NULL)
14342 	ctxt->sax = NULL;
14343     if (ctxt->myDoc != NULL) {
14344         xmlFreeDoc(ctxt->myDoc);
14345 	ctxt->myDoc = NULL;
14346     }
14347     xmlFreeParserCtxt(ctxt);
14348 
14349     return ret;
14350 }
14351 #endif /* LIBXML_SAX1_ENABLED */
14352 
14353 /************************************************************************
14354  *									*
14355  *		Front ends when parsing from memory			*
14356  *									*
14357  ************************************************************************/
14358 
14359 /**
14360  * xmlCreateMemoryParserCtxt:
14361  * @buffer:  a pointer to a char array
14362  * @size:  the size of the array
14363  *
14364  * Create a parser context for an XML in-memory document.
14365  *
14366  * Returns the new parser context or NULL
14367  */
14368 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14369 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14370     xmlParserCtxtPtr ctxt;
14371     xmlParserInputPtr input;
14372     xmlParserInputBufferPtr buf;
14373 
14374     if (buffer == NULL)
14375 	return(NULL);
14376     if (size <= 0)
14377 	return(NULL);
14378 
14379     ctxt = xmlNewParserCtxt();
14380     if (ctxt == NULL)
14381 	return(NULL);
14382 
14383     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14384     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14385     if (buf == NULL) {
14386 	xmlFreeParserCtxt(ctxt);
14387 	return(NULL);
14388     }
14389 
14390     input = xmlNewInputStream(ctxt);
14391     if (input == NULL) {
14392 	xmlFreeParserInputBuffer(buf);
14393 	xmlFreeParserCtxt(ctxt);
14394 	return(NULL);
14395     }
14396 
14397     input->filename = NULL;
14398     input->buf = buf;
14399     xmlBufResetInput(input->buf->buffer, input);
14400 
14401     inputPush(ctxt, input);
14402     return(ctxt);
14403 }
14404 
14405 #ifdef LIBXML_SAX1_ENABLED
14406 /**
14407  * xmlSAXParseMemoryWithData:
14408  * @sax:  the SAX handler block
14409  * @buffer:  an pointer to a char array
14410  * @size:  the size of the array
14411  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14412  *             documents
14413  * @data:  the userdata
14414  *
14415  * parse an XML in-memory block and use the given SAX function block
14416  * to handle the parsing callback. If sax is NULL, fallback to the default
14417  * DOM tree building routines.
14418  *
14419  * User data (void *) is stored within the parser context in the
14420  * context's _private member, so it is available nearly everywhere in libxml
14421  *
14422  * Returns the resulting document tree
14423  */
14424 
14425 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14426 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14427 	          int size, int recovery, void *data) {
14428     xmlDocPtr ret;
14429     xmlParserCtxtPtr ctxt;
14430 
14431     xmlInitParser();
14432 
14433     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14434     if (ctxt == NULL) return(NULL);
14435     if (sax != NULL) {
14436 	if (ctxt->sax != NULL)
14437 	    xmlFree(ctxt->sax);
14438         ctxt->sax = sax;
14439     }
14440     xmlDetectSAX2(ctxt);
14441     if (data!=NULL) {
14442 	ctxt->_private=data;
14443     }
14444 
14445     ctxt->recovery = recovery;
14446 
14447     xmlParseDocument(ctxt);
14448 
14449     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14450     else {
14451        ret = NULL;
14452        xmlFreeDoc(ctxt->myDoc);
14453        ctxt->myDoc = NULL;
14454     }
14455     if (sax != NULL)
14456 	ctxt->sax = NULL;
14457     xmlFreeParserCtxt(ctxt);
14458 
14459     return(ret);
14460 }
14461 
14462 /**
14463  * xmlSAXParseMemory:
14464  * @sax:  the SAX handler block
14465  * @buffer:  an pointer to a char array
14466  * @size:  the size of the array
14467  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14468  *             documents
14469  *
14470  * parse an XML in-memory block and use the given SAX function block
14471  * to handle the parsing callback. If sax is NULL, fallback to the default
14472  * DOM tree building routines.
14473  *
14474  * Returns the resulting document tree
14475  */
14476 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14477 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14478 	          int size, int recovery) {
14479     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14480 }
14481 
14482 /**
14483  * xmlParseMemory:
14484  * @buffer:  an pointer to a char array
14485  * @size:  the size of the array
14486  *
14487  * parse an XML in-memory block and build a tree.
14488  *
14489  * Returns the resulting document tree
14490  */
14491 
xmlParseMemory(const char * buffer,int size)14492 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14493    return(xmlSAXParseMemory(NULL, buffer, size, 0));
14494 }
14495 
14496 /**
14497  * xmlRecoverMemory:
14498  * @buffer:  an pointer to a char array
14499  * @size:  the size of the array
14500  *
14501  * parse an XML in-memory block and build a tree.
14502  * In the case the document is not Well Formed, an attempt to
14503  * build a tree is tried anyway
14504  *
14505  * Returns the resulting document tree or NULL in case of error
14506  */
14507 
xmlRecoverMemory(const char * buffer,int size)14508 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14509    return(xmlSAXParseMemory(NULL, buffer, size, 1));
14510 }
14511 
14512 /**
14513  * xmlSAXUserParseMemory:
14514  * @sax:  a SAX handler
14515  * @user_data:  The user data returned on SAX callbacks
14516  * @buffer:  an in-memory XML document input
14517  * @size:  the length of the XML document in bytes
14518  *
14519  * A better SAX parsing routine.
14520  * parse an XML in-memory buffer and call the given SAX handler routines.
14521  *
14522  * Returns 0 in case of success or a error number otherwise
14523  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14524 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14525 			  const char *buffer, int size) {
14526     int ret = 0;
14527     xmlParserCtxtPtr ctxt;
14528 
14529     xmlInitParser();
14530 
14531     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14532     if (ctxt == NULL) return -1;
14533     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14534         xmlFree(ctxt->sax);
14535     ctxt->sax = sax;
14536     xmlDetectSAX2(ctxt);
14537 
14538     if (user_data != NULL)
14539 	ctxt->userData = user_data;
14540 
14541     xmlParseDocument(ctxt);
14542 
14543     if (ctxt->wellFormed)
14544 	ret = 0;
14545     else {
14546         if (ctxt->errNo != 0)
14547 	    ret = ctxt->errNo;
14548 	else
14549 	    ret = -1;
14550     }
14551     if (sax != NULL)
14552         ctxt->sax = NULL;
14553     if (ctxt->myDoc != NULL) {
14554         xmlFreeDoc(ctxt->myDoc);
14555 	ctxt->myDoc = NULL;
14556     }
14557     xmlFreeParserCtxt(ctxt);
14558 
14559     return ret;
14560 }
14561 #endif /* LIBXML_SAX1_ENABLED */
14562 
14563 /**
14564  * xmlCreateDocParserCtxt:
14565  * @cur:  a pointer to an array of xmlChar
14566  *
14567  * Creates a parser context for an XML in-memory document.
14568  *
14569  * Returns the new parser context or NULL
14570  */
14571 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14572 xmlCreateDocParserCtxt(const xmlChar *cur) {
14573     int len;
14574 
14575     if (cur == NULL)
14576 	return(NULL);
14577     len = xmlStrlen(cur);
14578     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14579 }
14580 
14581 #ifdef LIBXML_SAX1_ENABLED
14582 /**
14583  * xmlSAXParseDoc:
14584  * @sax:  the SAX handler block
14585  * @cur:  a pointer to an array of xmlChar
14586  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14587  *             documents
14588  *
14589  * parse an XML in-memory document and build a tree.
14590  * It use the given SAX function block to handle the parsing callback.
14591  * If sax is NULL, fallback to the default DOM tree building routines.
14592  *
14593  * Returns the resulting document tree
14594  */
14595 
14596 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14597 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14598     xmlDocPtr ret;
14599     xmlParserCtxtPtr ctxt;
14600     xmlSAXHandlerPtr oldsax = NULL;
14601 
14602     if (cur == NULL) return(NULL);
14603 
14604 
14605     ctxt = xmlCreateDocParserCtxt(cur);
14606     if (ctxt == NULL) return(NULL);
14607     if (sax != NULL) {
14608         oldsax = ctxt->sax;
14609         ctxt->sax = sax;
14610         ctxt->userData = NULL;
14611     }
14612     xmlDetectSAX2(ctxt);
14613 
14614     xmlParseDocument(ctxt);
14615     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14616     else {
14617        ret = NULL;
14618        xmlFreeDoc(ctxt->myDoc);
14619        ctxt->myDoc = NULL;
14620     }
14621     if (sax != NULL)
14622 	ctxt->sax = oldsax;
14623     xmlFreeParserCtxt(ctxt);
14624 
14625     return(ret);
14626 }
14627 
14628 /**
14629  * xmlParseDoc:
14630  * @cur:  a pointer to an array of xmlChar
14631  *
14632  * parse an XML in-memory document and build a tree.
14633  *
14634  * Returns the resulting document tree
14635  */
14636 
14637 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14638 xmlParseDoc(const xmlChar *cur) {
14639     return(xmlSAXParseDoc(NULL, cur, 0));
14640 }
14641 #endif /* LIBXML_SAX1_ENABLED */
14642 
14643 #ifdef LIBXML_LEGACY_ENABLED
14644 /************************************************************************
14645  *									*
14646  *	Specific function to keep track of entities references		*
14647  *	and used by the XSLT debugger					*
14648  *									*
14649  ************************************************************************/
14650 
14651 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14652 
14653 /**
14654  * xmlAddEntityReference:
14655  * @ent : A valid entity
14656  * @firstNode : A valid first node for children of entity
14657  * @lastNode : A valid last node of children entity
14658  *
14659  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14660  */
14661 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14662 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14663                       xmlNodePtr lastNode)
14664 {
14665     if (xmlEntityRefFunc != NULL) {
14666         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14667     }
14668 }
14669 
14670 
14671 /**
14672  * xmlSetEntityReferenceFunc:
14673  * @func: A valid function
14674  *
14675  * Set the function to call call back when a xml reference has been made
14676  */
14677 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14678 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14679 {
14680     xmlEntityRefFunc = func;
14681 }
14682 #endif /* LIBXML_LEGACY_ENABLED */
14683 
14684 /************************************************************************
14685  *									*
14686  *				Miscellaneous				*
14687  *									*
14688  ************************************************************************/
14689 
14690 #ifdef LIBXML_XPATH_ENABLED
14691 #include <libxml/xpath.h>
14692 #endif
14693 
14694 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14695 static int xmlParserInitialized = 0;
14696 
14697 /**
14698  * xmlInitParser:
14699  *
14700  * Initialization function for the XML parser.
14701  * This is not reentrant. Call once before processing in case of
14702  * use in multithreaded programs.
14703  */
14704 
14705 void
xmlInitParser(void)14706 xmlInitParser(void) {
14707     if (xmlParserInitialized != 0)
14708 	return;
14709 
14710 #ifdef LIBXML_THREAD_ENABLED
14711     __xmlGlobalInitMutexLock();
14712     if (xmlParserInitialized == 0) {
14713 #endif
14714 	xmlInitThreads();
14715 	xmlInitGlobals();
14716 	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14717 	    (xmlGenericError == NULL))
14718 	    initGenericErrorDefaultFunc(NULL);
14719 	xmlInitMemory();
14720         xmlInitializeDict();
14721 	xmlInitCharEncodingHandlers();
14722 	xmlDefaultSAXHandlerInit();
14723 	xmlRegisterDefaultInputCallbacks();
14724 #ifdef LIBXML_OUTPUT_ENABLED
14725 	xmlRegisterDefaultOutputCallbacks();
14726 #endif /* LIBXML_OUTPUT_ENABLED */
14727 #ifdef LIBXML_HTML_ENABLED
14728 	htmlInitAutoClose();
14729 	htmlDefaultSAXHandlerInit();
14730 #endif
14731 #ifdef LIBXML_XPATH_ENABLED
14732 	xmlXPathInit();
14733 #endif
14734 	xmlParserInitialized = 1;
14735 #ifdef LIBXML_THREAD_ENABLED
14736     }
14737     __xmlGlobalInitMutexUnlock();
14738 #endif
14739 }
14740 
14741 /**
14742  * xmlCleanupParser:
14743  *
14744  * This function name is somewhat misleading. It does not clean up
14745  * parser state, it cleans up memory allocated by the library itself.
14746  * It is a cleanup function for the XML library. It tries to reclaim all
14747  * related global memory allocated for the library processing.
14748  * It doesn't deallocate any document related memory. One should
14749  * call xmlCleanupParser() only when the process has finished using
14750  * the library and all XML/HTML documents built with it.
14751  * See also xmlInitParser() which has the opposite function of preparing
14752  * the library for operations.
14753  *
14754  * WARNING: if your application is multithreaded or has plugin support
14755  *          calling this may crash the application if another thread or
14756  *          a plugin is still using libxml2. It's sometimes very hard to
14757  *          guess if libxml2 is in use in the application, some libraries
14758  *          or plugins may use it without notice. In case of doubt abstain
14759  *          from calling this function or do it just before calling exit()
14760  *          to avoid leak reports from valgrind !
14761  */
14762 
14763 void
xmlCleanupParser(void)14764 xmlCleanupParser(void) {
14765     if (!xmlParserInitialized)
14766 	return;
14767 
14768     xmlCleanupCharEncodingHandlers();
14769 #ifdef LIBXML_CATALOG_ENABLED
14770     xmlCatalogCleanup();
14771 #endif
14772     xmlDictCleanup();
14773     xmlCleanupInputCallbacks();
14774 #ifdef LIBXML_OUTPUT_ENABLED
14775     xmlCleanupOutputCallbacks();
14776 #endif
14777 #ifdef LIBXML_SCHEMAS_ENABLED
14778     xmlSchemaCleanupTypes();
14779     xmlRelaxNGCleanupTypes();
14780 #endif
14781     xmlResetLastError();
14782     xmlCleanupGlobals();
14783     xmlCleanupThreads(); /* must be last if called not from the main thread */
14784     xmlCleanupMemory();
14785     xmlParserInitialized = 0;
14786 }
14787 
14788 /************************************************************************
14789  *									*
14790  *	New set (2.6.0) of simpler and more flexible APIs		*
14791  *									*
14792  ************************************************************************/
14793 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means the string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement: it must be followed by a semicolon and is then
 * safe to use as the sole body of an unbraced if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14805 
14806 /**
14807  * xmlCtxtReset:
14808  * @ctxt: an XML parser context
14809  *
14810  * Reset a parser context
14811  */
14812 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14813 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14814 {
14815     xmlParserInputPtr input;
14816     xmlDictPtr dict;
14817 
14818     if (ctxt == NULL)
14819         return;
14820 
14821     dict = ctxt->dict;
14822 
14823     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14824         xmlFreeInputStream(input);
14825     }
14826     ctxt->inputNr = 0;
14827     ctxt->input = NULL;
14828 
14829     ctxt->spaceNr = 0;
14830     if (ctxt->spaceTab != NULL) {
14831 	ctxt->spaceTab[0] = -1;
14832 	ctxt->space = &ctxt->spaceTab[0];
14833     } else {
14834         ctxt->space = NULL;
14835     }
14836 
14837 
14838     ctxt->nodeNr = 0;
14839     ctxt->node = NULL;
14840 
14841     ctxt->nameNr = 0;
14842     ctxt->name = NULL;
14843 
14844     DICT_FREE(ctxt->version);
14845     ctxt->version = NULL;
14846     DICT_FREE(ctxt->encoding);
14847     ctxt->encoding = NULL;
14848     DICT_FREE(ctxt->directory);
14849     ctxt->directory = NULL;
14850     DICT_FREE(ctxt->extSubURI);
14851     ctxt->extSubURI = NULL;
14852     DICT_FREE(ctxt->extSubSystem);
14853     ctxt->extSubSystem = NULL;
14854     if (ctxt->myDoc != NULL)
14855         xmlFreeDoc(ctxt->myDoc);
14856     ctxt->myDoc = NULL;
14857 
14858     ctxt->standalone = -1;
14859     ctxt->hasExternalSubset = 0;
14860     ctxt->hasPErefs = 0;
14861     ctxt->html = 0;
14862     ctxt->external = 0;
14863     ctxt->instate = XML_PARSER_START;
14864     ctxt->token = 0;
14865 
14866     ctxt->wellFormed = 1;
14867     ctxt->nsWellFormed = 1;
14868     ctxt->disableSAX = 0;
14869     ctxt->valid = 1;
14870 #if 0
14871     ctxt->vctxt.userData = ctxt;
14872     ctxt->vctxt.error = xmlParserValidityError;
14873     ctxt->vctxt.warning = xmlParserValidityWarning;
14874 #endif
14875     ctxt->record_info = 0;
14876     ctxt->nbChars = 0;
14877     ctxt->checkIndex = 0;
14878     ctxt->inSubset = 0;
14879     ctxt->errNo = XML_ERR_OK;
14880     ctxt->depth = 0;
14881     ctxt->charset = XML_CHAR_ENCODING_UTF8;
14882     ctxt->catalogs = NULL;
14883     ctxt->nbentities = 0;
14884     ctxt->sizeentities = 0;
14885     ctxt->sizeentcopy = 0;
14886     xmlInitNodeInfoSeq(&ctxt->node_seq);
14887 
14888     if (ctxt->attsDefault != NULL) {
14889         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14890         ctxt->attsDefault = NULL;
14891     }
14892     if (ctxt->attsSpecial != NULL) {
14893         xmlHashFree(ctxt->attsSpecial, NULL);
14894         ctxt->attsSpecial = NULL;
14895     }
14896 
14897 #ifdef LIBXML_CATALOG_ENABLED
14898     if (ctxt->catalogs != NULL)
14899 	xmlCatalogFreeLocal(ctxt->catalogs);
14900 #endif
14901     if (ctxt->lastError.code != XML_ERR_OK)
14902         xmlResetError(&ctxt->lastError);
14903 }
14904 
14905 /**
14906  * xmlCtxtResetPush:
14907  * @ctxt: an XML parser context
14908  * @chunk:  a pointer to an array of chars
14909  * @size:  number of chars in the array
14910  * @filename:  an optional file name or URI
14911  * @encoding:  the document encoding, or NULL
14912  *
14913  * Reset a push parser context
14914  *
14915  * Returns 0 in case of success and 1 in case of error
14916  */
14917 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14918 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14919                  int size, const char *filename, const char *encoding)
14920 {
14921     xmlParserInputPtr inputStream;
14922     xmlParserInputBufferPtr buf;
14923     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14924 
14925     if (ctxt == NULL)
14926         return(1);
14927 
14928     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14929         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14930 
14931     buf = xmlAllocParserInputBuffer(enc);
14932     if (buf == NULL)
14933         return(1);
14934 
14935     if (ctxt == NULL) {
14936         xmlFreeParserInputBuffer(buf);
14937         return(1);
14938     }
14939 
14940     xmlCtxtReset(ctxt);
14941 
14942     if (ctxt->pushTab == NULL) {
14943         ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14944 	                                    sizeof(xmlChar *));
14945         if (ctxt->pushTab == NULL) {
14946 	    xmlErrMemory(ctxt, NULL);
14947             xmlFreeParserInputBuffer(buf);
14948             return(1);
14949         }
14950     }
14951 
14952     if (filename == NULL) {
14953         ctxt->directory = NULL;
14954     } else {
14955         ctxt->directory = xmlParserGetDirectory(filename);
14956     }
14957 
14958     inputStream = xmlNewInputStream(ctxt);
14959     if (inputStream == NULL) {
14960         xmlFreeParserInputBuffer(buf);
14961         return(1);
14962     }
14963 
14964     if (filename == NULL)
14965         inputStream->filename = NULL;
14966     else
14967         inputStream->filename = (char *)
14968             xmlCanonicPath((const xmlChar *) filename);
14969     inputStream->buf = buf;
14970     xmlBufResetInput(buf->buffer, inputStream);
14971 
14972     inputPush(ctxt, inputStream);
14973 
14974     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14975         (ctxt->input->buf != NULL)) {
14976 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14977         size_t cur = ctxt->input->cur - ctxt->input->base;
14978 
14979         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14980 
14981         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14982 #ifdef DEBUG_PUSH
14983         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14984 #endif
14985     }
14986 
14987     if (encoding != NULL) {
14988         xmlCharEncodingHandlerPtr hdlr;
14989 
14990         if (ctxt->encoding != NULL)
14991 	    xmlFree((xmlChar *) ctxt->encoding);
14992         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14993 
14994         hdlr = xmlFindCharEncodingHandler(encoding);
14995         if (hdlr != NULL) {
14996             xmlSwitchToEncoding(ctxt, hdlr);
14997 	} else {
14998 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14999 			      "Unsupported encoding %s\n", BAD_CAST encoding);
15000         }
15001     } else if (enc != XML_CHAR_ENCODING_NONE) {
15002         xmlSwitchEncoding(ctxt, enc);
15003     }
15004 
15005     return(0);
15006 }
15007 
15008 
15009 /**
15010  * xmlCtxtUseOptionsInternal:
15011  * @ctxt: an XML parser context
15012  * @options:  a combination of xmlParserOption
15013  * @encoding:  the user provided encoding to use
15014  *
15015  * Applies the options to the parser context
15016  *
15017  * Returns 0 in case of success, the set of unknown or unimplemented options
15018  *         in case of error.
15019  */
15020 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15021 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15022 {
15023     if (ctxt == NULL)
15024         return(-1);
15025     if (encoding != NULL) {
15026         if (ctxt->encoding != NULL)
15027 	    xmlFree((xmlChar *) ctxt->encoding);
15028         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15029     }
15030     if (options & XML_PARSE_RECOVER) {
15031         ctxt->recovery = 1;
15032         options -= XML_PARSE_RECOVER;
15033 	ctxt->options |= XML_PARSE_RECOVER;
15034     } else
15035         ctxt->recovery = 0;
15036     if (options & XML_PARSE_DTDLOAD) {
15037         ctxt->loadsubset = XML_DETECT_IDS;
15038         options -= XML_PARSE_DTDLOAD;
15039 	ctxt->options |= XML_PARSE_DTDLOAD;
15040     } else
15041         ctxt->loadsubset = 0;
15042     if (options & XML_PARSE_DTDATTR) {
15043         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15044         options -= XML_PARSE_DTDATTR;
15045 	ctxt->options |= XML_PARSE_DTDATTR;
15046     }
15047     if (options & XML_PARSE_NOENT) {
15048         ctxt->replaceEntities = 1;
15049         /* ctxt->loadsubset |= XML_DETECT_IDS; */
15050         options -= XML_PARSE_NOENT;
15051 	ctxt->options |= XML_PARSE_NOENT;
15052     } else
15053         ctxt->replaceEntities = 0;
15054     if (options & XML_PARSE_PEDANTIC) {
15055         ctxt->pedantic = 1;
15056         options -= XML_PARSE_PEDANTIC;
15057 	ctxt->options |= XML_PARSE_PEDANTIC;
15058     } else
15059         ctxt->pedantic = 0;
15060     if (options & XML_PARSE_NOBLANKS) {
15061         ctxt->keepBlanks = 0;
15062         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15063         options -= XML_PARSE_NOBLANKS;
15064 	ctxt->options |= XML_PARSE_NOBLANKS;
15065     } else
15066         ctxt->keepBlanks = 1;
15067     if (options & XML_PARSE_DTDVALID) {
15068         ctxt->validate = 1;
15069         if (options & XML_PARSE_NOWARNING)
15070             ctxt->vctxt.warning = NULL;
15071         if (options & XML_PARSE_NOERROR)
15072             ctxt->vctxt.error = NULL;
15073         options -= XML_PARSE_DTDVALID;
15074 	ctxt->options |= XML_PARSE_DTDVALID;
15075     } else
15076         ctxt->validate = 0;
15077     if (options & XML_PARSE_NOWARNING) {
15078         ctxt->sax->warning = NULL;
15079         options -= XML_PARSE_NOWARNING;
15080     }
15081     if (options & XML_PARSE_NOERROR) {
15082         ctxt->sax->error = NULL;
15083         ctxt->sax->fatalError = NULL;
15084         options -= XML_PARSE_NOERROR;
15085     }
15086 #ifdef LIBXML_SAX1_ENABLED
15087     if (options & XML_PARSE_SAX1) {
15088         ctxt->sax->startElement = xmlSAX2StartElement;
15089         ctxt->sax->endElement = xmlSAX2EndElement;
15090         ctxt->sax->startElementNs = NULL;
15091         ctxt->sax->endElementNs = NULL;
15092         ctxt->sax->initialized = 1;
15093         options -= XML_PARSE_SAX1;
15094 	ctxt->options |= XML_PARSE_SAX1;
15095     }
15096 #endif /* LIBXML_SAX1_ENABLED */
15097     if (options & XML_PARSE_NODICT) {
15098         ctxt->dictNames = 0;
15099         options -= XML_PARSE_NODICT;
15100 	ctxt->options |= XML_PARSE_NODICT;
15101     } else {
15102         ctxt->dictNames = 1;
15103     }
15104     if (options & XML_PARSE_NOCDATA) {
15105         ctxt->sax->cdataBlock = NULL;
15106         options -= XML_PARSE_NOCDATA;
15107 	ctxt->options |= XML_PARSE_NOCDATA;
15108     }
15109     if (options & XML_PARSE_NSCLEAN) {
15110 	ctxt->options |= XML_PARSE_NSCLEAN;
15111         options -= XML_PARSE_NSCLEAN;
15112     }
15113     if (options & XML_PARSE_NONET) {
15114 	ctxt->options |= XML_PARSE_NONET;
15115         options -= XML_PARSE_NONET;
15116     }
15117     if (options & XML_PARSE_COMPACT) {
15118 	ctxt->options |= XML_PARSE_COMPACT;
15119         options -= XML_PARSE_COMPACT;
15120     }
15121     if (options & XML_PARSE_OLD10) {
15122 	ctxt->options |= XML_PARSE_OLD10;
15123         options -= XML_PARSE_OLD10;
15124     }
15125     if (options & XML_PARSE_NOBASEFIX) {
15126 	ctxt->options |= XML_PARSE_NOBASEFIX;
15127         options -= XML_PARSE_NOBASEFIX;
15128     }
15129     if (options & XML_PARSE_HUGE) {
15130 	ctxt->options |= XML_PARSE_HUGE;
15131         options -= XML_PARSE_HUGE;
15132         if (ctxt->dict != NULL)
15133             xmlDictSetLimit(ctxt->dict, 0);
15134     }
15135     if (options & XML_PARSE_OLDSAX) {
15136 	ctxt->options |= XML_PARSE_OLDSAX;
15137         options -= XML_PARSE_OLDSAX;
15138     }
15139     if (options & XML_PARSE_IGNORE_ENC) {
15140 	ctxt->options |= XML_PARSE_IGNORE_ENC;
15141         options -= XML_PARSE_IGNORE_ENC;
15142     }
15143     if (options & XML_PARSE_BIG_LINES) {
15144 	ctxt->options |= XML_PARSE_BIG_LINES;
15145         options -= XML_PARSE_BIG_LINES;
15146     }
15147     ctxt->linenumbers = 1;
15148     return (options);
15149 }
15150 
15151 /**
15152  * xmlCtxtUseOptions:
15153  * @ctxt: an XML parser context
15154  * @options:  a combination of xmlParserOption
15155  *
15156  * Applies the options to the parser context
15157  *
15158  * Returns 0 in case of success, the set of unknown or unimplemented options
15159  *         in case of error.
15160  */
15161 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15162 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15163 {
15164    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15165 }
15166 
15167 /**
15168  * xmlDoRead:
15169  * @ctxt:  an XML parser context
15170  * @URL:  the base URL to use for the document
15171  * @encoding:  the document encoding, or NULL
15172  * @options:  a combination of xmlParserOption
15173  * @reuse:  keep the context for reuse
15174  *
15175  * Common front-end for the xmlRead functions
15176  *
15177  * Returns the resulting document tree or NULL
15178  */
15179 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15180 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15181           int options, int reuse)
15182 {
15183     xmlDocPtr ret;
15184 
15185     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15186     if (encoding != NULL) {
15187         xmlCharEncodingHandlerPtr hdlr;
15188 
15189 	hdlr = xmlFindCharEncodingHandler(encoding);
15190 	if (hdlr != NULL)
15191 	    xmlSwitchToEncoding(ctxt, hdlr);
15192     }
15193     if ((URL != NULL) && (ctxt->input != NULL) &&
15194         (ctxt->input->filename == NULL))
15195         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15196     xmlParseDocument(ctxt);
15197     if ((ctxt->wellFormed) || ctxt->recovery)
15198         ret = ctxt->myDoc;
15199     else {
15200         ret = NULL;
15201 	if (ctxt->myDoc != NULL) {
15202 	    xmlFreeDoc(ctxt->myDoc);
15203 	}
15204     }
15205     ctxt->myDoc = NULL;
15206     if (!reuse) {
15207 	xmlFreeParserCtxt(ctxt);
15208     }
15209 
15210     return (ret);
15211 }
15212 
15213 /**
15214  * xmlReadDoc:
15215  * @cur:  a pointer to a zero terminated string
15216  * @URL:  the base URL to use for the document
15217  * @encoding:  the document encoding, or NULL
15218  * @options:  a combination of xmlParserOption
15219  *
15220  * parse an XML in-memory document and build a tree.
15221  *
15222  * Returns the resulting document tree
15223  */
15224 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15225 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15226 {
15227     xmlParserCtxtPtr ctxt;
15228 
15229     if (cur == NULL)
15230         return (NULL);
15231     xmlInitParser();
15232 
15233     ctxt = xmlCreateDocParserCtxt(cur);
15234     if (ctxt == NULL)
15235         return (NULL);
15236     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15237 }
15238 
15239 /**
15240  * xmlReadFile:
15241  * @filename:  a file or URL
15242  * @encoding:  the document encoding, or NULL
15243  * @options:  a combination of xmlParserOption
15244  *
15245  * parse an XML file from the filesystem or the network.
15246  *
15247  * Returns the resulting document tree
15248  */
15249 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15250 xmlReadFile(const char *filename, const char *encoding, int options)
15251 {
15252     xmlParserCtxtPtr ctxt;
15253 
15254     xmlInitParser();
15255     ctxt = xmlCreateURLParserCtxt(filename, options);
15256     if (ctxt == NULL)
15257         return (NULL);
15258     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15259 }
15260 
15261 /**
15262  * xmlReadMemory:
15263  * @buffer:  a pointer to a char array
15264  * @size:  the size of the array
15265  * @URL:  the base URL to use for the document
15266  * @encoding:  the document encoding, or NULL
15267  * @options:  a combination of xmlParserOption
15268  *
15269  * parse an XML in-memory document and build a tree.
15270  *
15271  * Returns the resulting document tree
15272  */
15273 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15274 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15275 {
15276     xmlParserCtxtPtr ctxt;
15277 
15278     xmlInitParser();
15279     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15280     if (ctxt == NULL)
15281         return (NULL);
15282     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15283 }
15284 
15285 /**
15286  * xmlReadFd:
15287  * @fd:  an open file descriptor
15288  * @URL:  the base URL to use for the document
15289  * @encoding:  the document encoding, or NULL
15290  * @options:  a combination of xmlParserOption
15291  *
15292  * parse an XML from a file descriptor and build a tree.
15293  * NOTE that the file descriptor will not be closed when the
15294  *      reader is closed or reset.
15295  *
15296  * Returns the resulting document tree
15297  */
15298 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15299 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15300 {
15301     xmlParserCtxtPtr ctxt;
15302     xmlParserInputBufferPtr input;
15303     xmlParserInputPtr stream;
15304 
15305     if (fd < 0)
15306         return (NULL);
15307     xmlInitParser();
15308 
15309     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15310     if (input == NULL)
15311         return (NULL);
15312     input->closecallback = NULL;
15313     ctxt = xmlNewParserCtxt();
15314     if (ctxt == NULL) {
15315         xmlFreeParserInputBuffer(input);
15316         return (NULL);
15317     }
15318     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15319     if (stream == NULL) {
15320         xmlFreeParserInputBuffer(input);
15321 	xmlFreeParserCtxt(ctxt);
15322         return (NULL);
15323     }
15324     inputPush(ctxt, stream);
15325     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15326 }
15327 
15328 /**
15329  * xmlReadIO:
15330  * @ioread:  an I/O read function
15331  * @ioclose:  an I/O close function
15332  * @ioctx:  an I/O handler
15333  * @URL:  the base URL to use for the document
15334  * @encoding:  the document encoding, or NULL
15335  * @options:  a combination of xmlParserOption
15336  *
15337  * parse an XML document from I/O functions and source and build a tree.
15338  *
15339  * Returns the resulting document tree
15340  */
15341 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15342 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15343           void *ioctx, const char *URL, const char *encoding, int options)
15344 {
15345     xmlParserCtxtPtr ctxt;
15346     xmlParserInputBufferPtr input;
15347     xmlParserInputPtr stream;
15348 
15349     if (ioread == NULL)
15350         return (NULL);
15351     xmlInitParser();
15352 
15353     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15354                                          XML_CHAR_ENCODING_NONE);
15355     if (input == NULL) {
15356         if (ioclose != NULL)
15357             ioclose(ioctx);
15358         return (NULL);
15359     }
15360     ctxt = xmlNewParserCtxt();
15361     if (ctxt == NULL) {
15362         xmlFreeParserInputBuffer(input);
15363         return (NULL);
15364     }
15365     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15366     if (stream == NULL) {
15367         xmlFreeParserInputBuffer(input);
15368 	xmlFreeParserCtxt(ctxt);
15369         return (NULL);
15370     }
15371     inputPush(ctxt, stream);
15372     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15373 }
15374 
15375 /**
15376  * xmlCtxtReadDoc:
15377  * @ctxt:  an XML parser context
15378  * @cur:  a pointer to a zero terminated string
15379  * @URL:  the base URL to use for the document
15380  * @encoding:  the document encoding, or NULL
15381  * @options:  a combination of xmlParserOption
15382  *
15383  * parse an XML in-memory document and build a tree.
15384  * This reuses the existing @ctxt parser context
15385  *
15386  * Returns the resulting document tree
15387  */
15388 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15389 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15390                const char *URL, const char *encoding, int options)
15391 {
15392     xmlParserInputPtr stream;
15393 
15394     if (cur == NULL)
15395         return (NULL);
15396     if (ctxt == NULL)
15397         return (NULL);
15398     xmlInitParser();
15399 
15400     xmlCtxtReset(ctxt);
15401 
15402     stream = xmlNewStringInputStream(ctxt, cur);
15403     if (stream == NULL) {
15404         return (NULL);
15405     }
15406     inputPush(ctxt, stream);
15407     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15408 }
15409 
15410 /**
15411  * xmlCtxtReadFile:
15412  * @ctxt:  an XML parser context
15413  * @filename:  a file or URL
15414  * @encoding:  the document encoding, or NULL
15415  * @options:  a combination of xmlParserOption
15416  *
15417  * parse an XML file from the filesystem or the network.
15418  * This reuses the existing @ctxt parser context
15419  *
15420  * Returns the resulting document tree
15421  */
15422 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15423 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15424                 const char *encoding, int options)
15425 {
15426     xmlParserInputPtr stream;
15427 
15428     if (filename == NULL)
15429         return (NULL);
15430     if (ctxt == NULL)
15431         return (NULL);
15432     xmlInitParser();
15433 
15434     xmlCtxtReset(ctxt);
15435 
15436     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15437     if (stream == NULL) {
15438         return (NULL);
15439     }
15440     inputPush(ctxt, stream);
15441     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15442 }
15443 
15444 /**
15445  * xmlCtxtReadMemory:
15446  * @ctxt:  an XML parser context
15447  * @buffer:  a pointer to a char array
15448  * @size:  the size of the array
15449  * @URL:  the base URL to use for the document
15450  * @encoding:  the document encoding, or NULL
15451  * @options:  a combination of xmlParserOption
15452  *
15453  * parse an XML in-memory document and build a tree.
15454  * This reuses the existing @ctxt parser context
15455  *
15456  * Returns the resulting document tree
15457  */
15458 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15459 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15460                   const char *URL, const char *encoding, int options)
15461 {
15462     xmlParserInputBufferPtr input;
15463     xmlParserInputPtr stream;
15464 
15465     if (ctxt == NULL)
15466         return (NULL);
15467     if (buffer == NULL)
15468         return (NULL);
15469     xmlInitParser();
15470 
15471     xmlCtxtReset(ctxt);
15472 
15473     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15474     if (input == NULL) {
15475 	return(NULL);
15476     }
15477 
15478     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15479     if (stream == NULL) {
15480 	xmlFreeParserInputBuffer(input);
15481 	return(NULL);
15482     }
15483 
15484     inputPush(ctxt, stream);
15485     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15486 }
15487 
15488 /**
15489  * xmlCtxtReadFd:
15490  * @ctxt:  an XML parser context
15491  * @fd:  an open file descriptor
15492  * @URL:  the base URL to use for the document
15493  * @encoding:  the document encoding, or NULL
15494  * @options:  a combination of xmlParserOption
15495  *
15496  * parse an XML from a file descriptor and build a tree.
15497  * This reuses the existing @ctxt parser context
15498  * NOTE that the file descriptor will not be closed when the
15499  *      reader is closed or reset.
15500  *
15501  * Returns the resulting document tree
15502  */
15503 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15504 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15505               const char *URL, const char *encoding, int options)
15506 {
15507     xmlParserInputBufferPtr input;
15508     xmlParserInputPtr stream;
15509 
15510     if (fd < 0)
15511         return (NULL);
15512     if (ctxt == NULL)
15513         return (NULL);
15514     xmlInitParser();
15515 
15516     xmlCtxtReset(ctxt);
15517 
15518 
15519     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15520     if (input == NULL)
15521         return (NULL);
15522     input->closecallback = NULL;
15523     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15524     if (stream == NULL) {
15525         xmlFreeParserInputBuffer(input);
15526         return (NULL);
15527     }
15528     inputPush(ctxt, stream);
15529     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15530 }
15531 
15532 /**
15533  * xmlCtxtReadIO:
15534  * @ctxt:  an XML parser context
15535  * @ioread:  an I/O read function
15536  * @ioclose:  an I/O close function
15537  * @ioctx:  an I/O handler
15538  * @URL:  the base URL to use for the document
15539  * @encoding:  the document encoding, or NULL
15540  * @options:  a combination of xmlParserOption
15541  *
15542  * parse an XML document from I/O functions and source and build a tree.
15543  * This reuses the existing @ctxt parser context
15544  *
15545  * Returns the resulting document tree
15546  */
15547 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15548 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15549               xmlInputCloseCallback ioclose, void *ioctx,
15550 	      const char *URL,
15551               const char *encoding, int options)
15552 {
15553     xmlParserInputBufferPtr input;
15554     xmlParserInputPtr stream;
15555 
15556     if (ioread == NULL)
15557         return (NULL);
15558     if (ctxt == NULL)
15559         return (NULL);
15560     xmlInitParser();
15561 
15562     xmlCtxtReset(ctxt);
15563 
15564     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15565                                          XML_CHAR_ENCODING_NONE);
15566     if (input == NULL) {
15567         if (ioclose != NULL)
15568             ioclose(ioctx);
15569         return (NULL);
15570     }
15571     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15572     if (stream == NULL) {
15573         xmlFreeParserInputBuffer(input);
15574         return (NULL);
15575     }
15576     inputPush(ctxt, stream);
15577     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15578 }
15579 
15580 #define bottom_parser
15581 #include "elfgcchack.h"
15582