• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different ranges of character are actually implanted either in
21  * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 #define IN_LIBXML
34 #include "libxml.h"
35 
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41 
42 #include <stdlib.h>
43 #include <limits.h>
44 #include <string.h>
45 #include <stdarg.h>
46 #include <libxml/xmlmemory.h>
47 #include <libxml/threads.h>
48 #include <libxml/globals.h>
49 #include <libxml/tree.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/valid.h>
53 #include <libxml/entities.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/encoding.h>
56 #include <libxml/xmlIO.h>
57 #include <libxml/uri.h>
58 #ifdef LIBXML_CATALOG_ENABLED
59 #include <libxml/catalog.h>
60 #endif
61 #ifdef LIBXML_SCHEMAS_ENABLED
62 #include <libxml/xmlschemastypes.h>
63 #include <libxml/relaxng.h>
64 #endif
65 #ifdef HAVE_CTYPE_H
66 #include <ctype.h>
67 #endif
68 #ifdef HAVE_STDLIB_H
69 #include <stdlib.h>
70 #endif
71 #ifdef HAVE_SYS_STAT_H
72 #include <sys/stat.h>
73 #endif
74 #ifdef HAVE_FCNTL_H
75 #include <fcntl.h>
76 #endif
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 #ifdef HAVE_ZLIB_H
81 #include <zlib.h>
82 #endif
83 #ifdef HAVE_LZMA_H
84 #include <lzma.h>
85 #endif
86 
87 #include "buf.h"
88 #include "enc.h"
89 
90 static void
91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92 
93 static xmlParserCtxtPtr
94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 	                  const xmlChar *base, xmlParserCtxtPtr pctx);
96 
97 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98 
99 /************************************************************************
100  *									*
101  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
102  *									*
103  ************************************************************************/
104 
105 #define XML_PARSER_BIG_ENTITY 1000
106 #define XML_PARSER_LOT_ENTITY 5000
107 
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
114 #define XML_PARSER_NON_LINEAR 10
115 
116 /*
117  * xmlParserEntityCheck
118  *
119  * Function to check non-linear entity expansion behaviour
120  * This is here to detect and stop exponential linear entity expansion
121  * This is not a limitation of the parser but a safety
122  * boundary feature. It can be disabled with the XML_PARSE_HUGE
123  * parser option.
124  */
125 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127                      xmlEntityPtr ent, size_t replacement)
128 {
129     size_t consumed = 0;
130 
131     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132         return (0);
133     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134         return (1);
135 
136     /*
137      * This may look absurd but is needed to detect
138      * entities problems
139      */
140     if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 	(ent->content != NULL) && (ent->checked == 0) &&
142 	(ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
143 	unsigned long oldnbent = ctxt->nbentities;
144 	xmlChar *rep;
145 
146 	ent->checked = 1;
147 
148         ++ctxt->depth;
149 	rep = xmlStringDecodeEntities(ctxt, ent->content,
150 				  XML_SUBSTITUTE_REF, 0, 0, 0);
151         --ctxt->depth;
152 	if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
153 	    ent->content[0] = 0;
154 	}
155 
156 	ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
157 	if (rep != NULL) {
158 	    if (xmlStrchr(rep, '<'))
159 		ent->checked |= 1;
160 	    xmlFree(rep);
161 	    rep = NULL;
162 	}
163     }
164     if (replacement != 0) {
165 	if (replacement < XML_MAX_TEXT_LENGTH)
166 	    return(0);
167 
168         /*
169 	 * If the volume of entity copy reaches 10 times the
170 	 * amount of parsed data and over the large text threshold
171 	 * then that's very likely to be an abuse.
172 	 */
173         if (ctxt->input != NULL) {
174 	    consumed = ctxt->input->consumed +
175 	               (ctxt->input->cur - ctxt->input->base);
176 	}
177         consumed += ctxt->sizeentities;
178 
179         if (replacement < XML_PARSER_NON_LINEAR * consumed)
180 	    return(0);
181     } else if (size != 0) {
182         /*
183          * Do the check based on the replacement size of the entity
184          */
185         if (size < XML_PARSER_BIG_ENTITY)
186 	    return(0);
187 
188         /*
189          * A limit on the amount of text data reasonably used
190          */
191         if (ctxt->input != NULL) {
192             consumed = ctxt->input->consumed +
193                 (ctxt->input->cur - ctxt->input->base);
194         }
195         consumed += ctxt->sizeentities;
196 
197         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
198 	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
199             return (0);
200     } else if (ent != NULL) {
201         /*
202          * use the number of parsed entities in the replacement
203          */
204         size = ent->checked / 2;
205 
206         /*
207          * The amount of data parsed counting entities size only once
208          */
209         if (ctxt->input != NULL) {
210             consumed = ctxt->input->consumed +
211                 (ctxt->input->cur - ctxt->input->base);
212         }
213         consumed += ctxt->sizeentities;
214 
215         /*
216          * Check the density of entities for the amount of data
217 	 * knowing an entity reference will take at least 3 bytes
218          */
219         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
220             return (0);
221     } else {
222         /*
223          * strange we got no data for checking
224          */
225 	if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
226 	     (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
227 	    (ctxt->nbentities <= 10000))
228 	    return (0);
229     }
230     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
231     return (1);
232 }
233 
234 /**
235  * xmlParserMaxDepth:
236  *
237  * arbitrary depth limit for the XML documents that we allow to
238  * process. This is not a limitation of the parser but a safety
239  * boundary feature. It can be disabled with the XML_PARSE_HUGE
240  * parser option.
241  */
242 unsigned int xmlParserMaxDepth = 256;
243 
244 
245 
246 #define SAX2 1
247 #define XML_PARSER_BIG_BUFFER_SIZE 300
248 #define XML_PARSER_BUFFER_SIZE 100
249 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
250 
/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the available input characters
 * are followed by 0, which should be provided by the I/O level.
 */
260 #define XML_PARSER_CHUNK_SIZE 100
261 
262 /*
263  * List of XML prefixed PI allowed by W3C specs
264  */
265 
266 static const char *xmlW3CPIs[] = {
267     "xml-stylesheet",
268     "xml-model",
269     NULL
270 };
271 
272 
273 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
274 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
275                                               const xmlChar **str);
276 
277 static xmlParserErrors
278 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
279 	              xmlSAXHandlerPtr sax,
280 		      void *user_data, int depth, const xmlChar *URL,
281 		      const xmlChar *ID, xmlNodePtr *list);
282 
283 static int
284 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
285                           const char *encoding);
286 #ifdef LIBXML_LEGACY_ENABLED
287 static void
288 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
289                       xmlNodePtr lastNode);
290 #endif /* LIBXML_LEGACY_ENABLED */
291 
292 static xmlParserErrors
293 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
294 		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
295 
296 static int
297 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
298 
299 /************************************************************************
300  *									*
301  *		Some factorized error routines				*
302  *									*
303  ************************************************************************/
304 
305 /**
306  * xmlErrAttributeDup:
307  * @ctxt:  an XML parser context
308  * @prefix:  the attribute prefix
309  * @localname:  the attribute localname
310  *
311  * Handle a redefinition of attribute error
312  */
313 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)314 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
315                    const xmlChar * localname)
316 {
317     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
318         (ctxt->instate == XML_PARSER_EOF))
319 	return;
320     if (ctxt != NULL)
321 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
322 
323     if (prefix == NULL)
324         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
325                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
326                         (const char *) localname, NULL, NULL, 0, 0,
327                         "Attribute %s redefined\n", localname);
328     else
329         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
330                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
331                         (const char *) prefix, (const char *) localname,
332                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
333                         localname);
334     if (ctxt != NULL) {
335 	ctxt->wellFormed = 0;
336 	if (ctxt->recovery == 0)
337 	    ctxt->disableSAX = 1;
338     }
339 }
340 
341 /**
342  * xmlFatalErr:
343  * @ctxt:  an XML parser context
344  * @error:  the error number
345  * @extra:  extra information string
346  *
347  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
348  */
349 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)350 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
351 {
352     const char *errmsg;
353 
354     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
355         (ctxt->instate == XML_PARSER_EOF))
356 	return;
357     switch (error) {
358         case XML_ERR_INVALID_HEX_CHARREF:
359             errmsg = "CharRef: invalid hexadecimal value";
360             break;
361         case XML_ERR_INVALID_DEC_CHARREF:
362             errmsg = "CharRef: invalid decimal value";
363             break;
364         case XML_ERR_INVALID_CHARREF:
365             errmsg = "CharRef: invalid value";
366             break;
367         case XML_ERR_INTERNAL_ERROR:
368             errmsg = "internal error";
369             break;
370         case XML_ERR_PEREF_AT_EOF:
371             errmsg = "PEReference at end of document";
372             break;
373         case XML_ERR_PEREF_IN_PROLOG:
374             errmsg = "PEReference in prolog";
375             break;
376         case XML_ERR_PEREF_IN_EPILOG:
377             errmsg = "PEReference in epilog";
378             break;
379         case XML_ERR_PEREF_NO_NAME:
380             errmsg = "PEReference: no name";
381             break;
382         case XML_ERR_PEREF_SEMICOL_MISSING:
383             errmsg = "PEReference: expecting ';'";
384             break;
385         case XML_ERR_ENTITY_LOOP:
386             errmsg = "Detected an entity reference loop";
387             break;
388         case XML_ERR_ENTITY_NOT_STARTED:
389             errmsg = "EntityValue: \" or ' expected";
390             break;
391         case XML_ERR_ENTITY_PE_INTERNAL:
392             errmsg = "PEReferences forbidden in internal subset";
393             break;
394         case XML_ERR_ENTITY_NOT_FINISHED:
395             errmsg = "EntityValue: \" or ' expected";
396             break;
397         case XML_ERR_ATTRIBUTE_NOT_STARTED:
398             errmsg = "AttValue: \" or ' expected";
399             break;
400         case XML_ERR_LT_IN_ATTRIBUTE:
401             errmsg = "Unescaped '<' not allowed in attributes values";
402             break;
403         case XML_ERR_LITERAL_NOT_STARTED:
404             errmsg = "SystemLiteral \" or ' expected";
405             break;
406         case XML_ERR_LITERAL_NOT_FINISHED:
407             errmsg = "Unfinished System or Public ID \" or ' expected";
408             break;
409         case XML_ERR_MISPLACED_CDATA_END:
410             errmsg = "Sequence ']]>' not allowed in content";
411             break;
412         case XML_ERR_URI_REQUIRED:
413             errmsg = "SYSTEM or PUBLIC, the URI is missing";
414             break;
415         case XML_ERR_PUBID_REQUIRED:
416             errmsg = "PUBLIC, the Public Identifier is missing";
417             break;
418         case XML_ERR_HYPHEN_IN_COMMENT:
419             errmsg = "Comment must not contain '--' (double-hyphen)";
420             break;
421         case XML_ERR_PI_NOT_STARTED:
422             errmsg = "xmlParsePI : no target name";
423             break;
424         case XML_ERR_RESERVED_XML_NAME:
425             errmsg = "Invalid PI name";
426             break;
427         case XML_ERR_NOTATION_NOT_STARTED:
428             errmsg = "NOTATION: Name expected here";
429             break;
430         case XML_ERR_NOTATION_NOT_FINISHED:
431             errmsg = "'>' required to close NOTATION declaration";
432             break;
433         case XML_ERR_VALUE_REQUIRED:
434             errmsg = "Entity value required";
435             break;
436         case XML_ERR_URI_FRAGMENT:
437             errmsg = "Fragment not allowed";
438             break;
439         case XML_ERR_ATTLIST_NOT_STARTED:
440             errmsg = "'(' required to start ATTLIST enumeration";
441             break;
442         case XML_ERR_NMTOKEN_REQUIRED:
443             errmsg = "NmToken expected in ATTLIST enumeration";
444             break;
445         case XML_ERR_ATTLIST_NOT_FINISHED:
446             errmsg = "')' required to finish ATTLIST enumeration";
447             break;
448         case XML_ERR_MIXED_NOT_STARTED:
449             errmsg = "MixedContentDecl : '|' or ')*' expected";
450             break;
451         case XML_ERR_PCDATA_REQUIRED:
452             errmsg = "MixedContentDecl : '#PCDATA' expected";
453             break;
454         case XML_ERR_ELEMCONTENT_NOT_STARTED:
455             errmsg = "ContentDecl : Name or '(' expected";
456             break;
457         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
458             errmsg = "ContentDecl : ',' '|' or ')' expected";
459             break;
460         case XML_ERR_PEREF_IN_INT_SUBSET:
461             errmsg =
462                 "PEReference: forbidden within markup decl in internal subset";
463             break;
464         case XML_ERR_GT_REQUIRED:
465             errmsg = "expected '>'";
466             break;
467         case XML_ERR_CONDSEC_INVALID:
468             errmsg = "XML conditional section '[' expected";
469             break;
470         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
471             errmsg = "Content error in the external subset";
472             break;
473         case XML_ERR_CONDSEC_INVALID_KEYWORD:
474             errmsg =
475                 "conditional section INCLUDE or IGNORE keyword expected";
476             break;
477         case XML_ERR_CONDSEC_NOT_FINISHED:
478             errmsg = "XML conditional section not closed";
479             break;
480         case XML_ERR_XMLDECL_NOT_STARTED:
481             errmsg = "Text declaration '<?xml' required";
482             break;
483         case XML_ERR_XMLDECL_NOT_FINISHED:
484             errmsg = "parsing XML declaration: '?>' expected";
485             break;
486         case XML_ERR_EXT_ENTITY_STANDALONE:
487             errmsg = "external parsed entities cannot be standalone";
488             break;
489         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
490             errmsg = "EntityRef: expecting ';'";
491             break;
492         case XML_ERR_DOCTYPE_NOT_FINISHED:
493             errmsg = "DOCTYPE improperly terminated";
494             break;
495         case XML_ERR_LTSLASH_REQUIRED:
496             errmsg = "EndTag: '</' not found";
497             break;
498         case XML_ERR_EQUAL_REQUIRED:
499             errmsg = "expected '='";
500             break;
501         case XML_ERR_STRING_NOT_CLOSED:
502             errmsg = "String not closed expecting \" or '";
503             break;
504         case XML_ERR_STRING_NOT_STARTED:
505             errmsg = "String not started expecting ' or \"";
506             break;
507         case XML_ERR_ENCODING_NAME:
508             errmsg = "Invalid XML encoding name";
509             break;
510         case XML_ERR_STANDALONE_VALUE:
511             errmsg = "standalone accepts only 'yes' or 'no'";
512             break;
513         case XML_ERR_DOCUMENT_EMPTY:
514             errmsg = "Document is empty";
515             break;
516         case XML_ERR_DOCUMENT_END:
517             errmsg = "Extra content at the end of the document";
518             break;
519         case XML_ERR_NOT_WELL_BALANCED:
520             errmsg = "chunk is not well balanced";
521             break;
522         case XML_ERR_EXTRA_CONTENT:
523             errmsg = "extra content at the end of well balanced chunk";
524             break;
525         case XML_ERR_VERSION_MISSING:
526             errmsg = "Malformed declaration expecting version";
527             break;
528         case XML_ERR_NAME_TOO_LONG:
529             errmsg = "Name too long use XML_PARSE_HUGE option";
530             break;
531 #if 0
532         case:
533             errmsg = "";
534             break;
535 #endif
536         default:
537             errmsg = "Unregistered error message";
538     }
539     if (ctxt != NULL)
540 	ctxt->errNo = error;
541     if (info == NULL) {
542         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
543                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
544                         errmsg);
545     } else {
546         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
547                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
548                         errmsg, info);
549     }
550     if (ctxt != NULL) {
551 	ctxt->wellFormed = 0;
552 	if (ctxt->recovery == 0)
553 	    ctxt->disableSAX = 1;
554     }
555 }
556 
557 /**
558  * xmlFatalErrMsg:
559  * @ctxt:  an XML parser context
560  * @error:  the error number
561  * @msg:  the error message
562  *
563  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564  */
565 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)566 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567                const char *msg)
568 {
569     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
570         (ctxt->instate == XML_PARSER_EOF))
571 	return;
572     if (ctxt != NULL)
573 	ctxt->errNo = error;
574     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
576     if (ctxt != NULL) {
577 	ctxt->wellFormed = 0;
578 	if (ctxt->recovery == 0)
579 	    ctxt->disableSAX = 1;
580     }
581 }
582 
583 /**
584  * xmlWarningMsg:
585  * @ctxt:  an XML parser context
586  * @error:  the error number
587  * @msg:  the error message
588  * @str1:  extra data
589  * @str2:  extra data
590  *
591  * Handle a warning.
592  */
593 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)594 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595               const char *msg, const xmlChar *str1, const xmlChar *str2)
596 {
597     xmlStructuredErrorFunc schannel = NULL;
598 
599     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
600         (ctxt->instate == XML_PARSER_EOF))
601 	return;
602     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
603         (ctxt->sax->initialized == XML_SAX2_MAGIC))
604         schannel = ctxt->sax->serror;
605     if (ctxt != NULL) {
606         __xmlRaiseError(schannel,
607                     (ctxt->sax) ? ctxt->sax->warning : NULL,
608                     ctxt->userData,
609                     ctxt, NULL, XML_FROM_PARSER, error,
610                     XML_ERR_WARNING, NULL, 0,
611 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
612 		    msg, (const char *) str1, (const char *) str2);
613     } else {
614         __xmlRaiseError(schannel, NULL, NULL,
615                     ctxt, NULL, XML_FROM_PARSER, error,
616                     XML_ERR_WARNING, NULL, 0,
617 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
618 		    msg, (const char *) str1, (const char *) str2);
619     }
620 }
621 
622 /**
623  * xmlValidityError:
624  * @ctxt:  an XML parser context
625  * @error:  the error number
626  * @msg:  the error message
627  * @str1:  extra data
628  *
629  * Handle a validity error.
630  */
631 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)632 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633               const char *msg, const xmlChar *str1, const xmlChar *str2)
634 {
635     xmlStructuredErrorFunc schannel = NULL;
636 
637     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638         (ctxt->instate == XML_PARSER_EOF))
639 	return;
640     if (ctxt != NULL) {
641 	ctxt->errNo = error;
642 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 	    schannel = ctxt->sax->serror;
644     }
645     if (ctxt != NULL) {
646         __xmlRaiseError(schannel,
647                     ctxt->vctxt.error, ctxt->vctxt.userData,
648                     ctxt, NULL, XML_FROM_DTD, error,
649                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 		    (const char *) str2, NULL, 0, 0,
651 		    msg, (const char *) str1, (const char *) str2);
652 	ctxt->valid = 0;
653     } else {
654         __xmlRaiseError(schannel, NULL, NULL,
655                     ctxt, NULL, XML_FROM_DTD, error,
656                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
657 		    (const char *) str2, NULL, 0, 0,
658 		    msg, (const char *) str1, (const char *) str2);
659     }
660 }
661 
662 /**
663  * xmlFatalErrMsgInt:
664  * @ctxt:  an XML parser context
665  * @error:  the error number
666  * @msg:  the error message
667  * @val:  an integer value
668  *
669  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
670  */
671 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)672 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
673                   const char *msg, int val)
674 {
675     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676         (ctxt->instate == XML_PARSER_EOF))
677 	return;
678     if (ctxt != NULL)
679 	ctxt->errNo = error;
680     __xmlRaiseError(NULL, NULL, NULL,
681                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
682                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
683     if (ctxt != NULL) {
684 	ctxt->wellFormed = 0;
685 	if (ctxt->recovery == 0)
686 	    ctxt->disableSAX = 1;
687     }
688 }
689 
690 /**
691  * xmlFatalErrMsgStrIntStr:
692  * @ctxt:  an XML parser context
693  * @error:  the error number
694  * @msg:  the error message
695  * @str1:  an string info
696  * @val:  an integer value
697  * @str2:  an string info
698  *
699  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
700  */
701 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)702 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
703                   const char *msg, const xmlChar *str1, int val,
704 		  const xmlChar *str2)
705 {
706     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
707         (ctxt->instate == XML_PARSER_EOF))
708 	return;
709     if (ctxt != NULL)
710 	ctxt->errNo = error;
711     __xmlRaiseError(NULL, NULL, NULL,
712                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
713                     NULL, 0, (const char *) str1, (const char *) str2,
714 		    NULL, val, 0, msg, str1, val, str2);
715     if (ctxt != NULL) {
716 	ctxt->wellFormed = 0;
717 	if (ctxt->recovery == 0)
718 	    ctxt->disableSAX = 1;
719     }
720 }
721 
722 /**
723  * xmlFatalErrMsgStr:
724  * @ctxt:  an XML parser context
725  * @error:  the error number
726  * @msg:  the error message
727  * @val:  a string value
728  *
729  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
730  */
731 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)732 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
733                   const char *msg, const xmlChar * val)
734 {
735     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736         (ctxt->instate == XML_PARSER_EOF))
737 	return;
738     if (ctxt != NULL)
739 	ctxt->errNo = error;
740     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
741                     XML_FROM_PARSER, error, XML_ERR_FATAL,
742                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
743                     val);
744     if (ctxt != NULL) {
745 	ctxt->wellFormed = 0;
746 	if (ctxt->recovery == 0)
747 	    ctxt->disableSAX = 1;
748     }
749 }
750 
751 /**
752  * xmlErrMsgStr:
753  * @ctxt:  an XML parser context
754  * @error:  the error number
755  * @msg:  the error message
756  * @val:  a string value
757  *
758  * Handle a non fatal parser error
759  */
760 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)761 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762                   const char *msg, const xmlChar * val)
763 {
764     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765         (ctxt->instate == XML_PARSER_EOF))
766 	return;
767     if (ctxt != NULL)
768 	ctxt->errNo = error;
769     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
770                     XML_FROM_PARSER, error, XML_ERR_ERROR,
771                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772                     val);
773 }
774 
775 /**
776  * xmlNsErr:
777  * @ctxt:  an XML parser context
778  * @error:  the error number
779  * @msg:  the message
780  * @info1:  extra information string
781  * @info2:  extra information string
782  *
783  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
784  */
785 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)786 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
787          const char *msg,
788          const xmlChar * info1, const xmlChar * info2,
789          const xmlChar * info3)
790 {
791     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
792         (ctxt->instate == XML_PARSER_EOF))
793 	return;
794     if (ctxt != NULL)
795 	ctxt->errNo = error;
796     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
797                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
798                     (const char *) info2, (const char *) info3, 0, 0, msg,
799                     info1, info2, info3);
800     if (ctxt != NULL)
801 	ctxt->nsWellFormed = 0;
802 }
803 
804 /**
805  * xmlNsWarn
806  * @ctxt:  an XML parser context
807  * @error:  the error number
808  * @msg:  the message
809  * @info1:  extra information string
810  * @info2:  extra information string
811  *
812  * Handle a namespace warning error
813  */
814 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)815 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816          const char *msg,
817          const xmlChar * info1, const xmlChar * info2,
818          const xmlChar * info3)
819 {
820     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821         (ctxt->instate == XML_PARSER_EOF))
822 	return;
823     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
824                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
825                     (const char *) info2, (const char *) info3, 0, 0, msg,
826                     info1, info2, info3);
827 }
828 
829 /************************************************************************
830  *									*
831  *		Library wide options					*
832  *									*
833  ************************************************************************/
834 
/**
 * xmlHasFeature:
 * @feature: the feature to be examined
 *
 * Examines if the library has been compiled with a given feature.
 *
 * Returns a non-zero value if the feature exists, or zero (0) if the
 * feature does not exist or an unknown feature is requested.
 */
845 int
xmlHasFeature(xmlFeature feature)846 xmlHasFeature(xmlFeature feature)
847 {
848     switch (feature) {
849 	case XML_WITH_THREAD:
850 #ifdef LIBXML_THREAD_ENABLED
851 	    return(1);
852 #else
853 	    return(0);
854 #endif
855         case XML_WITH_TREE:
856 #ifdef LIBXML_TREE_ENABLED
857             return(1);
858 #else
859             return(0);
860 #endif
861         case XML_WITH_OUTPUT:
862 #ifdef LIBXML_OUTPUT_ENABLED
863             return(1);
864 #else
865             return(0);
866 #endif
867         case XML_WITH_PUSH:
868 #ifdef LIBXML_PUSH_ENABLED
869             return(1);
870 #else
871             return(0);
872 #endif
873         case XML_WITH_READER:
874 #ifdef LIBXML_READER_ENABLED
875             return(1);
876 #else
877             return(0);
878 #endif
879         case XML_WITH_PATTERN:
880 #ifdef LIBXML_PATTERN_ENABLED
881             return(1);
882 #else
883             return(0);
884 #endif
885         case XML_WITH_WRITER:
886 #ifdef LIBXML_WRITER_ENABLED
887             return(1);
888 #else
889             return(0);
890 #endif
891         case XML_WITH_SAX1:
892 #ifdef LIBXML_SAX1_ENABLED
893             return(1);
894 #else
895             return(0);
896 #endif
897         case XML_WITH_FTP:
898 #ifdef LIBXML_FTP_ENABLED
899             return(1);
900 #else
901             return(0);
902 #endif
903         case XML_WITH_HTTP:
904 #ifdef LIBXML_HTTP_ENABLED
905             return(1);
906 #else
907             return(0);
908 #endif
909         case XML_WITH_VALID:
910 #ifdef LIBXML_VALID_ENABLED
911             return(1);
912 #else
913             return(0);
914 #endif
915         case XML_WITH_HTML:
916 #ifdef LIBXML_HTML_ENABLED
917             return(1);
918 #else
919             return(0);
920 #endif
921         case XML_WITH_LEGACY:
922 #ifdef LIBXML_LEGACY_ENABLED
923             return(1);
924 #else
925             return(0);
926 #endif
927         case XML_WITH_C14N:
928 #ifdef LIBXML_C14N_ENABLED
929             return(1);
930 #else
931             return(0);
932 #endif
933         case XML_WITH_CATALOG:
934 #ifdef LIBXML_CATALOG_ENABLED
935             return(1);
936 #else
937             return(0);
938 #endif
939         case XML_WITH_XPATH:
940 #ifdef LIBXML_XPATH_ENABLED
941             return(1);
942 #else
943             return(0);
944 #endif
945         case XML_WITH_XPTR:
946 #ifdef LIBXML_XPTR_ENABLED
947             return(1);
948 #else
949             return(0);
950 #endif
951         case XML_WITH_XINCLUDE:
952 #ifdef LIBXML_XINCLUDE_ENABLED
953             return(1);
954 #else
955             return(0);
956 #endif
957         case XML_WITH_ICONV:
958 #ifdef LIBXML_ICONV_ENABLED
959             return(1);
960 #else
961             return(0);
962 #endif
963         case XML_WITH_ISO8859X:
964 #ifdef LIBXML_ISO8859X_ENABLED
965             return(1);
966 #else
967             return(0);
968 #endif
969         case XML_WITH_UNICODE:
970 #ifdef LIBXML_UNICODE_ENABLED
971             return(1);
972 #else
973             return(0);
974 #endif
975         case XML_WITH_REGEXP:
976 #ifdef LIBXML_REGEXP_ENABLED
977             return(1);
978 #else
979             return(0);
980 #endif
981         case XML_WITH_AUTOMATA:
982 #ifdef LIBXML_AUTOMATA_ENABLED
983             return(1);
984 #else
985             return(0);
986 #endif
987         case XML_WITH_EXPR:
988 #ifdef LIBXML_EXPR_ENABLED
989             return(1);
990 #else
991             return(0);
992 #endif
993         case XML_WITH_SCHEMAS:
994 #ifdef LIBXML_SCHEMAS_ENABLED
995             return(1);
996 #else
997             return(0);
998 #endif
999         case XML_WITH_SCHEMATRON:
1000 #ifdef LIBXML_SCHEMATRON_ENABLED
1001             return(1);
1002 #else
1003             return(0);
1004 #endif
1005         case XML_WITH_MODULES:
1006 #ifdef LIBXML_MODULES_ENABLED
1007             return(1);
1008 #else
1009             return(0);
1010 #endif
1011         case XML_WITH_DEBUG:
1012 #ifdef LIBXML_DEBUG_ENABLED
1013             return(1);
1014 #else
1015             return(0);
1016 #endif
1017         case XML_WITH_DEBUG_MEM:
1018 #ifdef DEBUG_MEMORY_LOCATION
1019             return(1);
1020 #else
1021             return(0);
1022 #endif
1023         case XML_WITH_DEBUG_RUN:
1024 #ifdef LIBXML_DEBUG_RUNTIME
1025             return(1);
1026 #else
1027             return(0);
1028 #endif
1029         case XML_WITH_ZLIB:
1030 #ifdef LIBXML_ZLIB_ENABLED
1031             return(1);
1032 #else
1033             return(0);
1034 #endif
1035         case XML_WITH_LZMA:
1036 #ifdef LIBXML_LZMA_ENABLED
1037             return(1);
1038 #else
1039             return(0);
1040 #endif
1041         case XML_WITH_ICU:
1042 #ifdef LIBXML_ICU_ENABLED
1043             return(1);
1044 #else
1045             return(0);
1046 #endif
1047         default:
1048 	    break;
1049      }
1050      return(0);
1051 }
1052 
1053 /************************************************************************
1054  *									*
1055  *		SAX2 defaulted attributes handling			*
1056  *									*
1057  ************************************************************************/
1058 
1059 /**
1060  * xmlDetectSAX2:
1061  * @ctxt:  an XML parser context
1062  *
1063  * Do the SAX2 detection and specific intialization
1064  */
1065 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1066 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1067     if (ctxt == NULL) return;
1068 #ifdef LIBXML_SAX1_ENABLED
1069     if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1070         ((ctxt->sax->startElementNs != NULL) ||
1071          (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1072 #else
1073     ctxt->sax2 = 1;
1074 #endif /* LIBXML_SAX1_ENABLED */
1075 
1076     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1077     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1078     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1079     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1080 		(ctxt->str_xml_ns == NULL)) {
1081         xmlErrMemory(ctxt, NULL);
1082     }
1083 }
1084 
1085 typedef struct _xmlDefAttrs xmlDefAttrs;
1086 typedef xmlDefAttrs *xmlDefAttrsPtr;
1087 struct _xmlDefAttrs {
1088     int nbAttrs;	/* number of defaulted attributes on that element */
1089     int maxAttrs;       /* the size of the array */
1090     const xmlChar *values[5]; /* array of localname/prefix/values/external */
1091 };
1092 
1093 /**
1094  * xmlAttrNormalizeSpace:
1095  * @src: the source string
1096  * @dst: the target string
1097  *
1098  * Normalize the space in non CDATA attribute values:
1099  * If the attribute type is not CDATA, then the XML processor MUST further
1100  * process the normalized attribute value by discarding any leading and
1101  * trailing space (#x20) characters, and by replacing sequences of space
1102  * (#x20) characters by a single space (#x20) character.
1103  * Note that the size of dst need to be at least src, and if one doesn't need
1104  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1105  * passing src as dst is just fine.
1106  *
1107  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1108  *         is needed.
1109  */
1110 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1111 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1112 {
1113     if ((src == NULL) || (dst == NULL))
1114         return(NULL);
1115 
1116     while (*src == 0x20) src++;
1117     while (*src != 0) {
1118 	if (*src == 0x20) {
1119 	    while (*src == 0x20) src++;
1120 	    if (*src != 0)
1121 		*dst++ = 0x20;
1122 	} else {
1123 	    *dst++ = *src++;
1124 	}
1125     }
1126     *dst = 0;
1127     if (dst == src)
1128        return(NULL);
1129     return(dst);
1130 }
1131 
1132 /**
1133  * xmlAttrNormalizeSpace2:
1134  * @src: the source string
1135  *
1136  * Normalize the space in non CDATA attribute values, a slightly more complex
1137  * front end to avoid allocation problems when running on attribute values
1138  * coming from the input.
1139  *
1140  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1141  *         is needed.
1142  */
1143 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1144 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1145 {
1146     int i;
1147     int remove_head = 0;
1148     int need_realloc = 0;
1149     const xmlChar *cur;
1150 
1151     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1152         return(NULL);
1153     i = *len;
1154     if (i <= 0)
1155         return(NULL);
1156 
1157     cur = src;
1158     while (*cur == 0x20) {
1159         cur++;
1160 	remove_head++;
1161     }
1162     while (*cur != 0) {
1163 	if (*cur == 0x20) {
1164 	    cur++;
1165 	    if ((*cur == 0x20) || (*cur == 0)) {
1166 	        need_realloc = 1;
1167 		break;
1168 	    }
1169 	} else
1170 	    cur++;
1171     }
1172     if (need_realloc) {
1173         xmlChar *ret;
1174 
1175 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1176 	if (ret == NULL) {
1177 	    xmlErrMemory(ctxt, NULL);
1178 	    return(NULL);
1179 	}
1180 	xmlAttrNormalizeSpace(ret, ret);
1181 	*len = (int) strlen((const char *)ret);
1182         return(ret);
1183     } else if (remove_head) {
1184         *len -= remove_head;
1185         memmove(src, src + remove_head, 1 + *len);
1186 	return(src);
1187     }
1188     return(NULL);
1189 }
1190 
1191 /**
1192  * xmlAddDefAttrs:
1193  * @ctxt:  an XML parser context
1194  * @fullname:  the element fullname
1195  * @fullattr:  the attribute fullname
1196  * @value:  the attribute value
1197  *
1198  * Add a defaulted attribute for an element
1199  */
1200 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1201 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1202                const xmlChar *fullname,
1203                const xmlChar *fullattr,
1204                const xmlChar *value) {
1205     xmlDefAttrsPtr defaults;
1206     int len;
1207     const xmlChar *name;
1208     const xmlChar *prefix;
1209 
1210     /*
1211      * Allows to detect attribute redefinitions
1212      */
1213     if (ctxt->attsSpecial != NULL) {
1214         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1215 	    return;
1216     }
1217 
1218     if (ctxt->attsDefault == NULL) {
1219         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1220 	if (ctxt->attsDefault == NULL)
1221 	    goto mem_error;
1222     }
1223 
1224     /*
1225      * split the element name into prefix:localname , the string found
1226      * are within the DTD and then not associated to namespace names.
1227      */
1228     name = xmlSplitQName3(fullname, &len);
1229     if (name == NULL) {
1230         name = xmlDictLookup(ctxt->dict, fullname, -1);
1231 	prefix = NULL;
1232     } else {
1233         name = xmlDictLookup(ctxt->dict, name, -1);
1234 	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1235     }
1236 
1237     /*
1238      * make sure there is some storage
1239      */
1240     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1241     if (defaults == NULL) {
1242         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1243 	                   (4 * 5) * sizeof(const xmlChar *));
1244 	if (defaults == NULL)
1245 	    goto mem_error;
1246 	defaults->nbAttrs = 0;
1247 	defaults->maxAttrs = 4;
1248 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1249 	                        defaults, NULL) < 0) {
1250 	    xmlFree(defaults);
1251 	    goto mem_error;
1252 	}
1253     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1254         xmlDefAttrsPtr temp;
1255 
1256         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1257 		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1258 	if (temp == NULL)
1259 	    goto mem_error;
1260 	defaults = temp;
1261 	defaults->maxAttrs *= 2;
1262 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1263 	                        defaults, NULL) < 0) {
1264 	    xmlFree(defaults);
1265 	    goto mem_error;
1266 	}
1267     }
1268 
1269     /*
1270      * Split the element name into prefix:localname , the string found
1271      * are within the DTD and hen not associated to namespace names.
1272      */
1273     name = xmlSplitQName3(fullattr, &len);
1274     if (name == NULL) {
1275         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1276 	prefix = NULL;
1277     } else {
1278         name = xmlDictLookup(ctxt->dict, name, -1);
1279 	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1280     }
1281 
1282     defaults->values[5 * defaults->nbAttrs] = name;
1283     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1284     /* intern the string and precompute the end */
1285     len = xmlStrlen(value);
1286     value = xmlDictLookup(ctxt->dict, value, len);
1287     defaults->values[5 * defaults->nbAttrs + 2] = value;
1288     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1289     if (ctxt->external)
1290         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1291     else
1292         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1293     defaults->nbAttrs++;
1294 
1295     return;
1296 
1297 mem_error:
1298     xmlErrMemory(ctxt, NULL);
1299     return;
1300 }
1301 
1302 /**
1303  * xmlAddSpecialAttr:
1304  * @ctxt:  an XML parser context
1305  * @fullname:  the element fullname
1306  * @fullattr:  the attribute fullname
1307  * @type:  the attribute type
1308  *
1309  * Register this attribute type
1310  */
1311 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1312 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1313 		  const xmlChar *fullname,
1314 		  const xmlChar *fullattr,
1315 		  int type)
1316 {
1317     if (ctxt->attsSpecial == NULL) {
1318         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1319 	if (ctxt->attsSpecial == NULL)
1320 	    goto mem_error;
1321     }
1322 
1323     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1324         return;
1325 
1326     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1327                      (void *) (long) type);
1328     return;
1329 
1330 mem_error:
1331     xmlErrMemory(ctxt, NULL);
1332     return;
1333 }
1334 
1335 /**
1336  * xmlCleanSpecialAttrCallback:
1337  *
1338  * Removes CDATA attributes from the special attribute table
1339  */
1340 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1341 xmlCleanSpecialAttrCallback(void *payload, void *data,
1342                             const xmlChar *fullname, const xmlChar *fullattr,
1343                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1344     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1345 
1346     if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1347         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1348     }
1349 }
1350 
1351 /**
1352  * xmlCleanSpecialAttr:
1353  * @ctxt:  an XML parser context
1354  *
1355  * Trim the list of attributes defined to remove all those of type
1356  * CDATA as they are not special. This call should be done when finishing
1357  * to parse the DTD and before starting to parse the document root.
1358  */
1359 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1360 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1361 {
1362     if (ctxt->attsSpecial == NULL)
1363         return;
1364 
1365     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1366 
1367     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1368         xmlHashFree(ctxt->attsSpecial, NULL);
1369         ctxt->attsSpecial = NULL;
1370     }
1371     return;
1372 }
1373 
1374 /**
1375  * xmlCheckLanguageID:
1376  * @lang:  pointer to the string value
1377  *
1378  * Checks that the value conforms to the LanguageID production:
1379  *
1380  * NOTE: this is somewhat deprecated, those productions were removed from
1381  *       the XML Second edition.
1382  *
1383  * [33] LanguageID ::= Langcode ('-' Subcode)*
1384  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1385  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1386  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1387  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1388  * [38] Subcode ::= ([a-z] | [A-Z])+
1389  *
1390  * The current REC references the successors of RFC 1766, currently RFC 5646
1391  *
1392  * http://www.rfc-editor.org/rfc/rfc5646.txt
1393  * langtag       = language
1394  *                 ["-" script]
1395  *                 ["-" region]
1396  *                 *("-" variant)
1397  *                 *("-" extension)
1398  *                 ["-" privateuse]
1399  * language      = 2*3ALPHA            ; shortest ISO 639 code
1400  *                 ["-" extlang]       ; sometimes followed by
1401  *                                     ; extended language subtags
1402  *               / 4ALPHA              ; or reserved for future use
1403  *               / 5*8ALPHA            ; or registered language subtag
1404  *
1405  * extlang       = 3ALPHA              ; selected ISO 639 codes
1406  *                 *2("-" 3ALPHA)      ; permanently reserved
1407  *
1408  * script        = 4ALPHA              ; ISO 15924 code
1409  *
1410  * region        = 2ALPHA              ; ISO 3166-1 code
1411  *               / 3DIGIT              ; UN M.49 code
1412  *
1413  * variant       = 5*8alphanum         ; registered variants
1414  *               / (DIGIT 3alphanum)
1415  *
1416  * extension     = singleton 1*("-" (2*8alphanum))
1417  *
1418  *                                     ; Single alphanumerics
1419  *                                     ; "x" reserved for private use
1420  * singleton     = DIGIT               ; 0 - 9
1421  *               / %x41-57             ; A - W
1422  *               / %x59-5A             ; Y - Z
1423  *               / %x61-77             ; a - w
1424  *               / %x79-7A             ; y - z
1425  *
1426  * it sounds right to still allow Irregular i-xxx IANA and user codes too
1427  * The parser below doesn't try to cope with extension or privateuse
1428  * that could be added but that's not interoperable anyway
1429  *
1430  * Returns 1 if correct 0 otherwise
1431  **/
1432 int
xmlCheckLanguageID(const xmlChar * lang)1433 xmlCheckLanguageID(const xmlChar * lang)
1434 {
1435     const xmlChar *cur = lang, *nxt;
1436 
1437     if (cur == NULL)
1438         return (0);
1439     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1440         ((cur[0] == 'I') && (cur[1] == '-')) ||
1441         ((cur[0] == 'x') && (cur[1] == '-')) ||
1442         ((cur[0] == 'X') && (cur[1] == '-'))) {
1443         /*
1444          * Still allow IANA code and user code which were coming
1445          * from the previous version of the XML-1.0 specification
1446          * it's deprecated but we should not fail
1447          */
1448         cur += 2;
1449         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1450                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1451             cur++;
1452         return(cur[0] == 0);
1453     }
1454     nxt = cur;
1455     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1456            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1457            nxt++;
1458     if (nxt - cur >= 4) {
1459         /*
1460          * Reserved
1461          */
1462         if ((nxt - cur > 8) || (nxt[0] != 0))
1463             return(0);
1464         return(1);
1465     }
1466     if (nxt - cur < 2)
1467         return(0);
1468     /* we got an ISO 639 code */
1469     if (nxt[0] == 0)
1470         return(1);
1471     if (nxt[0] != '-')
1472         return(0);
1473 
1474     nxt++;
1475     cur = nxt;
1476     /* now we can have extlang or script or region or variant */
1477     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1478         goto region_m49;
1479 
1480     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1481            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1482            nxt++;
1483     if (nxt - cur == 4)
1484         goto script;
1485     if (nxt - cur == 2)
1486         goto region;
1487     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1488         goto variant;
1489     if (nxt - cur != 3)
1490         return(0);
1491     /* we parsed an extlang */
1492     if (nxt[0] == 0)
1493         return(1);
1494     if (nxt[0] != '-')
1495         return(0);
1496 
1497     nxt++;
1498     cur = nxt;
1499     /* now we can have script or region or variant */
1500     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1501         goto region_m49;
1502 
1503     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1504            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1505            nxt++;
1506     if (nxt - cur == 2)
1507         goto region;
1508     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1509         goto variant;
1510     if (nxt - cur != 4)
1511         return(0);
1512     /* we parsed a script */
1513 script:
1514     if (nxt[0] == 0)
1515         return(1);
1516     if (nxt[0] != '-')
1517         return(0);
1518 
1519     nxt++;
1520     cur = nxt;
1521     /* now we can have region or variant */
1522     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1523         goto region_m49;
1524 
1525     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1526            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1527            nxt++;
1528 
1529     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1530         goto variant;
1531     if (nxt - cur != 2)
1532         return(0);
1533     /* we parsed a region */
1534 region:
1535     if (nxt[0] == 0)
1536         return(1);
1537     if (nxt[0] != '-')
1538         return(0);
1539 
1540     nxt++;
1541     cur = nxt;
1542     /* now we can just have a variant */
1543     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1544            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1545            nxt++;
1546 
1547     if ((nxt - cur < 5) || (nxt - cur > 8))
1548         return(0);
1549 
1550     /* we parsed a variant */
1551 variant:
1552     if (nxt[0] == 0)
1553         return(1);
1554     if (nxt[0] != '-')
1555         return(0);
1556     /* extensions and private use subtags not checked */
1557     return (1);
1558 
1559 region_m49:
1560     if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1561         ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1562         nxt += 3;
1563         goto region;
1564     }
1565     return(0);
1566 }
1567 
1568 /************************************************************************
1569  *									*
1570  *		Parser stacks related functions and macros		*
1571  *									*
1572  ************************************************************************/
1573 
1574 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1575                                             const xmlChar ** str);
1576 
1577 #ifdef SAX2
1578 /**
1579  * nsPush:
1580  * @ctxt:  an XML parser context
1581  * @prefix:  the namespace prefix or NULL
1582  * @URL:  the namespace name
1583  *
1584  * Pushes a new parser namespace on top of the ns stack
1585  *
1586  * Returns -1 in case of error, -2 if the namespace should be discarded
1587  *	   and the index in the stack otherwise.
1588  */
1589 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1590 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1591 {
1592     if (ctxt->options & XML_PARSE_NSCLEAN) {
1593         int i;
1594 	for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1595 	    if (ctxt->nsTab[i] == prefix) {
1596 		/* in scope */
1597 	        if (ctxt->nsTab[i + 1] == URL)
1598 		    return(-2);
1599 		/* out of scope keep it */
1600 		break;
1601 	    }
1602 	}
1603     }
1604     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1605 	ctxt->nsMax = 10;
1606 	ctxt->nsNr = 0;
1607 	ctxt->nsTab = (const xmlChar **)
1608 	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1609 	if (ctxt->nsTab == NULL) {
1610 	    xmlErrMemory(ctxt, NULL);
1611 	    ctxt->nsMax = 0;
1612             return (-1);
1613 	}
1614     } else if (ctxt->nsNr >= ctxt->nsMax) {
1615         const xmlChar ** tmp;
1616         ctxt->nsMax *= 2;
1617         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1618 				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1619         if (tmp == NULL) {
1620             xmlErrMemory(ctxt, NULL);
1621 	    ctxt->nsMax /= 2;
1622             return (-1);
1623         }
1624 	ctxt->nsTab = tmp;
1625     }
1626     ctxt->nsTab[ctxt->nsNr++] = prefix;
1627     ctxt->nsTab[ctxt->nsNr++] = URL;
1628     return (ctxt->nsNr);
1629 }
1630 /**
1631  * nsPop:
1632  * @ctxt: an XML parser context
1633  * @nr:  the number to pop
1634  *
1635  * Pops the top @nr parser prefix/namespace from the ns stack
1636  *
1637  * Returns the number of namespaces removed
1638  */
1639 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1640 nsPop(xmlParserCtxtPtr ctxt, int nr)
1641 {
1642     int i;
1643 
1644     if (ctxt->nsTab == NULL) return(0);
1645     if (ctxt->nsNr < nr) {
1646         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1647         nr = ctxt->nsNr;
1648     }
1649     if (ctxt->nsNr <= 0)
1650         return (0);
1651 
1652     for (i = 0;i < nr;i++) {
1653          ctxt->nsNr--;
1654 	 ctxt->nsTab[ctxt->nsNr] = NULL;
1655     }
1656     return(nr);
1657 }
1658 #endif
1659 
1660 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1661 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1662     const xmlChar **atts;
1663     int *attallocs;
1664     int maxatts;
1665 
1666     if (ctxt->atts == NULL) {
1667 	maxatts = 55; /* allow for 10 attrs by default */
1668 	atts = (const xmlChar **)
1669 	       xmlMalloc(maxatts * sizeof(xmlChar *));
1670 	if (atts == NULL) goto mem_error;
1671 	ctxt->atts = atts;
1672 	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1673 	if (attallocs == NULL) goto mem_error;
1674 	ctxt->attallocs = attallocs;
1675 	ctxt->maxatts = maxatts;
1676     } else if (nr + 5 > ctxt->maxatts) {
1677 	maxatts = (nr + 5) * 2;
1678 	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1679 				     maxatts * sizeof(const xmlChar *));
1680 	if (atts == NULL) goto mem_error;
1681 	ctxt->atts = atts;
1682 	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1683 	                             (maxatts / 5) * sizeof(int));
1684 	if (attallocs == NULL) goto mem_error;
1685 	ctxt->attallocs = attallocs;
1686 	ctxt->maxatts = maxatts;
1687     }
1688     return(ctxt->maxatts);
1689 mem_error:
1690     xmlErrMemory(ctxt, NULL);
1691     return(-1);
1692 }
1693 
1694 /**
1695  * inputPush:
1696  * @ctxt:  an XML parser context
1697  * @value:  the parser input
1698  *
1699  * Pushes a new parser input on top of the input stack
1700  *
1701  * Returns -1 in case of error, the index in the stack otherwise
1702  */
1703 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1704 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1705 {
1706     if ((ctxt == NULL) || (value == NULL))
1707         return(-1);
1708     if (ctxt->inputNr >= ctxt->inputMax) {
1709         ctxt->inputMax *= 2;
1710         ctxt->inputTab =
1711             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1712                                              ctxt->inputMax *
1713                                              sizeof(ctxt->inputTab[0]));
1714         if (ctxt->inputTab == NULL) {
1715             xmlErrMemory(ctxt, NULL);
1716 	    xmlFreeInputStream(value);
1717 	    ctxt->inputMax /= 2;
1718 	    value = NULL;
1719             return (-1);
1720         }
1721     }
1722     ctxt->inputTab[ctxt->inputNr] = value;
1723     ctxt->input = value;
1724     return (ctxt->inputNr++);
1725 }
1726 /**
1727  * inputPop:
1728  * @ctxt: an XML parser context
1729  *
1730  * Pops the top parser input from the input stack
1731  *
1732  * Returns the input just removed
1733  */
1734 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1735 inputPop(xmlParserCtxtPtr ctxt)
1736 {
1737     xmlParserInputPtr ret;
1738 
1739     if (ctxt == NULL)
1740         return(NULL);
1741     if (ctxt->inputNr <= 0)
1742         return (NULL);
1743     ctxt->inputNr--;
1744     if (ctxt->inputNr > 0)
1745         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1746     else
1747         ctxt->input = NULL;
1748     ret = ctxt->inputTab[ctxt->inputNr];
1749     ctxt->inputTab[ctxt->inputNr] = NULL;
1750     return (ret);
1751 }
1752 /**
1753  * nodePush:
1754  * @ctxt:  an XML parser context
1755  * @value:  the element node
1756  *
1757  * Pushes a new element node on top of the node stack
1758  *
1759  * Returns -1 in case of error, the index in the stack otherwise
1760  */
1761 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1762 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1763 {
1764     if (ctxt == NULL) return(0);
1765     if (ctxt->nodeNr >= ctxt->nodeMax) {
1766         xmlNodePtr *tmp;
1767 
1768 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1769                                       ctxt->nodeMax * 2 *
1770                                       sizeof(ctxt->nodeTab[0]));
1771         if (tmp == NULL) {
1772             xmlErrMemory(ctxt, NULL);
1773             return (-1);
1774         }
1775         ctxt->nodeTab = tmp;
1776 	ctxt->nodeMax *= 2;
1777     }
1778     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1779         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1780 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1781 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1782 			  xmlParserMaxDepth);
1783 	xmlHaltParser(ctxt);
1784 	return(-1);
1785     }
1786     ctxt->nodeTab[ctxt->nodeNr] = value;
1787     ctxt->node = value;
1788     return (ctxt->nodeNr++);
1789 }
1790 
1791 /**
1792  * nodePop:
1793  * @ctxt: an XML parser context
1794  *
1795  * Pops the top element node from the node stack
1796  *
1797  * Returns the node just removed
1798  */
1799 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1800 nodePop(xmlParserCtxtPtr ctxt)
1801 {
1802     xmlNodePtr ret;
1803 
1804     if (ctxt == NULL) return(NULL);
1805     if (ctxt->nodeNr <= 0)
1806         return (NULL);
1807     ctxt->nodeNr--;
1808     if (ctxt->nodeNr > 0)
1809         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1810     else
1811         ctxt->node = NULL;
1812     ret = ctxt->nodeTab[ctxt->nodeNr];
1813     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1814     return (ret);
1815 }
1816 
1817 #ifdef LIBXML_PUSH_ENABLED
1818 /**
1819  * nameNsPush:
1820  * @ctxt:  an XML parser context
1821  * @value:  the element name
1822  * @prefix:  the element prefix
1823  * @URI:  the element namespace name
1824  *
1825  * Pushes a new element name/prefix/URL on top of the name stack
1826  *
1827  * Returns -1 in case of error, the index in the stack otherwise
1828  */
1829 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1830 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1831            const xmlChar *prefix, const xmlChar *URI, int nsNr)
1832 {
1833     if (ctxt->nameNr >= ctxt->nameMax) {
1834         const xmlChar * *tmp;
1835         void **tmp2;
1836         ctxt->nameMax *= 2;
1837         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1838                                     ctxt->nameMax *
1839                                     sizeof(ctxt->nameTab[0]));
1840         if (tmp == NULL) {
1841 	    ctxt->nameMax /= 2;
1842 	    goto mem_error;
1843         }
1844 	ctxt->nameTab = tmp;
1845         tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1846                                     ctxt->nameMax * 3 *
1847                                     sizeof(ctxt->pushTab[0]));
1848         if (tmp2 == NULL) {
1849 	    ctxt->nameMax /= 2;
1850 	    goto mem_error;
1851         }
1852 	ctxt->pushTab = tmp2;
1853     }
1854     ctxt->nameTab[ctxt->nameNr] = value;
1855     ctxt->name = value;
1856     ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1857     ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1858     ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1859     return (ctxt->nameNr++);
1860 mem_error:
1861     xmlErrMemory(ctxt, NULL);
1862     return (-1);
1863 }
1864 /**
1865  * nameNsPop:
1866  * @ctxt: an XML parser context
1867  *
1868  * Pops the top element/prefix/URI name from the name stack
1869  *
1870  * Returns the name just removed
1871  */
1872 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1873 nameNsPop(xmlParserCtxtPtr ctxt)
1874 {
1875     const xmlChar *ret;
1876 
1877     if (ctxt->nameNr <= 0)
1878         return (NULL);
1879     ctxt->nameNr--;
1880     if (ctxt->nameNr > 0)
1881         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1882     else
1883         ctxt->name = NULL;
1884     ret = ctxt->nameTab[ctxt->nameNr];
1885     ctxt->nameTab[ctxt->nameNr] = NULL;
1886     return (ret);
1887 }
1888 #endif /* LIBXML_PUSH_ENABLED */
1889 
1890 /**
1891  * namePush:
1892  * @ctxt:  an XML parser context
1893  * @value:  the element name
1894  *
1895  * Pushes a new element name on top of the name stack
1896  *
1897  * Returns -1 in case of error, the index in the stack otherwise
1898  */
1899 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1900 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1901 {
1902     if (ctxt == NULL) return (-1);
1903 
1904     if (ctxt->nameNr >= ctxt->nameMax) {
1905         const xmlChar * *tmp;
1906         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1907                                     ctxt->nameMax * 2 *
1908                                     sizeof(ctxt->nameTab[0]));
1909         if (tmp == NULL) {
1910 	    goto mem_error;
1911         }
1912 	ctxt->nameTab = tmp;
1913         ctxt->nameMax *= 2;
1914     }
1915     ctxt->nameTab[ctxt->nameNr] = value;
1916     ctxt->name = value;
1917     return (ctxt->nameNr++);
1918 mem_error:
1919     xmlErrMemory(ctxt, NULL);
1920     return (-1);
1921 }
1922 /**
1923  * namePop:
1924  * @ctxt: an XML parser context
1925  *
1926  * Pops the top element name from the name stack
1927  *
1928  * Returns the name just removed
1929  */
1930 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1931 namePop(xmlParserCtxtPtr ctxt)
1932 {
1933     const xmlChar *ret;
1934 
1935     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1936         return (NULL);
1937     ctxt->nameNr--;
1938     if (ctxt->nameNr > 0)
1939         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1940     else
1941         ctxt->name = NULL;
1942     ret = ctxt->nameTab[ctxt->nameNr];
1943     ctxt->nameTab[ctxt->nameNr] = NULL;
1944     return (ret);
1945 }
1946 
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to stack
 *
 * Pushes @val on top of the space stack and points ctxt->space at the new
 * top entry. The table capacity is doubled when the stack is full and
 * restored if the reallocation fails.
 *
 * Returns -1 in case of allocation failure, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int *slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grownTab;

        ctxt->spaceMax *= 2;
        grownTab = (int *) xmlRealloc(ctxt->spaceTab,
                                      ctxt->spaceMax *
                                      sizeof(ctxt->spaceTab[0]));
        if (grownTab == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grownTab;
    }

    slot = &ctxt->spaceTab[ctxt->spaceNr];
    *slot = val;
    ctxt->space = slot;
    return(ctxt->spaceNr++);
}
1965 
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * entry below, or to the first table slot when the stack becomes empty,
 * and the vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack was empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1978 
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named ctxt (the parser context) to be in
 * scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   BASE_PTR returns the base pointer of the current input.
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *           (in this file RAW and CUR expand to the same expression)
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

/* current byte of the active input (both names read the same location) */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* byte located val positions after the current one */
#define NXT(val) ctxt->input->cur[(val)]
/* cursor and start-of-buffer pointers of the active input */
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base
2019 
/*
 * CMPn(s, c1, ..., cn): evaluates to non-zero when the first n bytes found
 * at s, read as unsigned char, are equal to c1 .. cn in that order.
 * The comparison short-circuits on the first mismatch, so bytes after a
 * mismatch are not read. s is expanded once per compared byte and must
 * therefore be free of side effects. Every parameter is parenthesized so
 * that arbitrary expressions (pointer arithmetic, conditionals) can be
 * passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2037 
/*
 * SKIP(val): advance the current input by val bytes, adding val to the
 * context character counter and to the input column. Afterwards a '%' at
 * the cursor triggers xmlParserHandlePEReference(), and a 0 byte at the
 * cursor triggers an attempt to grow the input; if nothing could be read
 * the input is popped.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
2045 
/*
 * SKIPL(val): advance the current input by val bytes one at a time,
 * keeping the line/column bookkeeping exact: a '\n' under the cursor
 * increments the line and resets the column to 1, any other byte
 * increments the column. The context character counter is incremented
 * for each byte. Afterwards a '%' at the cursor triggers
 * xmlParserHandlePEReference(), and a 0 byte triggers an attempt to grow
 * the input; if nothing could be read the input is popped.
 * val is parenthesized so that any expression can be passed; it is
 * re-evaluated on every loop iteration and must be free of side effects.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
2060 
2061 #define SHRINK if ((ctxt->progressive == 0) &&				\
2062 		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2063 		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2064 	xmlSHRINK (ctxt);
2065 
xmlSHRINK(xmlParserCtxtPtr ctxt)2066 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2067     xmlParserInputShrink(ctxt->input);
2068     if ((*ctxt->input->cur == 0) &&
2069         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2070 	    xmlPopInput(ctxt);
2071   }
2072 
2073 #define GROW if ((ctxt->progressive == 0) &&				\
2074 		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
2075 	xmlGROW (ctxt);
2076 
xmlGROW(xmlParserCtxtPtr ctxt)2077 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2078     unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2079     unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2080 
2081     if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2082          (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2083          ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2084         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2085         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2086         xmlHaltParser(ctxt);
2087 	return;
2088     }
2089     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2090     if ((ctxt->input->cur > ctxt->input->end) ||
2091         (ctxt->input->cur < ctxt->input->base)) {
2092         xmlHaltParser(ctxt);
2093         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2094 	return;
2095     }
2096     if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2097         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2098 	    xmlPopInput(ctxt);
2099 }
2100 
/* skip blanks at the cursor; evaluates to the number of blanks skipped */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* advance by one character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, incrementing the column and the
 * character counter; tries to read more input when the new current byte
 * is 0. No newline handling is done.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which occupies l bytes. A '\n'
 * under the cursor increments the line and resets the column to 1,
 * anything else increments the column. A '%' found at the new position
 * triggers xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: its address is handed to the decoding function */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store character v in buffer b at index i and advance
 * i: a single byte when l == 1, otherwise whatever
 * xmlCopyCharMultiByte() writes.
 * CAUTION: this expands to a bare if/else statement (kept that way so
 * existing call sites keep parsing); always use it inside braces when it
 * is the body of another if.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2127 
2128 /**
2129  * xmlSkipBlankChars:
2130  * @ctxt:  the XML parser context
2131  *
2132  * skip all blanks character found at that point in the input streams.
2133  * It pops up finished entities in the process if allowable at that point.
2134  *
2135  * Returns the number of space chars skipped
2136  */
2137 
2138 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2139 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2140     int res = 0;
2141 
2142     /*
2143      * It's Okay to use CUR/NEXT here since all the blanks are on
2144      * the ASCII range.
2145      */
2146     if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2147 	const xmlChar *cur;
2148 	/*
2149 	 * if we are in the document content, go really fast
2150 	 */
2151 	cur = ctxt->input->cur;
2152 	while (IS_BLANK_CH(*cur)) {
2153 	    if (*cur == '\n') {
2154 		ctxt->input->line++; ctxt->input->col = 1;
2155 	    } else {
2156 		ctxt->input->col++;
2157 	    }
2158 	    cur++;
2159 	    res++;
2160 	    if (*cur == 0) {
2161 		ctxt->input->cur = cur;
2162 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2163 		cur = ctxt->input->cur;
2164 	    }
2165 	}
2166 	ctxt->input->cur = cur;
2167     } else {
2168 	int cur;
2169 	do {
2170 	    cur = CUR;
2171 	    while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
2172 	           (ctxt->instate != XML_PARSER_EOF))) {
2173 		NEXT;
2174 		cur = CUR;
2175 		res++;
2176 	    }
2177 	    while ((cur == 0) && (ctxt->inputNr > 1) &&
2178 		   (ctxt->instate != XML_PARSER_COMMENT)) {
2179 		xmlPopInput(ctxt);
2180 		cur = CUR;
2181 	    }
2182 	    /*
2183 	     * Need to handle support of entities branching here
2184 	     */
2185 	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2186 	} while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
2187 	         (ctxt->instate != XML_PARSER_EOF));
2188     }
2189     return(res);
2190 }
2191 
2192 /************************************************************************
2193  *									*
2194  *		Commodity functions to handle entities			*
2195  *									*
2196  ************************************************************************/
2197 
2198 /**
2199  * xmlPopInput:
2200  * @ctxt:  an XML parser context
2201  *
2202  * xmlPopInput: the current input pointed by ctxt->input came to an end
2203  *          pop it and return the next char.
2204  *
2205  * Returns the current xmlChar in the parser context
2206  */
2207 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2208 xmlPopInput(xmlParserCtxtPtr ctxt) {
2209     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2210     if (xmlParserDebugEntities)
2211 	xmlGenericError(xmlGenericErrorContext,
2212 		"Popping input %d\n", ctxt->inputNr);
2213     xmlFreeInputStream(inputPop(ctxt));
2214     if ((*ctxt->input->cur == 0) &&
2215         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2216 	    return(xmlPopInput(ctxt));
2217     return(CUR);
2218 }
2219 
2220 /**
2221  * xmlPushInput:
2222  * @ctxt:  an XML parser context
2223  * @input:  an XML parser input fragment (entity, XML fragment ...).
2224  *
2225  * xmlPushInput: switch to a new input stream which is stacked on top
2226  *               of the previous one(s).
2227  * Returns -1 in case of error or the index in the input stack
2228  */
2229 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2230 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2231     int ret;
2232     if (input == NULL) return(-1);
2233 
2234     if (xmlParserDebugEntities) {
2235 	if ((ctxt->input != NULL) && (ctxt->input->filename))
2236 	    xmlGenericError(xmlGenericErrorContext,
2237 		    "%s(%d): ", ctxt->input->filename,
2238 		    ctxt->input->line);
2239 	xmlGenericError(xmlGenericErrorContext,
2240 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2241     }
2242     ret = inputPush(ctxt, input);
2243     if (ctxt->instate == XML_PARSER_EOF)
2244         return(-1);
2245     GROW;
2246     return(ret);
2247 }
2248 
2249 /**
2250  * xmlParseCharRef:
2251  * @ctxt:  an XML parser context
2252  *
2253  * parse Reference declarations
2254  *
2255  * [66] CharRef ::= '&#' [0-9]+ ';' |
2256  *                  '&#x' [0-9a-fA-F]+ ';'
2257  *
2258  * [ WFC: Legal Character ]
2259  * Characters referred to using character references must match the
2260  * production for Char.
2261  *
2262  * Returns the value parsed (as an int), 0 in case of error
2263  */
2264 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2265 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2266     unsigned int val = 0;
2267     int count = 0;
2268     unsigned int outofrange = 0;
2269 
2270     /*
2271      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2272      */
2273     if ((RAW == '&') && (NXT(1) == '#') &&
2274         (NXT(2) == 'x')) {
2275 	SKIP(3);
2276 	GROW;
2277 	while (RAW != ';') { /* loop blocked by count */
2278 	    if (count++ > 20) {
2279 		count = 0;
2280 		GROW;
2281                 if (ctxt->instate == XML_PARSER_EOF)
2282                     return(0);
2283 	    }
2284 	    if ((RAW >= '0') && (RAW <= '9'))
2285 	        val = val * 16 + (CUR - '0');
2286 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2287 	        val = val * 16 + (CUR - 'a') + 10;
2288 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2289 	        val = val * 16 + (CUR - 'A') + 10;
2290 	    else {
2291 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2292 		val = 0;
2293 		break;
2294 	    }
2295 	    if (val > 0x10FFFF)
2296 	        outofrange = val;
2297 
2298 	    NEXT;
2299 	    count++;
2300 	}
2301 	if (RAW == ';') {
2302 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2303 	    ctxt->input->col++;
2304 	    ctxt->nbChars ++;
2305 	    ctxt->input->cur++;
2306 	}
2307     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2308 	SKIP(2);
2309 	GROW;
2310 	while (RAW != ';') { /* loop blocked by count */
2311 	    if (count++ > 20) {
2312 		count = 0;
2313 		GROW;
2314                 if (ctxt->instate == XML_PARSER_EOF)
2315                     return(0);
2316 	    }
2317 	    if ((RAW >= '0') && (RAW <= '9'))
2318 	        val = val * 10 + (CUR - '0');
2319 	    else {
2320 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2321 		val = 0;
2322 		break;
2323 	    }
2324 	    if (val > 0x10FFFF)
2325 	        outofrange = val;
2326 
2327 	    NEXT;
2328 	    count++;
2329 	}
2330 	if (RAW == ';') {
2331 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2332 	    ctxt->input->col++;
2333 	    ctxt->nbChars ++;
2334 	    ctxt->input->cur++;
2335 	}
2336     } else {
2337         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2338     }
2339 
2340     /*
2341      * [ WFC: Legal Character ]
2342      * Characters referred to using character references must match the
2343      * production for Char.
2344      */
2345     if ((IS_CHAR(val) && (outofrange == 0))) {
2346         return(val);
2347     } else {
2348         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2349                           "xmlParseCharRef: invalid xmlChar value %d\n",
2350 	                  val);
2351     }
2352     return(0);
2353 }
2354 
2355 /**
2356  * xmlParseStringCharRef:
2357  * @ctxt:  an XML parser context
2358  * @str:  a pointer to an index in the string
2359  *
2360  * parse Reference declarations, variant parsing from a string rather
2361  * than an an input flow.
2362  *
2363  * [66] CharRef ::= '&#' [0-9]+ ';' |
2364  *                  '&#x' [0-9a-fA-F]+ ';'
2365  *
2366  * [ WFC: Legal Character ]
2367  * Characters referred to using character references must match the
2368  * production for Char.
2369  *
2370  * Returns the value parsed (as an int), 0 in case of error, str will be
2371  *         updated to the current value of the index
2372  */
2373 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2374 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2375     const xmlChar *ptr;
2376     xmlChar cur;
2377     unsigned int val = 0;
2378     unsigned int outofrange = 0;
2379 
2380     if ((str == NULL) || (*str == NULL)) return(0);
2381     ptr = *str;
2382     cur = *ptr;
2383     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2384 	ptr += 3;
2385 	cur = *ptr;
2386 	while (cur != ';') { /* Non input consuming loop */
2387 	    if ((cur >= '0') && (cur <= '9'))
2388 	        val = val * 16 + (cur - '0');
2389 	    else if ((cur >= 'a') && (cur <= 'f'))
2390 	        val = val * 16 + (cur - 'a') + 10;
2391 	    else if ((cur >= 'A') && (cur <= 'F'))
2392 	        val = val * 16 + (cur - 'A') + 10;
2393 	    else {
2394 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2395 		val = 0;
2396 		break;
2397 	    }
2398 	    if (val > 0x10FFFF)
2399 	        outofrange = val;
2400 
2401 	    ptr++;
2402 	    cur = *ptr;
2403 	}
2404 	if (cur == ';')
2405 	    ptr++;
2406     } else if  ((cur == '&') && (ptr[1] == '#')){
2407 	ptr += 2;
2408 	cur = *ptr;
2409 	while (cur != ';') { /* Non input consuming loops */
2410 	    if ((cur >= '0') && (cur <= '9'))
2411 	        val = val * 10 + (cur - '0');
2412 	    else {
2413 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2414 		val = 0;
2415 		break;
2416 	    }
2417 	    if (val > 0x10FFFF)
2418 	        outofrange = val;
2419 
2420 	    ptr++;
2421 	    cur = *ptr;
2422 	}
2423 	if (cur == ';')
2424 	    ptr++;
2425     } else {
2426 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2427 	return(0);
2428     }
2429     *str = ptr;
2430 
2431     /*
2432      * [ WFC: Legal Character ]
2433      * Characters referred to using character references must match the
2434      * production for Char.
2435      */
2436     if ((IS_CHAR(val) && (outofrange == 0))) {
2437         return(val);
2438     } else {
2439         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2440 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2441 			  val);
2442     }
2443     return(0);
2444 }
2445 
2446 /**
2447  * xmlNewBlanksWrapperInputStream:
2448  * @ctxt:  an XML parser context
2449  * @entity:  an Entity pointer
2450  *
2451  * Create a new input stream for wrapping
2452  * blanks around a PEReference
2453  *
2454  * Returns the new input stream or NULL
2455  */
2456 
deallocblankswrapper(xmlChar * str)2457 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2458 
2459 static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)2460 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2461     xmlParserInputPtr input;
2462     xmlChar *buffer;
2463     size_t length;
2464     if (entity == NULL) {
2465 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2466 	            "xmlNewBlanksWrapperInputStream entity\n");
2467 	return(NULL);
2468     }
2469     if (xmlParserDebugEntities)
2470 	xmlGenericError(xmlGenericErrorContext,
2471 		"new blanks wrapper for entity: %s\n", entity->name);
2472     input = xmlNewInputStream(ctxt);
2473     if (input == NULL) {
2474 	return(NULL);
2475     }
2476     length = xmlStrlen(entity->name) + 5;
2477     buffer = xmlMallocAtomic(length);
2478     if (buffer == NULL) {
2479 	xmlErrMemory(ctxt, NULL);
2480         xmlFree(input);
2481 	return(NULL);
2482     }
2483     buffer [0] = ' ';
2484     buffer [1] = '%';
2485     buffer [length-3] = ';';
2486     buffer [length-2] = ' ';
2487     buffer [length-1] = 0;
2488     memcpy(buffer + 2, entity->name, length - 5);
2489     input->free = deallocblankswrapper;
2490     input->base = buffer;
2491     input->cur = buffer;
2492     input->length = length;
2493     input->end = &buffer[length];
2494     return(input);
2495 }
2496 
2497 /**
2498  * xmlParserHandlePEReference:
2499  * @ctxt:  the parser context
2500  *
2501  * [69] PEReference ::= '%' Name ';'
2502  *
2503  * [ WFC: No Recursion ]
2504  * A parsed entity must not contain a recursive
2505  * reference to itself, either directly or indirectly.
2506  *
2507  * [ WFC: Entity Declared ]
2508  * In a document without any DTD, a document with only an internal DTD
2509  * subset which contains no parameter entity references, or a document
2510  * with "standalone='yes'", ...  ... The declaration of a parameter
2511  * entity must precede any reference to it...
2512  *
2513  * [ VC: Entity Declared ]
2514  * In a document with an external subset or external parameter entities
2515  * with "standalone='no'", ...  ... The declaration of a parameter entity
2516  * must precede any reference to it...
2517  *
2518  * [ WFC: In DTD ]
2519  * Parameter-entity references may only appear in the DTD.
2520  * NOTE: misleading but this is handled.
2521  *
2522  * A PEReference may have been detected in the current input stream
2523  * the handling is done accordingly to
2524  *      http://www.w3.org/TR/REC-xml#entproc
2525  * i.e.
2526  *   - Included in literal in entity values
2527  *   - Included as Parameter Entity reference within DTDs
2528  */
2529 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2530 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2531     const xmlChar *name;
2532     xmlEntityPtr entity = NULL;
2533     xmlParserInputPtr input;
2534 
2535     if (RAW != '%') return;
2536     switch(ctxt->instate) {
2537 	case XML_PARSER_CDATA_SECTION:
2538 	    return;
2539         case XML_PARSER_COMMENT:
2540 	    return;
2541 	case XML_PARSER_START_TAG:
2542 	    return;
2543 	case XML_PARSER_END_TAG:
2544 	    return;
2545         case XML_PARSER_EOF:
2546 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2547 	    return;
2548         case XML_PARSER_PROLOG:
2549 	case XML_PARSER_START:
2550 	case XML_PARSER_MISC:
2551 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2552 	    return;
2553 	case XML_PARSER_ENTITY_DECL:
2554         case XML_PARSER_CONTENT:
2555         case XML_PARSER_ATTRIBUTE_VALUE:
2556         case XML_PARSER_PI:
2557 	case XML_PARSER_SYSTEM_LITERAL:
2558 	case XML_PARSER_PUBLIC_LITERAL:
2559 	    /* we just ignore it there */
2560 	    return;
2561         case XML_PARSER_EPILOG:
2562 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2563 	    return;
2564 	case XML_PARSER_ENTITY_VALUE:
2565 	    /*
2566 	     * NOTE: in the case of entity values, we don't do the
2567 	     *       substitution here since we need the literal
2568 	     *       entity value to be able to save the internal
2569 	     *       subset of the document.
2570 	     *       This will be handled by xmlStringDecodeEntities
2571 	     */
2572 	    return;
2573         case XML_PARSER_DTD:
2574 	    /*
2575 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2576 	     * In the internal DTD subset, parameter-entity references
2577 	     * can occur only where markup declarations can occur, not
2578 	     * within markup declarations.
2579 	     * In that case this is handled in xmlParseMarkupDecl
2580 	     */
2581 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2582 		return;
2583 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2584 		return;
2585             break;
2586         case XML_PARSER_IGNORE:
2587             return;
2588     }
2589 
2590     NEXT;
2591     name = xmlParseName(ctxt);
2592     if (xmlParserDebugEntities)
2593 	xmlGenericError(xmlGenericErrorContext,
2594 		"PEReference: %s\n", name);
2595     if (name == NULL) {
2596 	xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2597     } else {
2598 	if (RAW == ';') {
2599 	    NEXT;
2600 	    if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2601 		entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2602 	    if (ctxt->instate == XML_PARSER_EOF)
2603 	        return;
2604 	    if (entity == NULL) {
2605 
2606 		/*
2607 		 * [ WFC: Entity Declared ]
2608 		 * In a document without any DTD, a document with only an
2609 		 * internal DTD subset which contains no parameter entity
2610 		 * references, or a document with "standalone='yes'", ...
2611 		 * ... The declaration of a parameter entity must precede
2612 		 * any reference to it...
2613 		 */
2614 		if ((ctxt->standalone == 1) ||
2615 		    ((ctxt->hasExternalSubset == 0) &&
2616 		     (ctxt->hasPErefs == 0))) {
2617 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2618 			 "PEReference: %%%s; not found\n", name);
2619 	        } else {
2620 		    /*
2621 		     * [ VC: Entity Declared ]
2622 		     * In a document with an external subset or external
2623 		     * parameter entities with "standalone='no'", ...
2624 		     * ... The declaration of a parameter entity must precede
2625 		     * any reference to it...
2626 		     */
2627 		    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2628 		        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2629 			                 "PEReference: %%%s; not found\n",
2630 				         name, NULL);
2631 		    } else
2632 		        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2633 			              "PEReference: %%%s; not found\n",
2634 				      name, NULL);
2635 		    ctxt->valid = 0;
2636 		}
2637 		xmlParserEntityCheck(ctxt, 0, NULL, 0);
2638 	    } else if (ctxt->input->free != deallocblankswrapper) {
2639 		    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2640 		    if (xmlPushInput(ctxt, input) < 0)
2641 		        return;
2642 	    } else {
2643 	        if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2644 		    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2645 		    xmlChar start[4];
2646 		    xmlCharEncoding enc;
2647 
2648 		    /*
2649 		     * Note: external parameter entities will not be loaded, it
2650 		     * is not required for a non-validating parser, unless the
2651 		     * option of validating, or substituting entities were
2652 		     * given. Doing so is far more secure as the parser will
2653 		     * only process data coming from the document entity by
2654 		     * default.
2655 		     */
2656                     if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2657 		        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2658 			((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2659 			((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2660 			((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2661 			(ctxt->replaceEntities == 0) &&
2662 			(ctxt->validate == 0))
2663 			return;
2664 
2665 		    /*
2666 		     * handle the extra spaces added before and after
2667 		     * c.f. http://www.w3.org/TR/REC-xml#as-PE
2668 		     * this is done independently.
2669 		     */
2670 		    input = xmlNewEntityInputStream(ctxt, entity);
2671 		    if (xmlPushInput(ctxt, input) < 0)
2672 		        return;
2673 
2674 		    /*
2675 		     * Get the 4 first bytes and decode the charset
2676 		     * if enc != XML_CHAR_ENCODING_NONE
2677 		     * plug some encoding conversion routines.
2678 		     * Note that, since we may have some non-UTF8
2679 		     * encoding (like UTF16, bug 135229), the 'length'
2680 		     * is not known, but we can calculate based upon
2681 		     * the amount of data in the buffer.
2682 		     */
2683 		    GROW
2684                     if (ctxt->instate == XML_PARSER_EOF)
2685                         return;
2686 		    if ((ctxt->input->end - ctxt->input->cur)>=4) {
2687 			start[0] = RAW;
2688 			start[1] = NXT(1);
2689 			start[2] = NXT(2);
2690 			start[3] = NXT(3);
2691 			enc = xmlDetectCharEncoding(start, 4);
2692 			if (enc != XML_CHAR_ENCODING_NONE) {
2693 			    xmlSwitchEncoding(ctxt, enc);
2694 			}
2695 		    }
2696 
2697 		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2698 			(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2699 			(IS_BLANK_CH(NXT(5)))) {
2700 			xmlParseTextDecl(ctxt);
2701 		    }
2702 		} else {
2703 		    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2704 			     "PEReference: %s is not a parameter entity\n",
2705 				      name);
2706 		}
2707 	    }
2708 	} else {
2709 	    xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2710 	}
2711     }
2712 }
2713 
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the new size is smaller than the current one
 * (arithmetic wrap-around). On failure buffer and buffer##_size are left
 * untouched.
 * n is parenthesized so that any expression can be passed.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2728 
2729 /**
2730  * xmlStringLenDecodeEntities:
2731  * @ctxt:  the parser context
2732  * @str:  the input string
2733  * @len: the string length
2734  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2735  * @end:  an end marker xmlChar, 0 if none
2736  * @end2:  an end marker xmlChar, 0 if none
2737  * @end3:  an end marker xmlChar, 0 if none
2738  *
2739  * Takes a entity string content and process to do the adequate substitutions.
2740  *
2741  * [67] Reference ::= EntityRef | CharRef
2742  *
2743  * [69] PEReference ::= '%' Name ';'
2744  *
2745  * Returns A newly allocated string with the substitution done. The caller
2746  *      must deallocate it !
2747  */
2748 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2749 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2750 		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2751     xmlChar *buffer = NULL;
2752     size_t buffer_size = 0;
2753     size_t nbchars = 0;
2754 
2755     xmlChar *current = NULL;
2756     xmlChar *rep = NULL;
2757     const xmlChar *last;
2758     xmlEntityPtr ent;
2759     int c,l;
2760 
2761     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2762 	return(NULL);
2763     last = str + len;
2764 
2765     if (((ctxt->depth > 40) &&
2766          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2767 	(ctxt->depth > 1024)) {
2768 	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2769 	return(NULL);
2770     }
2771 
2772     /*
2773      * allocate a translation buffer.
2774      */
2775     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2776     buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2777     if (buffer == NULL) goto mem_error;
2778 
2779     /*
2780      * OK loop until we reach one of the ending char or a size limit.
2781      * we are operating on already parsed values.
2782      */
2783     if (str < last)
2784 	c = CUR_SCHAR(str, l);
2785     else
2786         c = 0;
2787     while ((c != 0) && (c != end) && /* non input consuming loop */
2788 	   (c != end2) && (c != end3)) {
2789 
2790 	if (c == 0) break;
2791         if ((c == '&') && (str[1] == '#')) {
2792 	    int val = xmlParseStringCharRef(ctxt, &str);
2793 	    if (val != 0) {
2794 		COPY_BUF(0,buffer,nbchars,val);
2795 	    }
2796 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2797 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2798 	    }
2799 	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2800 	    if (xmlParserDebugEntities)
2801 		xmlGenericError(xmlGenericErrorContext,
2802 			"String decoding Entity Reference: %.30s\n",
2803 			str);
2804 	    ent = xmlParseStringEntityRef(ctxt, &str);
2805 	    if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2806 	        (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2807 	        goto int_error;
2808 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2809 	    if (ent != NULL)
2810 	        ctxt->nbentities += ent->checked / 2;
2811 	    if ((ent != NULL) &&
2812 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2813 		if (ent->content != NULL) {
2814 		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
2815 		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2816 			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2817 		    }
2818 		} else {
2819 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2820 			    "predefined entity has no content\n");
2821 		}
2822 	    } else if ((ent != NULL) && (ent->content != NULL)) {
2823 		ctxt->depth++;
2824 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2825 			                      0, 0, 0);
2826 		ctxt->depth--;
2827 
2828 		if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2829 		    (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2830 		    goto int_error;
2831 
2832 		if (rep != NULL) {
2833 		    current = rep;
2834 		    while (*current != 0) { /* non input consuming loop */
2835 			buffer[nbchars++] = *current++;
2836 			if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2837 			    if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2838 				goto int_error;
2839 			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2840 			}
2841 		    }
2842 		    xmlFree(rep);
2843 		    rep = NULL;
2844 		}
2845 	    } else if (ent != NULL) {
2846 		int i = xmlStrlen(ent->name);
2847 		const xmlChar *cur = ent->name;
2848 
2849 		buffer[nbchars++] = '&';
2850 		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2851 		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2852 		}
2853 		for (;i > 0;i--)
2854 		    buffer[nbchars++] = *cur++;
2855 		buffer[nbchars++] = ';';
2856 	    }
2857 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2858 	    if (xmlParserDebugEntities)
2859 		xmlGenericError(xmlGenericErrorContext,
2860 			"String decoding PE Reference: %.30s\n", str);
2861 	    ent = xmlParseStringPEReference(ctxt, &str);
2862 	    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2863 	        goto int_error;
2864 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2865 	    if (ent != NULL)
2866 	        ctxt->nbentities += ent->checked / 2;
2867 	    if (ent != NULL) {
2868                 if (ent->content == NULL) {
2869 		    /*
2870 		     * Note: external parsed entities will not be loaded,
2871 		     * it is not required for a non-validating parser to
2872 		     * complete external PEreferences coming from the
2873 		     * internal subset
2874 		     */
2875 		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2876 			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2877 			(ctxt->validate != 0)) {
2878 			xmlLoadEntityContent(ctxt, ent);
2879 		    } else {
2880 			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2881 		  "not validating will not read content for PE entity %s\n",
2882 		                      ent->name, NULL);
2883 		    }
2884 		}
2885 		ctxt->depth++;
2886 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2887 			                      0, 0, 0);
2888 		ctxt->depth--;
2889 		if (rep != NULL) {
2890 		    current = rep;
2891 		    while (*current != 0) { /* non input consuming loop */
2892 			buffer[nbchars++] = *current++;
2893 			if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2894 			    if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2895 			        goto int_error;
2896 			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2897 			}
2898 		    }
2899 		    xmlFree(rep);
2900 		    rep = NULL;
2901 		}
2902 	    }
2903 	} else {
2904 	    COPY_BUF(l,buffer,nbchars,c);
2905 	    str += l;
2906 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2907 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2908 	    }
2909 	}
2910 	if (str < last)
2911 	    c = CUR_SCHAR(str, l);
2912 	else
2913 	    c = 0;
2914     }
2915     buffer[nbchars] = 0;
2916     return(buffer);
2917 
2918 mem_error:
2919     xmlErrMemory(ctxt, NULL);
2920 int_error:
2921     if (rep != NULL)
2922         xmlFree(rep);
2923     if (buffer != NULL)
2924         xmlFree(buffer);
2925     return(NULL);
2926 }
2927 
2928 /**
2929  * xmlStringDecodeEntities:
2930  * @ctxt:  the parser context
2931  * @str:  the input string
2932  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2933  * @end:  an end marker xmlChar, 0 if none
2934  * @end2:  an end marker xmlChar, 0 if none
2935  * @end3:  an end marker xmlChar, 0 if none
2936  *
2937  * Takes a entity string content and process to do the adequate substitutions.
2938  *
2939  * [67] Reference ::= EntityRef | CharRef
2940  *
2941  * [69] PEReference ::= '%' Name ';'
2942  *
2943  * Returns A newly allocated string with the substitution done. The caller
2944  *      must deallocate it !
2945  */
2946 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2947 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2948 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2949     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2950     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2951            end, end2, end3));
2952 }
2953 
2954 /************************************************************************
2955  *									*
2956  *		Commodity functions, cleanup needed ?			*
2957  *									*
2958  ************************************************************************/
2959 
2960 /**
2961  * areBlanks:
2962  * @ctxt:  an XML parser context
2963  * @str:  a xmlChar *
2964  * @len:  the size of @str
2965  * @blank_chars: we know the chars are blanks
2966  *
2967  * Is this a sequence of blank chars that one can ignore ?
2968  *
2969  * Returns 1 if ignorable 0 otherwise.
2970  */
2971 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2972 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2973                      int blank_chars) {
2974     int i, ret;
2975     xmlNodePtr lastChild;
2976 
2977     /*
2978      * Don't spend time trying to differentiate them, the same callback is
2979      * used !
2980      */
2981     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2982 	return(0);
2983 
2984     /*
2985      * Check for xml:space value.
2986      */
2987     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2988         (*(ctxt->space) == -2))
2989 	return(0);
2990 
2991     /*
2992      * Check that the string is made of blanks
2993      */
2994     if (blank_chars == 0) {
2995 	for (i = 0;i < len;i++)
2996 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2997     }
2998 
2999     /*
3000      * Look if the element is mixed content in the DTD if available
3001      */
3002     if (ctxt->node == NULL) return(0);
3003     if (ctxt->myDoc != NULL) {
3004 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3005         if (ret == 0) return(1);
3006         if (ret == 1) return(0);
3007     }
3008 
3009     /*
3010      * Otherwise, heuristic :-\
3011      */
3012     if ((RAW != '<') && (RAW != 0xD)) return(0);
3013     if ((ctxt->node->children == NULL) &&
3014 	(RAW == '<') && (NXT(1) == '/')) return(0);
3015 
3016     lastChild = xmlGetLastChild(ctxt->node);
3017     if (lastChild == NULL) {
3018         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
3019             (ctxt->node->content != NULL)) return(0);
3020     } else if (xmlNodeIsText(lastChild))
3021         return(0);
3022     else if ((ctxt->node->children != NULL) &&
3023              (xmlNodeIsText(ctxt->node->children)))
3024         return(0);
3025     return(1);
3026 }
3027 
3028 /************************************************************************
3029  *									*
3030  *		Extra stuff for namespace support			*
3031  *	Relates to http://www.w3.org/TR/WD-xml-names			*
3032  *									*
3033  ************************************************************************/
3034 
3035 /**
3036  * xmlSplitQName:
3037  * @ctxt:  an XML parser context
3038  * @name:  an XML parser context
3039  * @prefix:  a xmlChar **
3040  *
3041  * parse an UTF8 encoded XML qualified name string
3042  *
3043  * [NS 5] QName ::= (Prefix ':')? LocalPart
3044  *
3045  * [NS 6] Prefix ::= NCName
3046  *
3047  * [NS 7] LocalPart ::= NCName
3048  *
3049  * Returns the local part, and prefix is updated
3050  *   to get the Prefix if any.
3051  */
3052 
3053 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)3054 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3055     xmlChar buf[XML_MAX_NAMELEN + 5];
3056     xmlChar *buffer = NULL;
3057     int len = 0;
3058     int max = XML_MAX_NAMELEN;
3059     xmlChar *ret = NULL;
3060     const xmlChar *cur = name;
3061     int c;
3062 
3063     if (prefix == NULL) return(NULL);
3064     *prefix = NULL;
3065 
3066     if (cur == NULL) return(NULL);
3067 
3068 #ifndef XML_XML_NAMESPACE
3069     /* xml: prefix is not really a namespace */
3070     if ((cur[0] == 'x') && (cur[1] == 'm') &&
3071         (cur[2] == 'l') && (cur[3] == ':'))
3072 	return(xmlStrdup(name));
3073 #endif
3074 
3075     /* nasty but well=formed */
3076     if (cur[0] == ':')
3077 	return(xmlStrdup(name));
3078 
3079     c = *cur++;
3080     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3081 	buf[len++] = c;
3082 	c = *cur++;
3083     }
3084     if (len >= max) {
3085 	/*
3086 	 * Okay someone managed to make a huge name, so he's ready to pay
3087 	 * for the processing speed.
3088 	 */
3089 	max = len * 2;
3090 
3091 	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3092 	if (buffer == NULL) {
3093 	    xmlErrMemory(ctxt, NULL);
3094 	    return(NULL);
3095 	}
3096 	memcpy(buffer, buf, len);
3097 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3098 	    if (len + 10 > max) {
3099 	        xmlChar *tmp;
3100 
3101 		max *= 2;
3102 		tmp = (xmlChar *) xmlRealloc(buffer,
3103 						max * sizeof(xmlChar));
3104 		if (tmp == NULL) {
3105 		    xmlFree(buffer);
3106 		    xmlErrMemory(ctxt, NULL);
3107 		    return(NULL);
3108 		}
3109 		buffer = tmp;
3110 	    }
3111 	    buffer[len++] = c;
3112 	    c = *cur++;
3113 	}
3114 	buffer[len] = 0;
3115     }
3116 
3117     if ((c == ':') && (*cur == 0)) {
3118         if (buffer != NULL)
3119 	    xmlFree(buffer);
3120 	*prefix = NULL;
3121 	return(xmlStrdup(name));
3122     }
3123 
3124     if (buffer == NULL)
3125 	ret = xmlStrndup(buf, len);
3126     else {
3127 	ret = buffer;
3128 	buffer = NULL;
3129 	max = XML_MAX_NAMELEN;
3130     }
3131 
3132 
3133     if (c == ':') {
3134 	c = *cur;
3135         *prefix = ret;
3136 	if (c == 0) {
3137 	    return(xmlStrndup(BAD_CAST "", 0));
3138 	}
3139 	len = 0;
3140 
3141 	/*
3142 	 * Check that the first character is proper to start
3143 	 * a new name
3144 	 */
3145 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
3146 	      ((c >= 0x41) && (c <= 0x5A)) ||
3147 	      (c == '_') || (c == ':'))) {
3148 	    int l;
3149 	    int first = CUR_SCHAR(cur, l);
3150 
3151 	    if (!IS_LETTER(first) && (first != '_')) {
3152 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3153 			    "Name %s is not XML Namespace compliant\n",
3154 				  name);
3155 	    }
3156 	}
3157 	cur++;
3158 
3159 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3160 	    buf[len++] = c;
3161 	    c = *cur++;
3162 	}
3163 	if (len >= max) {
3164 	    /*
3165 	     * Okay someone managed to make a huge name, so he's ready to pay
3166 	     * for the processing speed.
3167 	     */
3168 	    max = len * 2;
3169 
3170 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3171 	    if (buffer == NULL) {
3172 	        xmlErrMemory(ctxt, NULL);
3173 		return(NULL);
3174 	    }
3175 	    memcpy(buffer, buf, len);
3176 	    while (c != 0) { /* tested bigname2.xml */
3177 		if (len + 10 > max) {
3178 		    xmlChar *tmp;
3179 
3180 		    max *= 2;
3181 		    tmp = (xmlChar *) xmlRealloc(buffer,
3182 						    max * sizeof(xmlChar));
3183 		    if (tmp == NULL) {
3184 			xmlErrMemory(ctxt, NULL);
3185 			xmlFree(buffer);
3186 			return(NULL);
3187 		    }
3188 		    buffer = tmp;
3189 		}
3190 		buffer[len++] = c;
3191 		c = *cur++;
3192 	    }
3193 	    buffer[len] = 0;
3194 	}
3195 
3196 	if (buffer == NULL)
3197 	    ret = xmlStrndup(buf, len);
3198 	else {
3199 	    ret = buffer;
3200 	}
3201     }
3202 
3203     return(ret);
3204 }
3205 
3206 /************************************************************************
3207  *									*
3208  *			The parser itself				*
3209  *	Relates to http://www.w3.org/TR/REC-xml				*
3210  *									*
3211  ************************************************************************/
3212 
3213 /************************************************************************
3214  *									*
3215  *	Routines to parse Name, NCName and NmToken			*
3216  *									*
3217  ************************************************************************/
#ifdef DEBUG
/*
 * Call counters only available in DEBUG builds. Each one is incremented
 * on entry of the parsing routine it is named after.
 */
/* fast paths */
static unsigned long nbParseName = 0;
static unsigned long nbParseNCName = 0;
/* slow paths */
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCNameComplex = 0;
/* string and token variants */
static unsigned long nbParseStringName = 0;
static unsigned long nbParseNmToken = 0;
#endif
3226 
3227 /*
3228  * The two following functions are related to the change of accepted
3229  * characters for Name and NmToken in the Revision 5 of XML-1.0
3230  * They correspond to the modified production [4] and the new production [4a]
3231  * changes in that revision. Also note that the macros used for the
3232  * productions Letter, Digit, CombiningChar and Extender are not needed
3233  * anymore.
3234  * We still keep compatibility to pre-revision5 parsing semantic if the
3235  * new XML_PARSE_OLD10 option is given to the parser.
3236  */
3237 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3238 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3239     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3240         /*
3241 	 * Use the new checks of production [4] [4a] amd [5] of the
3242 	 * Update 5 of XML-1.0
3243 	 */
3244 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3245 	    (((c >= 'a') && (c <= 'z')) ||
3246 	     ((c >= 'A') && (c <= 'Z')) ||
3247 	     (c == '_') || (c == ':') ||
3248 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3249 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3250 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3251 	     ((c >= 0x370) && (c <= 0x37D)) ||
3252 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3253 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3254 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3255 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3256 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3257 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3258 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3259 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3260 	    return(1);
3261     } else {
3262         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3263 	    return(1);
3264     }
3265     return(0);
3266 }
3267 
3268 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3269 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3270     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3271         /*
3272 	 * Use the new checks of production [4] [4a] amd [5] of the
3273 	 * Update 5 of XML-1.0
3274 	 */
3275 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3276 	    (((c >= 'a') && (c <= 'z')) ||
3277 	     ((c >= 'A') && (c <= 'Z')) ||
3278 	     ((c >= '0') && (c <= '9')) || /* !start */
3279 	     (c == '_') || (c == ':') ||
3280 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3281 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3282 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3283 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3284 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3285 	     ((c >= 0x370) && (c <= 0x37D)) ||
3286 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3287 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3288 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3289 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3290 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3291 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3292 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3293 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3294 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3295 	     return(1);
3296     } else {
3297         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298             (c == '.') || (c == '-') ||
3299 	    (c == '_') || (c == ':') ||
3300 	    (IS_COMBINING(c)) ||
3301 	    (IS_EXTENDER(c)))
3302 	    return(1);
3303     }
3304     return(0);
3305 }
3306 
3307 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3308                                           int *len, int *alloc, int normalize);
3309 
3310 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3311 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3312     int len = 0, l;
3313     int c;
3314     int count = 0;
3315 
3316 #ifdef DEBUG
3317     nbParseNameComplex++;
3318 #endif
3319 
3320     /*
3321      * Handler for more complex cases
3322      */
3323     GROW;
3324     if (ctxt->instate == XML_PARSER_EOF)
3325         return(NULL);
3326     c = CUR_CHAR(l);
3327     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3328         /*
3329 	 * Use the new checks of production [4] [4a] amd [5] of the
3330 	 * Update 5 of XML-1.0
3331 	 */
3332 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3333 	    (!(((c >= 'a') && (c <= 'z')) ||
3334 	       ((c >= 'A') && (c <= 'Z')) ||
3335 	       (c == '_') || (c == ':') ||
3336 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3337 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3338 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3339 	       ((c >= 0x370) && (c <= 0x37D)) ||
3340 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3341 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3342 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3343 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3344 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3345 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3346 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3347 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3348 	    return(NULL);
3349 	}
3350 	len += l;
3351 	NEXTL(l);
3352 	c = CUR_CHAR(l);
3353 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3354 	       (((c >= 'a') && (c <= 'z')) ||
3355 	        ((c >= 'A') && (c <= 'Z')) ||
3356 	        ((c >= '0') && (c <= '9')) || /* !start */
3357 	        (c == '_') || (c == ':') ||
3358 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3359 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3360 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3361 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3362 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3363 	        ((c >= 0x370) && (c <= 0x37D)) ||
3364 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3365 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3366 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3367 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3368 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3369 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3370 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3371 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3372 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3373 		)) {
3374 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3375 		count = 0;
3376 		GROW;
3377                 if (ctxt->instate == XML_PARSER_EOF)
3378                     return(NULL);
3379 	    }
3380 	    len += l;
3381 	    NEXTL(l);
3382 	    c = CUR_CHAR(l);
3383 	}
3384     } else {
3385 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3386 	    (!IS_LETTER(c) && (c != '_') &&
3387 	     (c != ':'))) {
3388 	    return(NULL);
3389 	}
3390 	len += l;
3391 	NEXTL(l);
3392 	c = CUR_CHAR(l);
3393 
3394 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3395 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3396 		(c == '.') || (c == '-') ||
3397 		(c == '_') || (c == ':') ||
3398 		(IS_COMBINING(c)) ||
3399 		(IS_EXTENDER(c)))) {
3400 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3401 		count = 0;
3402 		GROW;
3403                 if (ctxt->instate == XML_PARSER_EOF)
3404                     return(NULL);
3405 	    }
3406 	    len += l;
3407 	    NEXTL(l);
3408 	    c = CUR_CHAR(l);
3409 	    if (c == 0) {
3410 		count = 0;
3411 		GROW;
3412                 if (ctxt->instate == XML_PARSER_EOF)
3413                     return(NULL);
3414 		c = CUR_CHAR(l);
3415 	    }
3416 	}
3417     }
3418     if ((len > XML_MAX_NAME_LENGTH) &&
3419         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3420         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3421         return(NULL);
3422     }
3423     if (ctxt->input->cur > ctxt->input->base && (*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) {
3424         if (ctxt->input->base > ctxt->input->cur - (len + 1)) {
3425             return(NULL);
3426         }
3427         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3428     }
3429     if (ctxt->input->base > ctxt->input->cur - len) {
3430         return(NULL);
3431     }
3432     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3433 }
3434 
3435 /**
3436  * xmlParseName:
3437  * @ctxt:  an XML parser context
3438  *
3439  * parse an XML name.
3440  *
3441  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3442  *                  CombiningChar | Extender
3443  *
3444  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3445  *
3446  * [6] Names ::= Name (#x20 Name)*
3447  *
3448  * Returns the Name parsed or NULL
3449  */
3450 
3451 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3452 xmlParseName(xmlParserCtxtPtr ctxt) {
3453     const xmlChar *in;
3454     const xmlChar *ret;
3455     int count = 0;
3456 
3457     GROW;
3458 
3459 #ifdef DEBUG
3460     nbParseName++;
3461 #endif
3462 
3463     /*
3464      * Accelerator for simple ASCII names
3465      */
3466     in = ctxt->input->cur;
3467     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3468 	((*in >= 0x41) && (*in <= 0x5A)) ||
3469 	(*in == '_') || (*in == ':')) {
3470 	in++;
3471 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3472 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3473 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3474 	       (*in == '_') || (*in == '-') ||
3475 	       (*in == ':') || (*in == '.'))
3476 	    in++;
3477 	if ((*in > 0) && (*in < 0x80)) {
3478 	    count = in - ctxt->input->cur;
3479             if ((count > XML_MAX_NAME_LENGTH) &&
3480                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3481                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3482                 return(NULL);
3483             }
3484 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3485 	    ctxt->input->cur = in;
3486 	    ctxt->nbChars += count;
3487 	    ctxt->input->col += count;
3488 	    if (ret == NULL)
3489 	        xmlErrMemory(ctxt, NULL);
3490 	    return(ret);
3491 	}
3492     }
3493     /* accelerator for special cases */
3494     return(xmlParseNameComplex(ctxt));
3495 }
3496 
3497 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3498 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3499     int len = 0, l;
3500     int c;
3501     int count = 0;
3502     size_t startPosition = 0;
3503 
3504 #ifdef DEBUG
3505     nbParseNCNameComplex++;
3506 #endif
3507 
3508     /*
3509      * Handler for more complex cases
3510      */
3511     GROW;
3512     startPosition = CUR_PTR - BASE_PTR;
3513     c = CUR_CHAR(l);
3514     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3515 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3516 	return(NULL);
3517     }
3518 
3519     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3520 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3521 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3522             if ((len > XML_MAX_NAME_LENGTH) &&
3523                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3524                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3525                 return(NULL);
3526             }
3527 	    count = 0;
3528 	    GROW;
3529             if (ctxt->instate == XML_PARSER_EOF)
3530                 return(NULL);
3531 	}
3532 	len += l;
3533 	NEXTL(l);
3534 	c = CUR_CHAR(l);
3535 	if (c == 0) {
3536 	    count = 0;
3537 	    /*
3538 	     * when shrinking to extend the buffer we really need to preserve
3539 	     * the part of the name we already parsed. Hence rolling back
3540 	     * by current lenght.
3541 	     */
3542 	    ctxt->input->cur -= l;
3543 	    GROW;
3544 	    ctxt->input->cur += l;
3545             if (ctxt->instate == XML_PARSER_EOF)
3546                 return(NULL);
3547 	    c = CUR_CHAR(l);
3548 	}
3549     }
3550     if ((len > XML_MAX_NAME_LENGTH) &&
3551         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553         return(NULL);
3554     }
3555     return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3556 }
3557 
3558 /**
3559  * xmlParseNCName:
3560  * @ctxt:  an XML parser context
3561  * @len:  length of the string parsed
3562  *
3563  * parse an XML name.
3564  *
3565  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3566  *                      CombiningChar | Extender
3567  *
3568  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3569  *
3570  * Returns the Name parsed or NULL
3571  */
3572 
3573 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3574 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3575     const xmlChar *in, *e;
3576     const xmlChar *ret;
3577     int count = 0;
3578 
3579 #ifdef DEBUG
3580     nbParseNCName++;
3581 #endif
3582 
3583     /*
3584      * Accelerator for simple ASCII names
3585      */
3586     in = ctxt->input->cur;
3587     e = ctxt->input->end;
3588     if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3589 	 ((*in >= 0x41) && (*in <= 0x5A)) ||
3590 	 (*in == '_')) && (in < e)) {
3591 	in++;
3592 	while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3593 	        ((*in >= 0x41) && (*in <= 0x5A)) ||
3594 	        ((*in >= 0x30) && (*in <= 0x39)) ||
3595 	        (*in == '_') || (*in == '-') ||
3596 	        (*in == '.')) && (in < e))
3597 	    in++;
3598 	if (in >= e)
3599 	    goto complex;
3600 	if ((*in > 0) && (*in < 0x80)) {
3601 	    count = in - ctxt->input->cur;
3602             if ((count > XML_MAX_NAME_LENGTH) &&
3603                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3604                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3605                 return(NULL);
3606             }
3607 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3608 	    ctxt->input->cur = in;
3609 	    ctxt->nbChars += count;
3610 	    ctxt->input->col += count;
3611 	    if (ret == NULL) {
3612 	        xmlErrMemory(ctxt, NULL);
3613 	    }
3614 	    return(ret);
3615 	}
3616     }
3617 complex:
3618     return(xmlParseNCNameComplex(ctxt));
3619 }
3620 
3621 /**
3622  * xmlParseNameAndCompare:
3623  * @ctxt:  an XML parser context
3624  *
3625  * parse an XML name and compares for match
3626  * (specialized for endtag parsing)
3627  *
3628  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3629  * and the name for mismatch
3630  */
3631 
3632 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3633 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3634     register const xmlChar *cmp = other;
3635     register const xmlChar *in;
3636     const xmlChar *ret;
3637 
3638     GROW;
3639     if (ctxt->instate == XML_PARSER_EOF)
3640         return(NULL);
3641 
3642     in = ctxt->input->cur;
3643     while (*in != 0 && *in == *cmp) {
3644 	++in;
3645 	++cmp;
3646 	ctxt->input->col++;
3647     }
3648     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3649 	/* success */
3650 	ctxt->input->cur = in;
3651 	return (const xmlChar*) 1;
3652     }
3653     /* failure (or end of input buffer), check with full function */
3654     ret = xmlParseName (ctxt);
3655     /* strings coming from the dictionary direct compare possible */
3656     if (ret == other) {
3657 	return (const xmlChar*) 1;
3658     }
3659     return ret;
3660 }
3661 
3662 /**
3663  * xmlParseStringName:
3664  * @ctxt:  an XML parser context
3665  * @str:  a pointer to the string pointer (IN/OUT)
3666  *
3667  * parse an XML name.
3668  *
3669  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3670  *                  CombiningChar | Extender
3671  *
3672  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3673  *
3674  * [6] Names ::= Name (#x20 Name)*
3675  *
3676  * Returns the Name parsed or NULL. The @str pointer
3677  * is updated to the current location in the string.
3678  */
3679 
3680 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3681 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3682     xmlChar buf[XML_MAX_NAMELEN + 5];
3683     const xmlChar *cur = *str;
3684     int len = 0, l;
3685     int c;
3686 
3687 #ifdef DEBUG
3688     nbParseStringName++;
3689 #endif
3690 
3691     c = CUR_SCHAR(cur, l);
3692     if (!xmlIsNameStartChar(ctxt, c)) {
3693 	return(NULL);
3694     }
3695 
3696     COPY_BUF(l,buf,len,c);
3697     cur += l;
3698     c = CUR_SCHAR(cur, l);
3699     while (xmlIsNameChar(ctxt, c)) {
3700 	COPY_BUF(l,buf,len,c);
3701 	cur += l;
3702 	c = CUR_SCHAR(cur, l);
3703 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3704 	    /*
3705 	     * Okay someone managed to make a huge name, so he's ready to pay
3706 	     * for the processing speed.
3707 	     */
3708 	    xmlChar *buffer;
3709 	    int max = len * 2;
3710 
3711 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3712 	    if (buffer == NULL) {
3713 	        xmlErrMemory(ctxt, NULL);
3714 		return(NULL);
3715 	    }
3716 	    memcpy(buffer, buf, len);
3717 	    while (xmlIsNameChar(ctxt, c)) {
3718 		if (len + 10 > max) {
3719 		    xmlChar *tmp;
3720 
3721                     if ((len > XML_MAX_NAME_LENGTH) &&
3722                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3723                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3724 			xmlFree(buffer);
3725                         return(NULL);
3726                     }
3727 		    max *= 2;
3728 		    tmp = (xmlChar *) xmlRealloc(buffer,
3729 			                            max * sizeof(xmlChar));
3730 		    if (tmp == NULL) {
3731 			xmlErrMemory(ctxt, NULL);
3732 			xmlFree(buffer);
3733 			return(NULL);
3734 		    }
3735 		    buffer = tmp;
3736 		}
3737 		COPY_BUF(l,buffer,len,c);
3738 		cur += l;
3739 		c = CUR_SCHAR(cur, l);
3740 	    }
3741 	    buffer[len] = 0;
3742 	    *str = cur;
3743 	    return(buffer);
3744 	}
3745     }
3746     if ((len > XML_MAX_NAME_LENGTH) &&
3747         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3748         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3749         return(NULL);
3750     }
3751     *str = cur;
3752     return(xmlStrndup(buf, len));
3753 }
3754 
3755 /**
3756  * xmlParseNmtoken:
3757  * @ctxt:  an XML parser context
3758  *
3759  * parse an XML Nmtoken.
3760  *
3761  * [7] Nmtoken ::= (NameChar)+
3762  *
3763  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3764  *
3765  * Returns the Nmtoken parsed or NULL
3766  */
3767 
3768 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3769 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3770     xmlChar buf[XML_MAX_NAMELEN + 5];
3771     int len = 0, l;
3772     int c;
3773     int count = 0;
3774 
3775 #ifdef DEBUG
3776     nbParseNmToken++;
3777 #endif
3778 
3779     GROW;
3780     if (ctxt->instate == XML_PARSER_EOF)
3781         return(NULL);
3782     c = CUR_CHAR(l);
3783 
3784     while (xmlIsNameChar(ctxt, c)) {
3785 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3786 	    count = 0;
3787 	    GROW;
3788 	}
3789 	COPY_BUF(l,buf,len,c);
3790 	NEXTL(l);
3791 	c = CUR_CHAR(l);
3792 	if (c == 0) {
3793 	    count = 0;
3794 	    GROW;
3795 	    if (ctxt->instate == XML_PARSER_EOF)
3796 		return(NULL);
3797             c = CUR_CHAR(l);
3798 	}
3799 	if (len >= XML_MAX_NAMELEN) {
3800 	    /*
3801 	     * Okay someone managed to make a huge token, so he's ready to pay
3802 	     * for the processing speed.
3803 	     */
3804 	    xmlChar *buffer;
3805 	    int max = len * 2;
3806 
3807 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3808 	    if (buffer == NULL) {
3809 	        xmlErrMemory(ctxt, NULL);
3810 		return(NULL);
3811 	    }
3812 	    memcpy(buffer, buf, len);
3813 	    while (xmlIsNameChar(ctxt, c)) {
3814 		if (count++ > XML_PARSER_CHUNK_SIZE) {
3815 		    count = 0;
3816 		    GROW;
3817                     if (ctxt->instate == XML_PARSER_EOF) {
3818                         xmlFree(buffer);
3819                         return(NULL);
3820                     }
3821 		}
3822 		if (len + 10 > max) {
3823 		    xmlChar *tmp;
3824 
3825                     if ((max > XML_MAX_NAME_LENGTH) &&
3826                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3827                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3828                         xmlFree(buffer);
3829                         return(NULL);
3830                     }
3831 		    max *= 2;
3832 		    tmp = (xmlChar *) xmlRealloc(buffer,
3833 			                            max * sizeof(xmlChar));
3834 		    if (tmp == NULL) {
3835 			xmlErrMemory(ctxt, NULL);
3836 			xmlFree(buffer);
3837 			return(NULL);
3838 		    }
3839 		    buffer = tmp;
3840 		}
3841 		COPY_BUF(l,buffer,len,c);
3842 		NEXTL(l);
3843 		c = CUR_CHAR(l);
3844 	    }
3845 	    buffer[len] = 0;
3846 	    return(buffer);
3847 	}
3848     }
3849     if (len == 0)
3850         return(NULL);
3851     if ((len > XML_MAX_NAME_LENGTH) &&
3852         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3853         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3854         return(NULL);
3855     }
3856     return(xmlStrndup(buf, len));
3857 }
3858 
3859 /**
3860  * xmlParseEntityValue:
3861  * @ctxt:  an XML parser context
3862  * @orig:  if non-NULL store a copy of the original entity value
3863  *
3864  * parse a value for ENTITY declarations
3865  *
3866  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3867  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3868  *
3869  * Returns the EntityValue parsed with reference substituted or NULL
3870  */
3871 
3872 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3873 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3874     xmlChar *buf = NULL;
3875     int len = 0;
3876     int size = XML_PARSER_BUFFER_SIZE;
3877     int c, l;
3878     xmlChar stop;
3879     xmlChar *ret = NULL;
3880     const xmlChar *cur = NULL;
3881     xmlParserInputPtr input;
3882 
3883     if (RAW == '"') stop = '"';
3884     else if (RAW == '\'') stop = '\'';
3885     else {
3886 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3887 	return(NULL);
3888     }
3889     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3890     if (buf == NULL) {
3891 	xmlErrMemory(ctxt, NULL);
3892 	return(NULL);
3893     }
3894 
3895     /*
3896      * The content of the entity definition is copied in a buffer.
3897      */
3898 
3899     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3900     input = ctxt->input;
3901     GROW;
3902     if (ctxt->instate == XML_PARSER_EOF) {
3903         xmlFree(buf);
3904         return(NULL);
3905     }
3906     NEXT;
3907     c = CUR_CHAR(l);
3908     /*
3909      * NOTE: 4.4.5 Included in Literal
3910      * When a parameter entity reference appears in a literal entity
3911      * value, ... a single or double quote character in the replacement
3912      * text is always treated as a normal data character and will not
3913      * terminate the literal.
3914      * In practice it means we stop the loop only when back at parsing
3915      * the initial entity and the quote is found
3916      */
3917     while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3918 	    (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3919 	if (len + 5 >= size) {
3920 	    xmlChar *tmp;
3921 
3922 	    size *= 2;
3923 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3924 	    if (tmp == NULL) {
3925 		xmlErrMemory(ctxt, NULL);
3926 		xmlFree(buf);
3927 		return(NULL);
3928 	    }
3929 	    buf = tmp;
3930 	}
3931 	COPY_BUF(l,buf,len,c);
3932 	NEXTL(l);
3933 	/*
3934 	 * Pop-up of finished entities.
3935 	 */
3936 	while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3937 	    xmlPopInput(ctxt);
3938 
3939 	GROW;
3940 	c = CUR_CHAR(l);
3941 	if (c == 0) {
3942 	    GROW;
3943 	    c = CUR_CHAR(l);
3944 	}
3945     }
3946     buf[len] = 0;
3947     if (ctxt->instate == XML_PARSER_EOF) {
3948         xmlFree(buf);
3949         return(NULL);
3950     }
3951 
3952     /*
3953      * Raise problem w.r.t. '&' and '%' being used in non-entities
3954      * reference constructs. Note Charref will be handled in
3955      * xmlStringDecodeEntities()
3956      */
3957     cur = buf;
3958     while (*cur != 0) { /* non input consuming */
3959 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3960 	    xmlChar *name;
3961 	    xmlChar tmp = *cur;
3962 
3963 	    cur++;
3964 	    name = xmlParseStringName(ctxt, &cur);
3965             if ((name == NULL) || (*cur != ';')) {
3966 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3967 	    "EntityValue: '%c' forbidden except for entities references\n",
3968 	                          tmp);
3969 	    }
3970 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3971 		(ctxt->inputNr == 1)) {
3972 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3973 	    }
3974 	    if (name != NULL)
3975 		xmlFree(name);
3976 	    if (*cur == 0)
3977 	        break;
3978 	}
3979 	cur++;
3980     }
3981 
3982     /*
3983      * Then PEReference entities are substituted.
3984      */
3985     if (c != stop) {
3986 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3987 	xmlFree(buf);
3988     } else {
3989 	NEXT;
3990 	/*
3991 	 * NOTE: 4.4.7 Bypassed
3992 	 * When a general entity reference appears in the EntityValue in
3993 	 * an entity declaration, it is bypassed and left as is.
3994 	 * so XML_SUBSTITUTE_REF is not set here.
3995 	 */
3996         ++ctxt->depth;
3997 	ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3998 				      0, 0, 0);
3999         --ctxt->depth;
4000 	if (orig != NULL)
4001 	    *orig = buf;
4002 	else
4003 	    xmlFree(buf);
4004     }
4005 
4006     return(ret);
4007 }
4008 
4009 /**
4010  * xmlParseAttValueComplex:
4011  * @ctxt:  an XML parser context
4012  * @len:   the resulting attribute len
4013  * @normalize:  wether to apply the inner normalization
4014  *
4015  * parse a value for an attribute, this is the fallback function
4016  * of xmlParseAttValue() when the attribute parsing requires handling
4017  * of non-ASCII characters, or normalization compaction.
4018  *
4019  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4020  */
4021 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)4022 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
4023     xmlChar limit = 0;
4024     xmlChar *buf = NULL;
4025     xmlChar *rep = NULL;
4026     size_t len = 0;
4027     size_t buf_size = 0;
4028     int c, l, in_space = 0;
4029     xmlChar *current = NULL;
4030     xmlEntityPtr ent;
4031 
4032     if (NXT(0) == '"') {
4033 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4034 	limit = '"';
4035         NEXT;
4036     } else if (NXT(0) == '\'') {
4037 	limit = '\'';
4038 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4039         NEXT;
4040     } else {
4041 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4042 	return(NULL);
4043     }
4044 
4045     /*
4046      * allocate a translation buffer.
4047      */
4048     buf_size = XML_PARSER_BUFFER_SIZE;
4049     buf = (xmlChar *) xmlMallocAtomic(buf_size);
4050     if (buf == NULL) goto mem_error;
4051 
4052     /*
4053      * OK loop until we reach one of the ending char or a size limit.
4054      */
4055     c = CUR_CHAR(l);
4056     while (((NXT(0) != limit) && /* checked */
4057             (IS_CHAR(c)) && (c != '<')) &&
4058             (ctxt->instate != XML_PARSER_EOF)) {
4059         /*
4060          * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4061          * special option is given
4062          */
4063         if ((len > XML_MAX_TEXT_LENGTH) &&
4064             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4065             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4066                            "AttValue length too long\n");
4067             goto mem_error;
4068         }
4069 	if (c == 0) break;
4070 	if (c == '&') {
4071 	    in_space = 0;
4072 	    if (NXT(1) == '#') {
4073 		int val = xmlParseCharRef(ctxt);
4074 
4075 		if (val == '&') {
4076 		    if (ctxt->replaceEntities) {
4077 			if (len + 10 > buf_size) {
4078 			    growBuffer(buf, 10);
4079 			}
4080 			buf[len++] = '&';
4081 		    } else {
4082 			/*
4083 			 * The reparsing will be done in xmlStringGetNodeList()
4084 			 * called by the attribute() function in SAX.c
4085 			 */
4086 			if (len + 10 > buf_size) {
4087 			    growBuffer(buf, 10);
4088 			}
4089 			buf[len++] = '&';
4090 			buf[len++] = '#';
4091 			buf[len++] = '3';
4092 			buf[len++] = '8';
4093 			buf[len++] = ';';
4094 		    }
4095 		} else if (val != 0) {
4096 		    if (len + 10 > buf_size) {
4097 			growBuffer(buf, 10);
4098 		    }
4099 		    len += xmlCopyChar(0, &buf[len], val);
4100 		}
4101 	    } else {
4102 		ent = xmlParseEntityRef(ctxt);
4103 		ctxt->nbentities++;
4104 		if (ent != NULL)
4105 		    ctxt->nbentities += ent->owner;
4106 		if ((ent != NULL) &&
4107 		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4108 		    if (len + 10 > buf_size) {
4109 			growBuffer(buf, 10);
4110 		    }
4111 		    if ((ctxt->replaceEntities == 0) &&
4112 		        (ent->content[0] == '&')) {
4113 			buf[len++] = '&';
4114 			buf[len++] = '#';
4115 			buf[len++] = '3';
4116 			buf[len++] = '8';
4117 			buf[len++] = ';';
4118 		    } else {
4119 			buf[len++] = ent->content[0];
4120 		    }
4121 		} else if ((ent != NULL) &&
4122 		           (ctxt->replaceEntities != 0)) {
4123 		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4124 			++ctxt->depth;
4125 			rep = xmlStringDecodeEntities(ctxt, ent->content,
4126 						      XML_SUBSTITUTE_REF,
4127 						      0, 0, 0);
4128 			--ctxt->depth;
4129 			if (rep != NULL) {
4130 			    current = rep;
4131 			    while (*current != 0) { /* non input consuming */
4132                                 if ((*current == 0xD) || (*current == 0xA) ||
4133                                     (*current == 0x9)) {
4134                                     buf[len++] = 0x20;
4135                                     current++;
4136                                 } else
4137                                     buf[len++] = *current++;
4138 				if (len + 10 > buf_size) {
4139 				    growBuffer(buf, 10);
4140 				}
4141 			    }
4142 			    xmlFree(rep);
4143 			    rep = NULL;
4144 			}
4145 		    } else {
4146 			if (len + 10 > buf_size) {
4147 			    growBuffer(buf, 10);
4148 			}
4149 			if (ent->content != NULL)
4150 			    buf[len++] = ent->content[0];
4151 		    }
4152 		} else if (ent != NULL) {
4153 		    int i = xmlStrlen(ent->name);
4154 		    const xmlChar *cur = ent->name;
4155 
4156 		    /*
4157 		     * This may look absurd but is needed to detect
4158 		     * entities problems
4159 		     */
4160 		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4161 			(ent->content != NULL) && (ent->checked == 0)) {
4162 			unsigned long oldnbent = ctxt->nbentities;
4163 
4164 			++ctxt->depth;
4165 			rep = xmlStringDecodeEntities(ctxt, ent->content,
4166 						  XML_SUBSTITUTE_REF, 0, 0, 0);
4167 			--ctxt->depth;
4168 
4169 			ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4170 			if (rep != NULL) {
4171 			    if (xmlStrchr(rep, '<'))
4172 			        ent->checked |= 1;
4173 			    xmlFree(rep);
4174 			    rep = NULL;
4175 			}
4176 		    }
4177 
4178 		    /*
4179 		     * Just output the reference
4180 		     */
4181 		    buf[len++] = '&';
4182 		    while (len + i + 10 > buf_size) {
4183 			growBuffer(buf, i + 10);
4184 		    }
4185 		    for (;i > 0;i--)
4186 			buf[len++] = *cur++;
4187 		    buf[len++] = ';';
4188 		}
4189 	    }
4190 	} else {
4191 	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4192 	        if ((len != 0) || (!normalize)) {
4193 		    if ((!normalize) || (!in_space)) {
4194 			COPY_BUF(l,buf,len,0x20);
4195 			while (len + 10 > buf_size) {
4196 			    growBuffer(buf, 10);
4197 			}
4198 		    }
4199 		    in_space = 1;
4200 		}
4201 	    } else {
4202 	        in_space = 0;
4203 		COPY_BUF(l,buf,len,c);
4204 		if (len + 10 > buf_size) {
4205 		    growBuffer(buf, 10);
4206 		}
4207 	    }
4208 	    NEXTL(l);
4209 	}
4210 	GROW;
4211 	c = CUR_CHAR(l);
4212     }
4213     if (ctxt->instate == XML_PARSER_EOF)
4214         goto error;
4215 
4216     if ((in_space) && (normalize)) {
4217         while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4218     }
4219     buf[len] = 0;
4220     if (RAW == '<') {
4221 	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4222     } else if (RAW != limit) {
4223 	if ((c != 0) && (!IS_CHAR(c))) {
4224 	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4225 			   "invalid character in attribute value\n");
4226 	} else {
4227 	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4228 			   "AttValue: ' expected\n");
4229         }
4230     } else
4231 	NEXT;
4232 
4233     /*
4234      * There we potentially risk an overflow, don't allow attribute value of
4235      * length more than INT_MAX it is a very reasonnable assumption !
4236      */
4237     if (len >= INT_MAX) {
4238         xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4239                        "AttValue length too long\n");
4240         goto mem_error;
4241     }
4242 
4243     if (attlen != NULL) *attlen = (int) len;
4244     return(buf);
4245 
4246 mem_error:
4247     xmlErrMemory(ctxt, NULL);
4248 error:
4249     if (buf != NULL)
4250         xmlFree(buf);
4251     if (rep != NULL)
4252         xmlFree(rep);
4253     return(NULL);
4254 }
4255 
4256 /**
4257  * xmlParseAttValue:
4258  * @ctxt:  an XML parser context
4259  *
4260  * parse a value for an attribute
4261  * Note: the parser won't do substitution of entities here, this
4262  * will be handled later in xmlStringGetNodeList
4263  *
4264  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4265  *                   "'" ([^<&'] | Reference)* "'"
4266  *
4267  * 3.3.3 Attribute-Value Normalization:
4268  * Before the value of an attribute is passed to the application or
4269  * checked for validity, the XML processor must normalize it as follows:
4270  * - a character reference is processed by appending the referenced
4271  *   character to the attribute value
4272  * - an entity reference is processed by recursively processing the
4273  *   replacement text of the entity
4274  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4275  *   appending #x20 to the normalized value, except that only a single
4276  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4277  *   parsed entity or the literal entity value of an internal parsed entity
4278  * - other characters are processed by appending them to the normalized value
4279  * If the declared value is not CDATA, then the XML processor must further
4280  * process the normalized attribute value by discarding any leading and
4281  * trailing space (#x20) characters, and by replacing sequences of space
4282  * (#x20) characters by a single space (#x20) character.
4283  * All attributes for which no declaration has been read should be treated
4284  * by a non-validating parser as if declared CDATA.
4285  *
4286  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4287  */
4288 
4289 
4290 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4291 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4292     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4293     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4294 }
4295 
4296 /**
4297  * xmlParseSystemLiteral:
4298  * @ctxt:  an XML parser context
4299  *
4300  * parse an XML Literal
4301  *
4302  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4303  *
4304  * Returns the SystemLiteral parsed or NULL
4305  */
4306 
4307 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4308 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4309     xmlChar *buf = NULL;
4310     int len = 0;
4311     int size = XML_PARSER_BUFFER_SIZE;
4312     int cur, l;
4313     xmlChar stop;
4314     int state = ctxt->instate;
4315     int count = 0;
4316 
4317     SHRINK;
4318     if (RAW == '"') {
4319         NEXT;
4320 	stop = '"';
4321     } else if (RAW == '\'') {
4322         NEXT;
4323 	stop = '\'';
4324     } else {
4325 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4326 	return(NULL);
4327     }
4328 
4329     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4330     if (buf == NULL) {
4331         xmlErrMemory(ctxt, NULL);
4332 	return(NULL);
4333     }
4334     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4335     cur = CUR_CHAR(l);
4336     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4337 	if (len + 5 >= size) {
4338 	    xmlChar *tmp;
4339 
4340             if ((size > XML_MAX_NAME_LENGTH) &&
4341                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4342                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4343                 xmlFree(buf);
4344 		ctxt->instate = (xmlParserInputState) state;
4345                 return(NULL);
4346             }
4347 	    size *= 2;
4348 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4349 	    if (tmp == NULL) {
4350 	        xmlFree(buf);
4351 		xmlErrMemory(ctxt, NULL);
4352 		ctxt->instate = (xmlParserInputState) state;
4353 		return(NULL);
4354 	    }
4355 	    buf = tmp;
4356 	}
4357 	count++;
4358 	if (count > 50) {
4359 	    GROW;
4360 	    count = 0;
4361             if (ctxt->instate == XML_PARSER_EOF) {
4362 	        xmlFree(buf);
4363 		return(NULL);
4364             }
4365 	}
4366 	COPY_BUF(l,buf,len,cur);
4367 	NEXTL(l);
4368 	cur = CUR_CHAR(l);
4369 	if (cur == 0) {
4370 	    GROW;
4371 	    SHRINK;
4372 	    cur = CUR_CHAR(l);
4373 	}
4374     }
4375     buf[len] = 0;
4376     ctxt->instate = (xmlParserInputState) state;
4377     if (!IS_CHAR(cur)) {
4378 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4379     } else {
4380 	NEXT;
4381     }
4382     return(buf);
4383 }
4384 
4385 /**
4386  * xmlParsePubidLiteral:
4387  * @ctxt:  an XML parser context
4388  *
4389  * parse an XML public literal
4390  *
4391  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4392  *
4393  * Returns the PubidLiteral parsed or NULL.
4394  */
4395 
4396 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4397 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4398     xmlChar *buf = NULL;
4399     int len = 0;
4400     int size = XML_PARSER_BUFFER_SIZE;
4401     xmlChar cur;
4402     xmlChar stop;
4403     int count = 0;
4404     xmlParserInputState oldstate = ctxt->instate;
4405 
4406     SHRINK;
4407     if (RAW == '"') {
4408         NEXT;
4409 	stop = '"';
4410     } else if (RAW == '\'') {
4411         NEXT;
4412 	stop = '\'';
4413     } else {
4414 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4415 	return(NULL);
4416     }
4417     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4418     if (buf == NULL) {
4419 	xmlErrMemory(ctxt, NULL);
4420 	return(NULL);
4421     }
4422     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4423     cur = CUR;
4424     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4425 	if (len + 1 >= size) {
4426 	    xmlChar *tmp;
4427 
4428             if ((size > XML_MAX_NAME_LENGTH) &&
4429                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4430                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4431                 xmlFree(buf);
4432                 return(NULL);
4433             }
4434 	    size *= 2;
4435 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4436 	    if (tmp == NULL) {
4437 		xmlErrMemory(ctxt, NULL);
4438 		xmlFree(buf);
4439 		return(NULL);
4440 	    }
4441 	    buf = tmp;
4442 	}
4443 	buf[len++] = cur;
4444 	count++;
4445 	if (count > 50) {
4446 	    GROW;
4447 	    count = 0;
4448             if (ctxt->instate == XML_PARSER_EOF) {
4449 		xmlFree(buf);
4450 		return(NULL);
4451             }
4452 	}
4453 	NEXT;
4454 	cur = CUR;
4455 	if (cur == 0) {
4456 	    GROW;
4457 	    SHRINK;
4458 	    cur = CUR;
4459 	}
4460     }
4461     buf[len] = 0;
4462     if (cur != stop) {
4463 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4464     } else {
4465 	NEXT;
4466     }
4467     ctxt->instate = oldstate;
4468     return(buf);
4469 }
4470 
4471 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4472 
/*
 * used for the test in the inner loop of the char data testing
 *
 * A byte maps to itself when the fast scanner may step over it and to 0
 * when it has to stop: control characters other than TAB (so CR and LF
 * stop it), '&', '<', ']' and every byte above 0x7F. The upper half of
 * the table is left to the implicit zero initialisation.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, all zero */
};
4510 
4511 /**
4512  * xmlParseCharData:
4513  * @ctxt:  an XML parser context
4514  * @cdata:  int indicating whether we are within a CDATA section
4515  *
4516  * parse a CharData section.
4517  * if we are within a CDATA section ']]>' marks an end of section.
4518  *
4519  * The right angle bracket (>) may be represented using the string "&gt;",
4520  * and must, for compatibility, be escaped using "&gt;" or a character
4521  * reference when it appears in the string "]]>" in content, when that
4522  * string is not marking the end of a CDATA section.
4523  *
4524  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4525  */
4526 
4527 void
xmlParseCharData(xmlParserCtxtPtr ctxt,int cdata)4528 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4529     const xmlChar *in;
4530     int nbchar = 0;
4531     int line = ctxt->input->line;
4532     int col = ctxt->input->col;
4533     int ccol;
4534 
4535     SHRINK;
4536     GROW;
4537     /*
4538      * Accelerated common case where input don't need to be
4539      * modified before passing it to the handler.
4540      */
4541     if (!cdata) {
4542 	in = ctxt->input->cur;
4543 	do {
4544 get_more_space:
4545 	    while (*in == 0x20) { in++; ctxt->input->col++; }
4546 	    if (*in == 0xA) {
4547 		do {
4548 		    ctxt->input->line++; ctxt->input->col = 1;
4549 		    in++;
4550 		} while (*in == 0xA);
4551 		goto get_more_space;
4552 	    }
4553 	    if (*in == '<') {
4554 		nbchar = in - ctxt->input->cur;
4555 		if (nbchar > 0) {
4556 		    const xmlChar *tmp = ctxt->input->cur;
4557 		    ctxt->input->cur = in;
4558 
4559 		    if ((ctxt->sax != NULL) &&
4560 		        (ctxt->sax->ignorableWhitespace !=
4561 		         ctxt->sax->characters)) {
4562 			if (areBlanks(ctxt, tmp, nbchar, 1)) {
4563 			    if (ctxt->sax->ignorableWhitespace != NULL)
4564 				ctxt->sax->ignorableWhitespace(ctxt->userData,
4565 						       tmp, nbchar);
4566 			} else {
4567 			    if (ctxt->sax->characters != NULL)
4568 				ctxt->sax->characters(ctxt->userData,
4569 						      tmp, nbchar);
4570 			    if (*ctxt->space == -1)
4571 			        *ctxt->space = -2;
4572 			}
4573 		    } else if ((ctxt->sax != NULL) &&
4574 		               (ctxt->sax->characters != NULL)) {
4575 			ctxt->sax->characters(ctxt->userData,
4576 					      tmp, nbchar);
4577 		    }
4578 		}
4579 		return;
4580 	    }
4581 
4582 get_more:
4583             ccol = ctxt->input->col;
4584 	    while (test_char_data[*in]) {
4585 		in++;
4586 		ccol++;
4587 	    }
4588 	    ctxt->input->col = ccol;
4589 	    if (*in == 0xA) {
4590 		do {
4591 		    ctxt->input->line++; ctxt->input->col = 1;
4592 		    in++;
4593 		} while (*in == 0xA);
4594 		goto get_more;
4595 	    }
4596 	    if (*in == ']') {
4597 		if ((in[1] == ']') && (in[2] == '>')) {
4598 		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4599 		    ctxt->input->cur = in;
4600 		    return;
4601 		}
4602 		in++;
4603 		ctxt->input->col++;
4604 		goto get_more;
4605 	    }
4606 	    nbchar = in - ctxt->input->cur;
4607 	    if (nbchar > 0) {
4608 		if ((ctxt->sax != NULL) &&
4609 		    (ctxt->sax->ignorableWhitespace !=
4610 		     ctxt->sax->characters) &&
4611 		    (IS_BLANK_CH(*ctxt->input->cur))) {
4612 		    const xmlChar *tmp = ctxt->input->cur;
4613 		    ctxt->input->cur = in;
4614 
4615 		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
4616 		        if (ctxt->sax->ignorableWhitespace != NULL)
4617 			    ctxt->sax->ignorableWhitespace(ctxt->userData,
4618 							   tmp, nbchar);
4619 		    } else {
4620 		        if (ctxt->sax->characters != NULL)
4621 			    ctxt->sax->characters(ctxt->userData,
4622 						  tmp, nbchar);
4623 			if (*ctxt->space == -1)
4624 			    *ctxt->space = -2;
4625 		    }
4626                     line = ctxt->input->line;
4627                     col = ctxt->input->col;
4628 		} else if (ctxt->sax != NULL) {
4629 		    if (ctxt->sax->characters != NULL)
4630 			ctxt->sax->characters(ctxt->userData,
4631 					      ctxt->input->cur, nbchar);
4632                     line = ctxt->input->line;
4633                     col = ctxt->input->col;
4634 		}
4635                 /* something really bad happened in the SAX callback */
4636                 if (ctxt->instate != XML_PARSER_CONTENT)
4637                     return;
4638 	    }
4639 	    ctxt->input->cur = in;
4640 	    if (*in == 0xD) {
4641 		in++;
4642 		if (*in == 0xA) {
4643 		    ctxt->input->cur = in;
4644 		    in++;
4645 		    ctxt->input->line++; ctxt->input->col = 1;
4646 		    continue; /* while */
4647 		}
4648 		in--;
4649 	    }
4650 	    if (*in == '<') {
4651 		return;
4652 	    }
4653 	    if (*in == '&') {
4654 		return;
4655 	    }
4656 	    SHRINK;
4657 	    GROW;
4658             if (ctxt->instate == XML_PARSER_EOF)
4659 		return;
4660 	    in = ctxt->input->cur;
4661 	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4662 	nbchar = 0;
4663     }
4664     ctxt->input->line = line;
4665     ctxt->input->col = col;
4666     xmlParseCharDataComplex(ctxt, cdata);
4667 }
4668 
4669 /**
4670  * xmlParseCharDataComplex:
4671  * @ctxt:  an XML parser context
4672  * @cdata:  int indicating whether we are within a CDATA section
4673  *
4674  * parse a CharData section.this is the fallback function
4675  * of xmlParseCharData() when the parsing requires handling
4676  * of non-ASCII characters.
4677  */
4678 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4679 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4680     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4681     int nbchar = 0;
4682     int cur, l;
4683     int count = 0;
4684 
4685     SHRINK;
4686     GROW;
4687     cur = CUR_CHAR(l);
4688     while ((cur != '<') && /* checked */
4689            (cur != '&') &&
4690 	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4691 	if ((cur == ']') && (NXT(1) == ']') &&
4692 	    (NXT(2) == '>')) {
4693 	    if (cdata) break;
4694 	    else {
4695 		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4696 	    }
4697 	}
4698 	COPY_BUF(l,buf,nbchar,cur);
4699 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4700 	    buf[nbchar] = 0;
4701 
4702 	    /*
4703 	     * OK the segment is to be consumed as chars.
4704 	     */
4705 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4706 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4707 		    if (ctxt->sax->ignorableWhitespace != NULL)
4708 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4709 			                               buf, nbchar);
4710 		} else {
4711 		    if (ctxt->sax->characters != NULL)
4712 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4713 		    if ((ctxt->sax->characters !=
4714 		         ctxt->sax->ignorableWhitespace) &&
4715 			(*ctxt->space == -1))
4716 			*ctxt->space = -2;
4717 		}
4718 	    }
4719 	    nbchar = 0;
4720             /* something really bad happened in the SAX callback */
4721             if (ctxt->instate != XML_PARSER_CONTENT)
4722                 return;
4723 	}
4724 	count++;
4725 	if (count > 50) {
4726 	    GROW;
4727 	    count = 0;
4728             if (ctxt->instate == XML_PARSER_EOF)
4729 		return;
4730 	}
4731 	NEXTL(l);
4732 	cur = CUR_CHAR(l);
4733     }
4734     if (nbchar != 0) {
4735         buf[nbchar] = 0;
4736 	/*
4737 	 * OK the segment is to be consumed as chars.
4738 	 */
4739 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4740 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4741 		if (ctxt->sax->ignorableWhitespace != NULL)
4742 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4743 	    } else {
4744 		if (ctxt->sax->characters != NULL)
4745 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4746 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4747 		    (*ctxt->space == -1))
4748 		    *ctxt->space = -2;
4749 	    }
4750 	}
4751     }
4752     if ((cur != 0) && (!IS_CHAR(cur))) {
4753 	/* Generate the error and skip the offending character */
4754         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4755                           "PCDATA invalid Char value %d\n",
4756 	                  cur);
4757 	NEXTL(l);
4758     }
4759 }
4760 
4761 /**
4762  * xmlParseExternalID:
4763  * @ctxt:  an XML parser context
4764  * @publicID:  a xmlChar** receiving PubidLiteral
4765  * @strict: indicate whether we should restrict parsing to only
4766  *          production [75], see NOTE below
4767  *
4768  * Parse an External ID or a Public ID
4769  *
4770  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4771  *       'PUBLIC' S PubidLiteral S SystemLiteral
4772  *
4773  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4774  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4775  *
4776  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4777  *
4778  * Returns the function returns SystemLiteral and in the second
4779  *                case publicID receives PubidLiteral, is strict is off
4780  *                it is possible to return NULL and have publicID set.
4781  */
4782 
4783 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4784 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4785     xmlChar *URI = NULL;
4786 
4787     SHRINK;
4788 
4789     *publicID = NULL;
4790     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4791         SKIP(6);
4792 	if (!IS_BLANK_CH(CUR)) {
4793 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4794 	                   "Space required after 'SYSTEM'\n");
4795 	}
4796         SKIP_BLANKS;
4797 	URI = xmlParseSystemLiteral(ctxt);
4798 	if (URI == NULL) {
4799 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4800         }
4801     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4802         SKIP(6);
4803 	if (!IS_BLANK_CH(CUR)) {
4804 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4805 		    "Space required after 'PUBLIC'\n");
4806 	}
4807         SKIP_BLANKS;
4808 	*publicID = xmlParsePubidLiteral(ctxt);
4809 	if (*publicID == NULL) {
4810 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4811 	}
4812 	if (strict) {
4813 	    /*
4814 	     * We don't handle [83] so "S SystemLiteral" is required.
4815 	     */
4816 	    if (!IS_BLANK_CH(CUR)) {
4817 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4818 			"Space required after the Public Identifier\n");
4819 	    }
4820 	} else {
4821 	    /*
4822 	     * We handle [83] so we return immediately, if
4823 	     * "S SystemLiteral" is not detected. From a purely parsing
4824 	     * point of view that's a nice mess.
4825 	     */
4826 	    const xmlChar *ptr;
4827 	    GROW;
4828 
4829 	    ptr = CUR_PTR;
4830 	    if (!IS_BLANK_CH(*ptr)) return(NULL);
4831 
4832 	    while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4833 	    if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4834 	}
4835         SKIP_BLANKS;
4836 	URI = xmlParseSystemLiteral(ctxt);
4837 	if (URI == NULL) {
4838 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4839         }
4840     }
4841     return(URI);
4842 }
4843 
4844 /**
4845  * xmlParseCommentComplex:
4846  * @ctxt:  an XML parser context
4847  * @buf:  the already parsed part of the buffer
4848  * @len:  number of bytes filles in the buffer
4849  * @size:  allocated size of the buffer
4850  *
4851  * Skip an XML (SGML) comment <!-- .... -->
4852  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4853  *  must not occur within comments. "
4854  * This is the slow routine in case the accelerator for ascii didn't work
4855  *
4856  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4857  */
4858 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4859 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4860                        size_t len, size_t size) {
4861     int q, ql;
4862     int r, rl;
4863     int cur, l;
4864     size_t count = 0;
4865     int inputid;
4866 
4867     inputid = ctxt->input->id;
4868 
4869     if (buf == NULL) {
4870         len = 0;
4871 	size = XML_PARSER_BUFFER_SIZE;
4872 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4873 	if (buf == NULL) {
4874 	    xmlErrMemory(ctxt, NULL);
4875 	    return;
4876 	}
4877     }
4878     GROW;	/* Assure there's enough input data */
4879     q = CUR_CHAR(ql);
4880     if (q == 0)
4881         goto not_terminated;
4882     if (!IS_CHAR(q)) {
4883         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4884                           "xmlParseComment: invalid xmlChar value %d\n",
4885 	                  q);
4886 	xmlFree (buf);
4887 	return;
4888     }
4889     NEXTL(ql);
4890     r = CUR_CHAR(rl);
4891     if (r == 0)
4892         goto not_terminated;
4893     if (!IS_CHAR(r)) {
4894         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4895                           "xmlParseComment: invalid xmlChar value %d\n",
4896 	                  q);
4897 	xmlFree (buf);
4898 	return;
4899     }
4900     NEXTL(rl);
4901     cur = CUR_CHAR(l);
4902     if (cur == 0)
4903         goto not_terminated;
4904     while (IS_CHAR(cur) && /* checked */
4905            ((cur != '>') ||
4906 	    (r != '-') || (q != '-'))) {
4907 	if ((r == '-') && (q == '-')) {
4908 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4909 	}
4910         if ((len > XML_MAX_TEXT_LENGTH) &&
4911             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4912             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4913                          "Comment too big found", NULL);
4914             xmlFree (buf);
4915             return;
4916         }
4917 	if (len + 5 >= size) {
4918 	    xmlChar *new_buf;
4919             size_t new_size;
4920 
4921 	    new_size = size * 2;
4922 	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4923 	    if (new_buf == NULL) {
4924 		xmlFree (buf);
4925 		xmlErrMemory(ctxt, NULL);
4926 		return;
4927 	    }
4928 	    buf = new_buf;
4929             size = new_size;
4930 	}
4931 	COPY_BUF(ql,buf,len,q);
4932 	q = r;
4933 	ql = rl;
4934 	r = cur;
4935 	rl = l;
4936 
4937 	count++;
4938 	if (count > 50) {
4939 	    GROW;
4940 	    count = 0;
4941             if (ctxt->instate == XML_PARSER_EOF) {
4942 		xmlFree(buf);
4943 		return;
4944             }
4945 	}
4946 	NEXTL(l);
4947 	cur = CUR_CHAR(l);
4948 	if (cur == 0) {
4949 	    SHRINK;
4950 	    GROW;
4951 	    cur = CUR_CHAR(l);
4952 	}
4953     }
4954     buf[len] = 0;
4955     if (cur == 0) {
4956 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4957 	                     "Comment not terminated \n<!--%.50s\n", buf);
4958     } else if (!IS_CHAR(cur)) {
4959         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4960                           "xmlParseComment: invalid xmlChar value %d\n",
4961 	                  cur);
4962     } else {
4963 	if (inputid != ctxt->input->id) {
4964 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4965 		"Comment doesn't start and stop in the same entity\n");
4966 	}
4967         NEXT;
4968 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4969 	    (!ctxt->disableSAX))
4970 	    ctxt->sax->comment(ctxt->userData, buf);
4971     }
4972     xmlFree(buf);
4973     return;
4974 not_terminated:
4975     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4976 			 "Comment not terminated\n", NULL);
4977     xmlFree(buf);
4978     return;
4979 }
4980 
4981 /**
4982  * xmlParseComment:
4983  * @ctxt:  an XML parser context
4984  *
4985  * Skip an XML (SGML) comment <!-- .... -->
4986  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4987  *  must not occur within comments. "
4988  *
4989  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4990  */
4991 void
xmlParseComment(xmlParserCtxtPtr ctxt)4992 xmlParseComment(xmlParserCtxtPtr ctxt) {
4993     xmlChar *buf = NULL;
4994     size_t size = XML_PARSER_BUFFER_SIZE;
4995     size_t len = 0;
4996     xmlParserInputState state;
4997     const xmlChar *in;
4998     size_t nbchar = 0;
4999     int ccol;
5000     int inputid;
5001 
5002     /*
5003      * Check that there is a comment right here.
5004      */
5005     if ((RAW != '<') || (NXT(1) != '!') ||
5006         (NXT(2) != '-') || (NXT(3) != '-')) return;
5007     state = ctxt->instate;
5008     ctxt->instate = XML_PARSER_COMMENT;
5009     inputid = ctxt->input->id;
5010     SKIP(4);
5011     SHRINK;
5012     GROW;
5013 
5014     /*
5015      * Accelerated common case where input don't need to be
5016      * modified before passing it to the handler.
5017      */
5018     in = ctxt->input->cur;
5019     do {
5020 	if (*in == 0xA) {
5021 	    do {
5022 		ctxt->input->line++; ctxt->input->col = 1;
5023 		in++;
5024 	    } while (*in == 0xA);
5025 	}
5026 get_more:
5027         ccol = ctxt->input->col;
5028 	while (((*in > '-') && (*in <= 0x7F)) ||
5029 	       ((*in >= 0x20) && (*in < '-')) ||
5030 	       (*in == 0x09)) {
5031 		    in++;
5032 		    ccol++;
5033 	}
5034 	ctxt->input->col = ccol;
5035 	if (*in == 0xA) {
5036 	    do {
5037 		ctxt->input->line++; ctxt->input->col = 1;
5038 		in++;
5039 	    } while (*in == 0xA);
5040 	    goto get_more;
5041 	}
5042 	nbchar = in - ctxt->input->cur;
5043 	/*
5044 	 * save current set of data
5045 	 */
5046 	if (nbchar > 0) {
5047 	    if ((ctxt->sax != NULL) &&
5048 		(ctxt->sax->comment != NULL)) {
5049 		if (buf == NULL) {
5050 		    if ((*in == '-') && (in[1] == '-'))
5051 		        size = nbchar + 1;
5052 		    else
5053 		        size = XML_PARSER_BUFFER_SIZE + nbchar;
5054 		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5055 		    if (buf == NULL) {
5056 		        xmlErrMemory(ctxt, NULL);
5057 			ctxt->instate = state;
5058 			return;
5059 		    }
5060 		    len = 0;
5061 		} else if (len + nbchar + 1 >= size) {
5062 		    xmlChar *new_buf;
5063 		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5064 		    new_buf = (xmlChar *) xmlRealloc(buf,
5065 		                                     size * sizeof(xmlChar));
5066 		    if (new_buf == NULL) {
5067 		        xmlFree (buf);
5068 			xmlErrMemory(ctxt, NULL);
5069 			ctxt->instate = state;
5070 			return;
5071 		    }
5072 		    buf = new_buf;
5073 		}
5074 		memcpy(&buf[len], ctxt->input->cur, nbchar);
5075 		len += nbchar;
5076 		buf[len] = 0;
5077 	    }
5078 	}
5079         if ((len > XML_MAX_TEXT_LENGTH) &&
5080             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5081             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5082                          "Comment too big found", NULL);
5083             xmlFree (buf);
5084             return;
5085         }
5086 	ctxt->input->cur = in;
5087 	if (*in == 0xA) {
5088 	    in++;
5089 	    ctxt->input->line++; ctxt->input->col = 1;
5090 	}
5091 	if (*in == 0xD) {
5092 	    in++;
5093 	    if (*in == 0xA) {
5094 		ctxt->input->cur = in;
5095 		in++;
5096 		ctxt->input->line++; ctxt->input->col = 1;
5097 		continue; /* while */
5098 	    }
5099 	    in--;
5100 	}
5101 	SHRINK;
5102 	GROW;
5103         if (ctxt->instate == XML_PARSER_EOF) {
5104             xmlFree(buf);
5105             return;
5106         }
5107 	in = ctxt->input->cur;
5108 	if (*in == '-') {
5109 	    if (in[1] == '-') {
5110 	        if (in[2] == '>') {
5111 		    if (ctxt->input->id != inputid) {
5112 			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5113 			"comment doesn't start and stop in the same entity\n");
5114 		    }
5115 		    SKIP(3);
5116 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5117 		        (!ctxt->disableSAX)) {
5118 			if (buf != NULL)
5119 			    ctxt->sax->comment(ctxt->userData, buf);
5120 			else
5121 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5122 		    }
5123 		    if (buf != NULL)
5124 		        xmlFree(buf);
5125 		    if (ctxt->instate != XML_PARSER_EOF)
5126 			ctxt->instate = state;
5127 		    return;
5128 		}
5129 		if (buf != NULL) {
5130 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5131 		                      "Double hyphen within comment: "
5132                                       "<!--%.50s\n",
5133 				      buf);
5134 		} else
5135 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5136 		                      "Double hyphen within comment\n", NULL);
5137 		in++;
5138 		ctxt->input->col++;
5139 	    }
5140 	    in++;
5141 	    ctxt->input->col++;
5142 	    goto get_more;
5143 	}
5144     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5145     xmlParseCommentComplex(ctxt, buf, len, size);
5146     ctxt->instate = state;
5147     return;
5148 }
5149 
5150 
5151 /**
5152  * xmlParsePITarget:
5153  * @ctxt:  an XML parser context
5154  *
5155  * parse the name of a PI
5156  *
5157  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5158  *
5159  * Returns the PITarget name or NULL
5160  */
5161 
5162 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5163 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5164     const xmlChar *name;
5165 
5166     name = xmlParseName(ctxt);
5167     if ((name != NULL) &&
5168         ((name[0] == 'x') || (name[0] == 'X')) &&
5169         ((name[1] == 'm') || (name[1] == 'M')) &&
5170         ((name[2] == 'l') || (name[2] == 'L'))) {
5171 	int i;
5172 	if ((name[0] == 'x') && (name[1] == 'm') &&
5173 	    (name[2] == 'l') && (name[3] == 0)) {
5174 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5175 		 "XML declaration allowed only at the start of the document\n");
5176 	    return(name);
5177 	} else if (name[3] == 0) {
5178 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5179 	    return(name);
5180 	}
5181 	for (i = 0;;i++) {
5182 	    if (xmlW3CPIs[i] == NULL) break;
5183 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5184 	        return(name);
5185 	}
5186 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5187 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5188 		      NULL, NULL);
5189     }
5190     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5191 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5192 		 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5193     }
5194     return(name);
5195 }
5196 
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected content is: catalog, '=', then a value quoted with ' or ",
 * with optional blanks around each part and nothing but blanks after the
 * value. A missing '=' makes the function return silently; any other
 * deviation is reported as an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *cursor = catalog;
    const xmlChar *valueStart;
    xmlChar quote;

    /* The pseudo-attribute name. */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    cursor += 7;

    /* The '=' sign. */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != '=')
        return;
    cursor++;

    /* The opening quote, remembered so the matching one can be found. */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    cursor++;

    /* The quoted value. */
    valueStart = cursor;
    for (; (*cursor != 0) && (*cursor != quote); cursor++)
        ;
    if (*cursor == 0)
        goto syntax_error;
    uri = xmlStrndup(valueStart, cursor - valueStart);
    cursor++;

    /* Only blanks may follow the closing quote. */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != 0)
        goto syntax_error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
5258 
5259 /**
5260  * xmlParsePI:
5261  * @ctxt:  an XML parser context
5262  *
5263  * parse an XML Processing Instruction.
5264  *
5265  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5266  *
5267  * The processing is transfered to SAX once parsed.
5268  */
5269 
5270 void
xmlParsePI(xmlParserCtxtPtr ctxt)5271 xmlParsePI(xmlParserCtxtPtr ctxt) {
5272     xmlChar *buf = NULL;
5273     size_t len = 0;
5274     size_t size = XML_PARSER_BUFFER_SIZE;
5275     int cur, l;
5276     const xmlChar *target;
5277     xmlParserInputState state;
5278     int count = 0;
5279 
5280     if ((RAW == '<') && (NXT(1) == '?')) {
5281 	xmlParserInputPtr input = ctxt->input;
5282 	state = ctxt->instate;
5283         ctxt->instate = XML_PARSER_PI;
5284 	/*
5285 	 * this is a Processing Instruction.
5286 	 */
5287 	SKIP(2);
5288 	SHRINK;
5289 
5290 	/*
5291 	 * Parse the target name and check for special support like
5292 	 * namespace.
5293 	 */
5294         target = xmlParsePITarget(ctxt);
5295 	if (target != NULL) {
5296 	    if ((RAW == '?') && (NXT(1) == '>')) {
5297 		if (input != ctxt->input) {
5298 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5299 	    "PI declaration doesn't start and stop in the same entity\n");
5300 		}
5301 		SKIP(2);
5302 
5303 		/*
5304 		 * SAX: PI detected.
5305 		 */
5306 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5307 		    (ctxt->sax->processingInstruction != NULL))
5308 		    ctxt->sax->processingInstruction(ctxt->userData,
5309 		                                     target, NULL);
5310 		if (ctxt->instate != XML_PARSER_EOF)
5311 		    ctxt->instate = state;
5312 		return;
5313 	    }
5314 	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5315 	    if (buf == NULL) {
5316 		xmlErrMemory(ctxt, NULL);
5317 		ctxt->instate = state;
5318 		return;
5319 	    }
5320 	    cur = CUR;
5321 	    if (!IS_BLANK(cur)) {
5322 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5323 			  "ParsePI: PI %s space expected\n", target);
5324 	    }
5325             SKIP_BLANKS;
5326 	    cur = CUR_CHAR(l);
5327 	    while (IS_CHAR(cur) && /* checked */
5328 		   ((cur != '?') || (NXT(1) != '>'))) {
5329 		if (len + 5 >= size) {
5330 		    xmlChar *tmp;
5331                     size_t new_size = size * 2;
5332 		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5333 		    if (tmp == NULL) {
5334 			xmlErrMemory(ctxt, NULL);
5335 			xmlFree(buf);
5336 			ctxt->instate = state;
5337 			return;
5338 		    }
5339 		    buf = tmp;
5340                     size = new_size;
5341 		}
5342 		count++;
5343 		if (count > 50) {
5344 		    GROW;
5345                     if (ctxt->instate == XML_PARSER_EOF) {
5346                         xmlFree(buf);
5347                         return;
5348                     }
5349 		    count = 0;
5350                     if ((len > XML_MAX_TEXT_LENGTH) &&
5351                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5352                         xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5353                                           "PI %s too big found", target);
5354                         xmlFree(buf);
5355                         ctxt->instate = state;
5356                         return;
5357                     }
5358 		}
5359 		COPY_BUF(l,buf,len,cur);
5360 		NEXTL(l);
5361 		cur = CUR_CHAR(l);
5362 		if (cur == 0) {
5363 		    SHRINK;
5364 		    GROW;
5365 		    cur = CUR_CHAR(l);
5366 		}
5367 	    }
5368             if ((len > XML_MAX_TEXT_LENGTH) &&
5369                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5370                 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5371                                   "PI %s too big found", target);
5372                 xmlFree(buf);
5373                 ctxt->instate = state;
5374                 return;
5375             }
5376 	    buf[len] = 0;
5377 	    if (cur != '?') {
5378 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5379 		      "ParsePI: PI %s never end ...\n", target);
5380 	    } else {
5381 		if (input != ctxt->input) {
5382 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5383 	    "PI declaration doesn't start and stop in the same entity\n");
5384 		}
5385 		SKIP(2);
5386 
5387 #ifdef LIBXML_CATALOG_ENABLED
5388 		if (((state == XML_PARSER_MISC) ||
5389 	             (state == XML_PARSER_START)) &&
5390 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5391 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5392 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5393 			(allow == XML_CATA_ALLOW_ALL))
5394 			xmlParseCatalogPI(ctxt, buf);
5395 		}
5396 #endif
5397 
5398 
5399 		/*
5400 		 * SAX: PI detected.
5401 		 */
5402 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5403 		    (ctxt->sax->processingInstruction != NULL))
5404 		    ctxt->sax->processingInstruction(ctxt->userData,
5405 		                                     target, buf);
5406 	    }
5407 	    xmlFree(buf);
5408 	} else {
5409 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5410 	}
5411 	if (ctxt->instate != XML_PARSER_EOF)
5412 	    ctxt->instate = state;
5413     }
5414 }
5415 
5416 /**
5417  * xmlParseNotationDecl:
5418  * @ctxt:  an XML parser context
5419  *
5420  * parse a notation declaration
5421  *
5422  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5423  *
5424  * Hence there is actually 3 choices:
5425  *     'PUBLIC' S PubidLiteral
5426  *     'PUBLIC' S PubidLiteral S SystemLiteral
5427  * and 'SYSTEM' S SystemLiteral
5428  *
5429  * See the NOTE on xmlParseExternalID().
5430  */
5431 
5432 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5433 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5434     const xmlChar *name;
5435     xmlChar *Pubid;
5436     xmlChar *Systemid;
5437 
5438     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5439 	xmlParserInputPtr input = ctxt->input;
5440 	SHRINK;
5441 	SKIP(10);
5442 	if (!IS_BLANK_CH(CUR)) {
5443 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5444 			   "Space required after '<!NOTATION'\n");
5445 	    return;
5446 	}
5447 	SKIP_BLANKS;
5448 
5449         name = xmlParseName(ctxt);
5450 	if (name == NULL) {
5451 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5452 	    return;
5453 	}
5454 	if (!IS_BLANK_CH(CUR)) {
5455 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5456 		     "Space required after the NOTATION name'\n");
5457 	    return;
5458 	}
5459 	if (xmlStrchr(name, ':') != NULL) {
5460 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5461 		     "colons are forbidden from notation names '%s'\n",
5462 		     name, NULL, NULL);
5463 	}
5464 	SKIP_BLANKS;
5465 
5466 	/*
5467 	 * Parse the IDs.
5468 	 */
5469 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5470 	SKIP_BLANKS;
5471 
5472 	if (RAW == '>') {
5473 	    if (input != ctxt->input) {
5474 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5475 	"Notation declaration doesn't start and stop in the same entity\n");
5476 	    }
5477 	    NEXT;
5478 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5479 		(ctxt->sax->notationDecl != NULL))
5480 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5481 	} else {
5482 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5483 	}
5484 	if (Systemid != NULL) xmlFree(Systemid);
5485 	if (Pubid != NULL) xmlFree(Pubid);
5486     }
5487 }
5488 
5489 /**
5490  * xmlParseEntityDecl:
5491  * @ctxt:  an XML parser context
5492  *
5493  * parse <!ENTITY declarations
5494  *
5495  * [70] EntityDecl ::= GEDecl | PEDecl
5496  *
5497  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5498  *
5499  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5500  *
5501  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5502  *
5503  * [74] PEDef ::= EntityValue | ExternalID
5504  *
5505  * [76] NDataDecl ::= S 'NDATA' S Name
5506  *
5507  * [ VC: Notation Declared ]
5508  * The Name must match the declared name of a notation.
5509  */
5510 
5511 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5512 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5513     const xmlChar *name = NULL;
5514     xmlChar *value = NULL;
5515     xmlChar *URI = NULL, *literal = NULL;
5516     const xmlChar *ndata = NULL;
5517     int isParameter = 0;
5518     xmlChar *orig = NULL;
5519     int skipped;
5520 
5521     /* GROW; done in the caller */
5522     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5523 	xmlParserInputPtr input = ctxt->input;
5524 	SHRINK;
5525 	SKIP(8);
5526 	skipped = SKIP_BLANKS;
5527 	if (skipped == 0) {
5528 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5529 			   "Space required after '<!ENTITY'\n");
5530 	}
5531 
5532 	if (RAW == '%') {
5533 	    NEXT;
5534 	    skipped = SKIP_BLANKS;
5535 	    if (skipped == 0) {
5536 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5537 			       "Space required after '%%'\n");
5538 	    }
5539 	    isParameter = 1;
5540 	}
5541 
5542         name = xmlParseName(ctxt);
5543 	if (name == NULL) {
5544 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5545 	                   "xmlParseEntityDecl: no name\n");
5546             return;
5547 	}
5548 	if (xmlStrchr(name, ':') != NULL) {
5549 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5550 		     "colons are forbidden from entities names '%s'\n",
5551 		     name, NULL, NULL);
5552 	}
5553         skipped = SKIP_BLANKS;
5554 	if (skipped == 0) {
5555 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5556 			   "Space required after the entity name\n");
5557 	}
5558 
5559 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5560 	/*
5561 	 * handle the various case of definitions...
5562 	 */
5563 	if (isParameter) {
5564 	    if ((RAW == '"') || (RAW == '\'')) {
5565 	        value = xmlParseEntityValue(ctxt, &orig);
5566 		if (value) {
5567 		    if ((ctxt->sax != NULL) &&
5568 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5569 			ctxt->sax->entityDecl(ctxt->userData, name,
5570 		                    XML_INTERNAL_PARAMETER_ENTITY,
5571 				    NULL, NULL, value);
5572 		}
5573 	    } else {
5574 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5575 		if ((URI == NULL) && (literal == NULL)) {
5576 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5577 		}
5578 		if (URI) {
5579 		    xmlURIPtr uri;
5580 
5581 		    uri = xmlParseURI((const char *) URI);
5582 		    if (uri == NULL) {
5583 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5584 				     "Invalid URI: %s\n", URI);
5585 			/*
5586 			 * This really ought to be a well formedness error
5587 			 * but the XML Core WG decided otherwise c.f. issue
5588 			 * E26 of the XML erratas.
5589 			 */
5590 		    } else {
5591 			if (uri->fragment != NULL) {
5592 			    /*
5593 			     * Okay this is foolish to block those but not
5594 			     * invalid URIs.
5595 			     */
5596 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5597 			} else {
5598 			    if ((ctxt->sax != NULL) &&
5599 				(!ctxt->disableSAX) &&
5600 				(ctxt->sax->entityDecl != NULL))
5601 				ctxt->sax->entityDecl(ctxt->userData, name,
5602 					    XML_EXTERNAL_PARAMETER_ENTITY,
5603 					    literal, URI, NULL);
5604 			}
5605 			xmlFreeURI(uri);
5606 		    }
5607 		}
5608 	    }
5609 	} else {
5610 	    if ((RAW == '"') || (RAW == '\'')) {
5611 	        value = xmlParseEntityValue(ctxt, &orig);
5612 		if ((ctxt->sax != NULL) &&
5613 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5614 		    ctxt->sax->entityDecl(ctxt->userData, name,
5615 				XML_INTERNAL_GENERAL_ENTITY,
5616 				NULL, NULL, value);
5617 		/*
5618 		 * For expat compatibility in SAX mode.
5619 		 */
5620 		if ((ctxt->myDoc == NULL) ||
5621 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5622 		    if (ctxt->myDoc == NULL) {
5623 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5624 			if (ctxt->myDoc == NULL) {
5625 			    xmlErrMemory(ctxt, "New Doc failed");
5626 			    return;
5627 			}
5628 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5629 		    }
5630 		    if (ctxt->myDoc->intSubset == NULL)
5631 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5632 					    BAD_CAST "fake", NULL, NULL);
5633 
5634 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5635 			              NULL, NULL, value);
5636 		}
5637 	    } else {
5638 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5639 		if ((URI == NULL) && (literal == NULL)) {
5640 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5641 		}
5642 		if (URI) {
5643 		    xmlURIPtr uri;
5644 
5645 		    uri = xmlParseURI((const char *)URI);
5646 		    if (uri == NULL) {
5647 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5648 				     "Invalid URI: %s\n", URI);
5649 			/*
5650 			 * This really ought to be a well formedness error
5651 			 * but the XML Core WG decided otherwise c.f. issue
5652 			 * E26 of the XML erratas.
5653 			 */
5654 		    } else {
5655 			if (uri->fragment != NULL) {
5656 			    /*
5657 			     * Okay this is foolish to block those but not
5658 			     * invalid URIs.
5659 			     */
5660 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5661 			}
5662 			xmlFreeURI(uri);
5663 		    }
5664 		}
5665 		if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5666 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5667 				   "Space required before 'NDATA'\n");
5668 		}
5669 		SKIP_BLANKS;
5670 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5671 		    SKIP(5);
5672 		    if (!IS_BLANK_CH(CUR)) {
5673 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5674 				       "Space required after 'NDATA'\n");
5675 		    }
5676 		    SKIP_BLANKS;
5677 		    ndata = xmlParseName(ctxt);
5678 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5679 		        (ctxt->sax->unparsedEntityDecl != NULL))
5680 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5681 				    literal, URI, ndata);
5682 		} else {
5683 		    if ((ctxt->sax != NULL) &&
5684 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5685 			ctxt->sax->entityDecl(ctxt->userData, name,
5686 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5687 				    literal, URI, NULL);
5688 		    /*
5689 		     * For expat compatibility in SAX mode.
5690 		     * assuming the entity repalcement was asked for
5691 		     */
5692 		    if ((ctxt->replaceEntities != 0) &&
5693 			((ctxt->myDoc == NULL) ||
5694 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5695 			if (ctxt->myDoc == NULL) {
5696 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5697 			    if (ctxt->myDoc == NULL) {
5698 			        xmlErrMemory(ctxt, "New Doc failed");
5699 				return;
5700 			    }
5701 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5702 			}
5703 
5704 			if (ctxt->myDoc->intSubset == NULL)
5705 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5706 						BAD_CAST "fake", NULL, NULL);
5707 			xmlSAX2EntityDecl(ctxt, name,
5708 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5709 				          literal, URI, NULL);
5710 		    }
5711 		}
5712 	    }
5713 	}
5714 	if (ctxt->instate == XML_PARSER_EOF)
5715 	    return;
5716 	SKIP_BLANKS;
5717 	if (RAW != '>') {
5718 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5719 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5720 	    xmlHaltParser(ctxt);
5721 	} else {
5722 	    if (input != ctxt->input) {
5723 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5724 	"Entity declaration doesn't start and stop in the same entity\n");
5725 	    }
5726 	    NEXT;
5727 	}
5728 	if (orig != NULL) {
5729 	    /*
5730 	     * Ugly mechanism to save the raw entity value.
5731 	     */
5732 	    xmlEntityPtr cur = NULL;
5733 
5734 	    if (isParameter) {
5735 	        if ((ctxt->sax != NULL) &&
5736 		    (ctxt->sax->getParameterEntity != NULL))
5737 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5738 	    } else {
5739 	        if ((ctxt->sax != NULL) &&
5740 		    (ctxt->sax->getEntity != NULL))
5741 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5742 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5743 		    cur = xmlSAX2GetEntity(ctxt, name);
5744 		}
5745 	    }
5746             if (cur != NULL) {
5747 	        if (cur->orig != NULL)
5748 		    xmlFree(orig);
5749 		else
5750 		    cur->orig = orig;
5751 	    } else
5752 		xmlFree(orig);
5753 	}
5754 	if (value != NULL) xmlFree(value);
5755 	if (URI != NULL) xmlFree(URI);
5756 	if (literal != NULL) xmlFree(literal);
5757     }
5758 }
5759 
5760 /**
5761  * xmlParseDefaultDecl:
5762  * @ctxt:  an XML parser context
5763  * @value:  Receive a possible fixed default value for the attribute
5764  *
5765  * Parse an attribute default declaration
5766  *
5767  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5768  *
5769  * [ VC: Required Attribute ]
5770  * if the default declaration is the keyword #REQUIRED, then the
5771  * attribute must be specified for all elements of the type in the
5772  * attribute-list declaration.
5773  *
5774  * [ VC: Attribute Default Legal ]
5775  * The declared default value must meet the lexical constraints of
5776  * the declared attribute type c.f. xmlValidateAttributeDecl()
5777  *
5778  * [ VC: Fixed Attribute Default ]
5779  * if an attribute has a default value declared with the #FIXED
5780  * keyword, instances of that attribute must match the default value.
5781  *
5782  * [ WFC: No < in Attribute Values ]
5783  * handled in xmlParseAttValue()
5784  *
5785  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5786  *          or XML_ATTRIBUTE_FIXED.
5787  */
5788 
5789 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5790 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5791     int val;
5792     xmlChar *ret;
5793 
5794     *value = NULL;
5795     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5796 	SKIP(9);
5797 	return(XML_ATTRIBUTE_REQUIRED);
5798     }
5799     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5800 	SKIP(8);
5801 	return(XML_ATTRIBUTE_IMPLIED);
5802     }
5803     val = XML_ATTRIBUTE_NONE;
5804     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5805 	SKIP(6);
5806 	val = XML_ATTRIBUTE_FIXED;
5807 	if (!IS_BLANK_CH(CUR)) {
5808 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5809 			   "Space required after '#FIXED'\n");
5810 	}
5811 	SKIP_BLANKS;
5812     }
5813     ret = xmlParseAttValue(ctxt);
5814     ctxt->instate = XML_PARSER_DTD;
5815     if (ret == NULL) {
5816 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5817 		       "Attribute default value declaration error\n");
5818     } else
5819         *value = ret;
5820     return(val);
5821 }
5822 
5823 /**
5824  * xmlParseNotationType:
5825  * @ctxt:  an XML parser context
5826  *
5827  * parse an Notation attribute type.
5828  *
5829  * Note: the leading 'NOTATION' S part has already being parsed...
5830  *
5831  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5832  *
5833  * [ VC: Notation Attributes ]
5834  * Values of this type must match one of the notation names included
5835  * in the declaration; all notation names in the declaration must be declared.
5836  *
5837  * Returns: the notation attribute tree built while parsing
5838  */
5839 
5840 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5841 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5842     const xmlChar *name;
5843     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5844 
5845     if (RAW != '(') {
5846 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5847 	return(NULL);
5848     }
5849     SHRINK;
5850     do {
5851         NEXT;
5852 	SKIP_BLANKS;
5853         name = xmlParseName(ctxt);
5854 	if (name == NULL) {
5855 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5856 			   "Name expected in NOTATION declaration\n");
5857             xmlFreeEnumeration(ret);
5858 	    return(NULL);
5859 	}
5860 	tmp = ret;
5861 	while (tmp != NULL) {
5862 	    if (xmlStrEqual(name, tmp->name)) {
5863 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5864 	  "standalone: attribute notation value token %s duplicated\n",
5865 				 name, NULL);
5866 		if (!xmlDictOwns(ctxt->dict, name))
5867 		    xmlFree((xmlChar *) name);
5868 		break;
5869 	    }
5870 	    tmp = tmp->next;
5871 	}
5872 	if (tmp == NULL) {
5873 	    cur = xmlCreateEnumeration(name);
5874 	    if (cur == NULL) {
5875                 xmlFreeEnumeration(ret);
5876                 return(NULL);
5877             }
5878 	    if (last == NULL) ret = last = cur;
5879 	    else {
5880 		last->next = cur;
5881 		last = cur;
5882 	    }
5883 	}
5884 	SKIP_BLANKS;
5885     } while (RAW == '|');
5886     if (RAW != ')') {
5887 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5888         xmlFreeEnumeration(ret);
5889 	return(NULL);
5890     }
5891     NEXT;
5892     return(ret);
5893 }
5894 
5895 /**
5896  * xmlParseEnumerationType:
5897  * @ctxt:  an XML parser context
5898  *
5899  * parse an Enumeration attribute type.
5900  *
5901  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5902  *
5903  * [ VC: Enumeration ]
5904  * Values of this type must match one of the Nmtoken tokens in
5905  * the declaration
5906  *
5907  * Returns: the enumeration attribute tree built while parsing
5908  */
5909 
5910 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5911 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5912     xmlChar *name;
5913     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5914 
5915     if (RAW != '(') {
5916 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5917 	return(NULL);
5918     }
5919     SHRINK;
5920     do {
5921         NEXT;
5922 	SKIP_BLANKS;
5923         name = xmlParseNmtoken(ctxt);
5924 	if (name == NULL) {
5925 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5926 	    return(ret);
5927 	}
5928 	tmp = ret;
5929 	while (tmp != NULL) {
5930 	    if (xmlStrEqual(name, tmp->name)) {
5931 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5932 	  "standalone: attribute enumeration value token %s duplicated\n",
5933 				 name, NULL);
5934 		if (!xmlDictOwns(ctxt->dict, name))
5935 		    xmlFree(name);
5936 		break;
5937 	    }
5938 	    tmp = tmp->next;
5939 	}
5940 	if (tmp == NULL) {
5941 	    cur = xmlCreateEnumeration(name);
5942 	    if (!xmlDictOwns(ctxt->dict, name))
5943 		xmlFree(name);
5944 	    if (cur == NULL) {
5945                 xmlFreeEnumeration(ret);
5946                 return(NULL);
5947             }
5948 	    if (last == NULL) ret = last = cur;
5949 	    else {
5950 		last->next = cur;
5951 		last = cur;
5952 	    }
5953 	}
5954 	SKIP_BLANKS;
5955     } while (RAW == '|');
5956     if (RAW != ')') {
5957 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5958 	return(ret);
5959     }
5960     NEXT;
5961     return(ret);
5962 }
5963 
5964 /**
5965  * xmlParseEnumeratedType:
5966  * @ctxt:  an XML parser context
5967  * @tree:  the enumeration tree built while parsing
5968  *
5969  * parse an Enumerated attribute type.
5970  *
5971  * [57] EnumeratedType ::= NotationType | Enumeration
5972  *
5973  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5974  *
5975  *
5976  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5977  */
5978 
5979 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5980 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5981     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5982 	SKIP(8);
5983 	if (!IS_BLANK_CH(CUR)) {
5984 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5985 			   "Space required after 'NOTATION'\n");
5986 	    return(0);
5987 	}
5988         SKIP_BLANKS;
5989 	*tree = xmlParseNotationType(ctxt);
5990 	if (*tree == NULL) return(0);
5991 	return(XML_ATTRIBUTE_NOTATION);
5992     }
5993     *tree = xmlParseEnumerationType(ctxt);
5994     if (*tree == NULL) return(0);
5995     return(XML_ATTRIBUTE_ENUMERATION);
5996 }
5997 
5998 /**
5999  * xmlParseAttributeType:
6000  * @ctxt:  an XML parser context
6001  * @tree:  the enumeration tree built while parsing
6002  *
6003  * parse the Attribute list def for an element
6004  *
6005  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6006  *
6007  * [55] StringType ::= 'CDATA'
6008  *
6009  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6010  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6011  *
6012  * Validity constraints for attribute values syntax are checked in
6013  * xmlValidateAttributeValue()
6014  *
6015  * [ VC: ID ]
6016  * Values of type ID must match the Name production. A name must not
6017  * appear more than once in an XML document as a value of this type;
6018  * i.e., ID values must uniquely identify the elements which bear them.
6019  *
6020  * [ VC: One ID per Element Type ]
6021  * No element type may have more than one ID attribute specified.
6022  *
6023  * [ VC: ID Attribute Default ]
6024  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6025  *
6026  * [ VC: IDREF ]
6027  * Values of type IDREF must match the Name production, and values
6028  * of type IDREFS must match Names; each IDREF Name must match the value
6029  * of an ID attribute on some element in the XML document; i.e. IDREF
6030  * values must match the value of some ID attribute.
6031  *
6032  * [ VC: Entity Name ]
6033  * Values of type ENTITY must match the Name production, values
6034  * of type ENTITIES must match Names; each Entity Name must match the
6035  * name of an unparsed entity declared in the DTD.
6036  *
6037  * [ VC: Name Token ]
6038  * Values of type NMTOKEN must match the Nmtoken production; values
6039  * of type NMTOKENS must match Nmtokens.
6040  *
6041  * Returns the attribute type
6042  */
6043 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6044 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6045     SHRINK;
6046     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6047 	SKIP(5);
6048 	return(XML_ATTRIBUTE_CDATA);
6049      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6050 	SKIP(6);
6051 	return(XML_ATTRIBUTE_IDREFS);
6052      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6053 	SKIP(5);
6054 	return(XML_ATTRIBUTE_IDREF);
6055      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6056         SKIP(2);
6057 	return(XML_ATTRIBUTE_ID);
6058      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6059 	SKIP(6);
6060 	return(XML_ATTRIBUTE_ENTITY);
6061      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6062 	SKIP(8);
6063 	return(XML_ATTRIBUTE_ENTITIES);
6064      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6065 	SKIP(8);
6066 	return(XML_ATTRIBUTE_NMTOKENS);
6067      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6068 	SKIP(7);
6069 	return(XML_ATTRIBUTE_NMTOKEN);
6070      }
6071      return(xmlParseEnumeratedType(ctxt, tree));
6072 }
6073 
6074 /**
6075  * xmlParseAttributeListDecl:
6076  * @ctxt:  an XML parser context
6077  *
6078  * : parse the Attribute list def for an element
6079  *
6080  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6081  *
6082  * [53] AttDef ::= S Name S AttType S DefaultDecl
6083  *
6084  */
6085 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)6086 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6087     const xmlChar *elemName;
6088     const xmlChar *attrName;
6089     xmlEnumerationPtr tree;
6090 
6091     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6092 	xmlParserInputPtr input = ctxt->input;
6093 
6094 	SKIP(9);
6095 	if (!IS_BLANK_CH(CUR)) {
6096 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6097 		                 "Space required after '<!ATTLIST'\n");
6098 	}
6099         SKIP_BLANKS;
6100         elemName = xmlParseName(ctxt);
6101 	if (elemName == NULL) {
6102 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6103 			   "ATTLIST: no name for Element\n");
6104 	    return;
6105 	}
6106 	SKIP_BLANKS;
6107 	GROW;
6108 	while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6109 	    const xmlChar *check = CUR_PTR;
6110 	    int type;
6111 	    int def;
6112 	    xmlChar *defaultValue = NULL;
6113 
6114 	    GROW;
6115             tree = NULL;
6116 	    attrName = xmlParseName(ctxt);
6117 	    if (attrName == NULL) {
6118 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6119 			       "ATTLIST: no name for Attribute\n");
6120 		break;
6121 	    }
6122 	    GROW;
6123 	    if (!IS_BLANK_CH(CUR)) {
6124 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6125 		        "Space required after the attribute name\n");
6126 		break;
6127 	    }
6128 	    SKIP_BLANKS;
6129 
6130 	    type = xmlParseAttributeType(ctxt, &tree);
6131 	    if (type <= 0) {
6132 	        break;
6133 	    }
6134 
6135 	    GROW;
6136 	    if (!IS_BLANK_CH(CUR)) {
6137 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6138 			       "Space required after the attribute type\n");
6139 	        if (tree != NULL)
6140 		    xmlFreeEnumeration(tree);
6141 		break;
6142 	    }
6143 	    SKIP_BLANKS;
6144 
6145 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
6146 	    if (def <= 0) {
6147                 if (defaultValue != NULL)
6148 		    xmlFree(defaultValue);
6149 	        if (tree != NULL)
6150 		    xmlFreeEnumeration(tree);
6151 	        break;
6152 	    }
6153 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6154 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
6155 
6156 	    GROW;
6157             if (RAW != '>') {
6158 		if (!IS_BLANK_CH(CUR)) {
6159 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6160 			"Space required after the attribute default value\n");
6161 		    if (defaultValue != NULL)
6162 			xmlFree(defaultValue);
6163 		    if (tree != NULL)
6164 			xmlFreeEnumeration(tree);
6165 		    break;
6166 		}
6167 		SKIP_BLANKS;
6168 	    }
6169 	    if (check == CUR_PTR) {
6170 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6171 		            "in xmlParseAttributeListDecl\n");
6172 		if (defaultValue != NULL)
6173 		    xmlFree(defaultValue);
6174 	        if (tree != NULL)
6175 		    xmlFreeEnumeration(tree);
6176 		break;
6177 	    }
6178 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6179 		(ctxt->sax->attributeDecl != NULL))
6180 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6181 	                        type, def, defaultValue, tree);
6182 	    else if (tree != NULL)
6183 		xmlFreeEnumeration(tree);
6184 
6185 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
6186 	        (def != XML_ATTRIBUTE_IMPLIED) &&
6187 		(def != XML_ATTRIBUTE_REQUIRED)) {
6188 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6189 	    }
6190 	    if (ctxt->sax2) {
6191 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6192 	    }
6193 	    if (defaultValue != NULL)
6194 	        xmlFree(defaultValue);
6195 	    GROW;
6196 	}
6197 	if (RAW == '>') {
6198 	    if (input != ctxt->input) {
6199 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6200     "Attribute list declaration doesn't start and stop in the same entity\n",
6201                                  NULL, NULL);
6202 	    }
6203 	    NEXT;
6204 	}
6205     }
6206 }
6207 
6208 /**
6209  * xmlParseElementMixedContentDecl:
6210  * @ctxt:  an XML parser context
6211  * @inputchk:  the input used for the current entity, needed for boundary checks
6212  *
6213  * parse the declaration for a Mixed Element content
6214  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6215  *
6216  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6217  *                '(' S? '#PCDATA' S? ')'
6218  *
6219  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6220  *
6221  * [ VC: No Duplicate Types ]
6222  * The same name must not appear more than once in a single
6223  * mixed-content declaration.
6224  *
6225  * returns: the list of the xmlElementContentPtr describing the element choices
6226  */
6227 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6228 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6229     xmlElementContentPtr ret = NULL, cur = NULL, n;
6230     const xmlChar *elem = NULL;
6231 
6232     GROW;
6233     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6234 	SKIP(7);
6235 	SKIP_BLANKS;
6236 	SHRINK;
6237 	if (RAW == ')') {
6238 	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6239 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6240 "Element content declaration doesn't start and stop in the same entity\n",
6241                                  NULL, NULL);
6242 	    }
6243 	    NEXT;
6244 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6245 	    if (ret == NULL)
6246 	        return(NULL);
6247 	    if (RAW == '*') {
6248 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6249 		NEXT;
6250 	    }
6251 	    return(ret);
6252 	}
6253 	if ((RAW == '(') || (RAW == '|')) {
6254 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6255 	    if (ret == NULL) return(NULL);
6256 	}
6257 	while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6258 	    NEXT;
6259 	    if (elem == NULL) {
6260 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6261 		if (ret == NULL) return(NULL);
6262 		ret->c1 = cur;
6263 		if (cur != NULL)
6264 		    cur->parent = ret;
6265 		cur = ret;
6266 	    } else {
6267 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6268 		if (n == NULL) return(NULL);
6269 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6270 		if (n->c1 != NULL)
6271 		    n->c1->parent = n;
6272 	        cur->c2 = n;
6273 		if (n != NULL)
6274 		    n->parent = cur;
6275 		cur = n;
6276 	    }
6277 	    SKIP_BLANKS;
6278 	    elem = xmlParseName(ctxt);
6279 	    if (elem == NULL) {
6280 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6281 			"xmlParseElementMixedContentDecl : Name expected\n");
6282 		xmlFreeDocElementContent(ctxt->myDoc, cur);
6283 		return(NULL);
6284 	    }
6285 	    SKIP_BLANKS;
6286 	    GROW;
6287 	}
6288 	if ((RAW == ')') && (NXT(1) == '*')) {
6289 	    if (elem != NULL) {
6290 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6291 		                               XML_ELEMENT_CONTENT_ELEMENT);
6292 		if (cur->c2 != NULL)
6293 		    cur->c2->parent = cur;
6294             }
6295             if (ret != NULL)
6296                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6297 	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6298 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6299 "Element content declaration doesn't start and stop in the same entity\n",
6300 				 NULL, NULL);
6301 	    }
6302 	    SKIP(2);
6303 	} else {
6304 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6305 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6306 	    return(NULL);
6307 	}
6308 
6309     } else {
6310 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6311     }
6312     return(ret);
6313 }
6314 
6315 /**
6316  * xmlParseElementChildrenContentDeclPriv:
6317  * @ctxt:  an XML parser context
6318  * @inputchk:  the input used for the current entity, needed for boundary checks
6319  * @depth: the level of recursion
6320  *
6321  * parse the declaration for a Mixed Element content
6322  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6323  *
6324  *
6325  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6326  *
6327  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6328  *
6329  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6330  *
6331  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6332  *
6333  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6334  * TODO Parameter-entity replacement text must be properly nested
6335  *	with parenthesized groups. That is to say, if either of the
6336  *	opening or closing parentheses in a choice, seq, or Mixed
6337  *	construct is contained in the replacement text for a parameter
6338  *	entity, both must be contained in the same replacement text. For
6339  *	interoperability, if a parameter-entity reference appears in a
6340  *	choice, seq, or Mixed construct, its replacement text should not
6341  *	be empty, and neither the first nor last non-blank character of
6342  *	the replacement text should be a connector (| or ,).
6343  *
6344  * Returns the tree of xmlElementContentPtr describing the element
6345  *          hierarchy.
6346  */
6347 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6348 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6349                                        int depth) {
6350     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6351     const xmlChar *elem;
6352     xmlChar type = 0;
6353 
6354     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6355         (depth >  2048)) {
6356         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6357 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6358                           depth);
6359 	return(NULL);
6360     }
6361     SKIP_BLANKS;
6362     GROW;
6363     if (RAW == '(') {
6364 	int inputid = ctxt->input->id;
6365 
6366         /* Recurse on first child */
6367 	NEXT;
6368 	SKIP_BLANKS;
6369         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6370                                                            depth + 1);
6371 	SKIP_BLANKS;
6372 	GROW;
6373     } else {
6374 	elem = xmlParseName(ctxt);
6375 	if (elem == NULL) {
6376 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6377 	    return(NULL);
6378 	}
6379         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6380 	if (cur == NULL) {
6381 	    xmlErrMemory(ctxt, NULL);
6382 	    return(NULL);
6383 	}
6384 	GROW;
6385 	if (RAW == '?') {
6386 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6387 	    NEXT;
6388 	} else if (RAW == '*') {
6389 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6390 	    NEXT;
6391 	} else if (RAW == '+') {
6392 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6393 	    NEXT;
6394 	} else {
6395 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6396 	}
6397 	GROW;
6398     }
6399     SKIP_BLANKS;
6400     SHRINK;
6401     while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6402         /*
6403 	 * Each loop we parse one separator and one element.
6404 	 */
6405         if (RAW == ',') {
6406 	    if (type == 0) type = CUR;
6407 
6408 	    /*
6409 	     * Detect "Name | Name , Name" error
6410 	     */
6411 	    else if (type != CUR) {
6412 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6413 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6414 		                  type);
6415 		if ((last != NULL) && (last != ret))
6416 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6417 		if (ret != NULL)
6418 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6419 		return(NULL);
6420 	    }
6421 	    NEXT;
6422 
6423 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6424 	    if (op == NULL) {
6425 		if ((last != NULL) && (last != ret))
6426 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6427 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6428 		return(NULL);
6429 	    }
6430 	    if (last == NULL) {
6431 		op->c1 = ret;
6432 		if (ret != NULL)
6433 		    ret->parent = op;
6434 		ret = cur = op;
6435 	    } else {
6436 	        cur->c2 = op;
6437 		if (op != NULL)
6438 		    op->parent = cur;
6439 		op->c1 = last;
6440 		if (last != NULL)
6441 		    last->parent = op;
6442 		cur =op;
6443 		last = NULL;
6444 	    }
6445 	} else if (RAW == '|') {
6446 	    if (type == 0) type = CUR;
6447 
6448 	    /*
6449 	     * Detect "Name , Name | Name" error
6450 	     */
6451 	    else if (type != CUR) {
6452 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6453 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6454 				  type);
6455 		if ((last != NULL) && (last != ret))
6456 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6457 		if (ret != NULL)
6458 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6459 		return(NULL);
6460 	    }
6461 	    NEXT;
6462 
6463 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6464 	    if (op == NULL) {
6465 		if ((last != NULL) && (last != ret))
6466 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6467 		if (ret != NULL)
6468 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6469 		return(NULL);
6470 	    }
6471 	    if (last == NULL) {
6472 		op->c1 = ret;
6473 		if (ret != NULL)
6474 		    ret->parent = op;
6475 		ret = cur = op;
6476 	    } else {
6477 	        cur->c2 = op;
6478 		if (op != NULL)
6479 		    op->parent = cur;
6480 		op->c1 = last;
6481 		if (last != NULL)
6482 		    last->parent = op;
6483 		cur =op;
6484 		last = NULL;
6485 	    }
6486 	} else {
6487 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6488 	    if ((last != NULL) && (last != ret))
6489 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6490 	    if (ret != NULL)
6491 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6492 	    return(NULL);
6493 	}
6494 	GROW;
6495 	SKIP_BLANKS;
6496 	GROW;
6497 	if (RAW == '(') {
6498 	    int inputid = ctxt->input->id;
6499 	    /* Recurse on second child */
6500 	    NEXT;
6501 	    SKIP_BLANKS;
6502 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6503                                                           depth + 1);
6504 	    SKIP_BLANKS;
6505 	} else {
6506 	    elem = xmlParseName(ctxt);
6507 	    if (elem == NULL) {
6508 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6509 		if (ret != NULL)
6510 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6511 		return(NULL);
6512 	    }
6513 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6514 	    if (last == NULL) {
6515 		if (ret != NULL)
6516 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6517 		return(NULL);
6518 	    }
6519 	    if (RAW == '?') {
6520 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6521 		NEXT;
6522 	    } else if (RAW == '*') {
6523 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6524 		NEXT;
6525 	    } else if (RAW == '+') {
6526 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6527 		NEXT;
6528 	    } else {
6529 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6530 	    }
6531 	}
6532 	SKIP_BLANKS;
6533 	GROW;
6534     }
6535     if ((cur != NULL) && (last != NULL)) {
6536         cur->c2 = last;
6537 	if (last != NULL)
6538 	    last->parent = cur;
6539     }
6540     if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6541 	xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6542 "Element content declaration doesn't start and stop in the same entity\n",
6543 			 NULL, NULL);
6544     }
6545     NEXT;
6546     if (RAW == '?') {
6547 	if (ret != NULL) {
6548 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6549 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6550 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6551 	    else
6552 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6553 	}
6554 	NEXT;
6555     } else if (RAW == '*') {
6556 	if (ret != NULL) {
6557 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6558 	    cur = ret;
6559 	    /*
6560 	     * Some normalization:
6561 	     * (a | b* | c?)* == (a | b | c)*
6562 	     */
6563 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6564 		if ((cur->c1 != NULL) &&
6565 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6566 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6567 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6568 		if ((cur->c2 != NULL) &&
6569 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6570 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6571 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6572 		cur = cur->c2;
6573 	    }
6574 	}
6575 	NEXT;
6576     } else if (RAW == '+') {
6577 	if (ret != NULL) {
6578 	    int found = 0;
6579 
6580 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6581 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6582 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6583 	    else
6584 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6585 	    /*
6586 	     * Some normalization:
6587 	     * (a | b*)+ == (a | b)*
6588 	     * (a | b?)+ == (a | b)*
6589 	     */
6590 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6591 		if ((cur->c1 != NULL) &&
6592 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6593 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6594 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6595 		    found = 1;
6596 		}
6597 		if ((cur->c2 != NULL) &&
6598 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6599 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6600 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6601 		    found = 1;
6602 		}
6603 		cur = cur->c2;
6604 	    }
6605 	    if (found)
6606 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6607 	}
6608 	NEXT;
6609     }
6610     return(ret);
6611 }
6612 
6613 /**
6614  * xmlParseElementChildrenContentDecl:
6615  * @ctxt:  an XML parser context
6616  * @inputchk:  the input used for the current entity, needed for boundary checks
6617  *
6618  * parse the declaration for a Mixed Element content
6619  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6620  *
6621  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6622  *
6623  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6624  *
6625  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6626  *
6627  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6628  *
6629  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6630  * TODO Parameter-entity replacement text must be properly nested
6631  *	with parenthesized groups. That is to say, if either of the
6632  *	opening or closing parentheses in a choice, seq, or Mixed
6633  *	construct is contained in the replacement text for a parameter
6634  *	entity, both must be contained in the same replacement text. For
6635  *	interoperability, if a parameter-entity reference appears in a
6636  *	choice, seq, or Mixed construct, its replacement text should not
6637  *	be empty, and neither the first nor last non-blank character of
6638  *	the replacement text should be a connector (| or ,).
6639  *
6640  * Returns the tree of xmlElementContentPtr describing the element
6641  *          hierarchy.
6642  */
6643 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6644 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6645     /* stub left for API/ABI compat */
6646     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6647 }
6648 
6649 /**
6650  * xmlParseElementContentDecl:
6651  * @ctxt:  an XML parser context
6652  * @name:  the name of the element being defined.
6653  * @result:  the Element Content pointer will be stored here if any
6654  *
6655  * parse the declaration for an Element content either Mixed or Children,
6656  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6657  *
6658  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6659  *
6660  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6661  */
6662 
6663 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6664 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6665                            xmlElementContentPtr *result) {
6666 
6667     xmlElementContentPtr tree = NULL;
6668     int inputid = ctxt->input->id;
6669     int res;
6670 
6671     *result = NULL;
6672 
6673     if (RAW != '(') {
6674 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6675 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6676 	return(-1);
6677     }
6678     NEXT;
6679     GROW;
6680     if (ctxt->instate == XML_PARSER_EOF)
6681         return(-1);
6682     SKIP_BLANKS;
6683     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6684         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6685 	res = XML_ELEMENT_TYPE_MIXED;
6686     } else {
6687         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6688 	res = XML_ELEMENT_TYPE_ELEMENT;
6689     }
6690     SKIP_BLANKS;
6691     *result = tree;
6692     return(res);
6693 }
6694 
6695 /**
6696  * xmlParseElementDecl:
6697  * @ctxt:  an XML parser context
6698  *
6699  * parse an Element declaration.
6700  *
6701  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6702  *
6703  * [ VC: Unique Element Type Declaration ]
6704  * No element type may be declared more than once
6705  *
6706  * Returns the type of the element, or -1 in case of error
6707  */
6708 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6709 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6710     const xmlChar *name;
6711     int ret = -1;
6712     xmlElementContentPtr content  = NULL;
6713 
6714     /* GROW; done in the caller */
6715     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6716 	xmlParserInputPtr input = ctxt->input;
6717 
6718 	SKIP(9);
6719 	if (!IS_BLANK_CH(CUR)) {
6720 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6721 		           "Space required after 'ELEMENT'\n");
6722 	    return(-1);
6723 	}
6724         SKIP_BLANKS;
6725         name = xmlParseName(ctxt);
6726 	if (name == NULL) {
6727 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6728 			   "xmlParseElementDecl: no name for Element\n");
6729 	    return(-1);
6730 	}
6731 	while ((RAW == 0) && (ctxt->inputNr > 1))
6732 	    xmlPopInput(ctxt);
6733 	if (!IS_BLANK_CH(CUR)) {
6734 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6735 			   "Space required after the element name\n");
6736 	}
6737         SKIP_BLANKS;
6738 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6739 	    SKIP(5);
6740 	    /*
6741 	     * Element must always be empty.
6742 	     */
6743 	    ret = XML_ELEMENT_TYPE_EMPTY;
6744 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6745 	           (NXT(2) == 'Y')) {
6746 	    SKIP(3);
6747 	    /*
6748 	     * Element is a generic container.
6749 	     */
6750 	    ret = XML_ELEMENT_TYPE_ANY;
6751 	} else if (RAW == '(') {
6752 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6753 	} else {
6754 	    /*
6755 	     * [ WFC: PEs in Internal Subset ] error handling.
6756 	     */
6757 	    if ((RAW == '%') && (ctxt->external == 0) &&
6758 	        (ctxt->inputNr == 1)) {
6759 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6760 	  "PEReference: forbidden within markup decl in internal subset\n");
6761 	    } else {
6762 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6763 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6764             }
6765 	    return(-1);
6766 	}
6767 
6768 	SKIP_BLANKS;
6769 	/*
6770 	 * Pop-up of finished entities.
6771 	 */
6772 	while ((RAW == 0) && (ctxt->inputNr > 1))
6773 	    xmlPopInput(ctxt);
6774 	SKIP_BLANKS;
6775 
6776 	if (RAW != '>') {
6777 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6778 	    if (content != NULL) {
6779 		xmlFreeDocElementContent(ctxt->myDoc, content);
6780 	    }
6781 	} else {
6782 	    if (input != ctxt->input) {
6783 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6784     "Element declaration doesn't start and stop in the same entity\n");
6785 	    }
6786 
6787 	    NEXT;
6788 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6789 		(ctxt->sax->elementDecl != NULL)) {
6790 		if (content != NULL)
6791 		    content->parent = NULL;
6792 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6793 		                       content);
6794 		if ((content != NULL) && (content->parent == NULL)) {
6795 		    /*
6796 		     * this is a trick: if xmlAddElementDecl is called,
6797 		     * instead of copying the full tree it is plugged directly
6798 		     * if called from the parser. Avoid duplicating the
6799 		     * interfaces or change the API/ABI
6800 		     */
6801 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6802 		}
6803 	    } else if (content != NULL) {
6804 		xmlFreeDocElementContent(ctxt->myDoc, content);
6805 	    }
6806 	}
6807     }
6808     return(ret);
6809 }
6810 
6811 /**
6812  * xmlParseConditionalSections
6813  * @ctxt:  an XML parser context
6814  *
6815  * [61] conditionalSect ::= includeSect | ignoreSect
6816  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6817  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6818  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6819  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6820  */
6821 
6822 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6823 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6824     int id = ctxt->input->id;
6825 
6826     SKIP(3);
6827     SKIP_BLANKS;
6828     if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6829 	SKIP(7);
6830 	SKIP_BLANKS;
6831 	if (RAW != '[') {
6832 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6833 	    xmlHaltParser(ctxt);
6834 	    return;
6835 	} else {
6836 	    if (ctxt->input->id != id) {
6837 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6838 	    "All markup of the conditional section is not in the same entity\n",
6839 				     NULL, NULL);
6840 	    }
6841 	    NEXT;
6842 	}
6843 	if (xmlParserDebugEntities) {
6844 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6845 		xmlGenericError(xmlGenericErrorContext,
6846 			"%s(%d): ", ctxt->input->filename,
6847 			ctxt->input->line);
6848 	    xmlGenericError(xmlGenericErrorContext,
6849 		    "Entering INCLUDE Conditional Section\n");
6850 	}
6851 
6852 	while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6853 	        (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6854 	    const xmlChar *check = CUR_PTR;
6855 	    unsigned int cons = ctxt->input->consumed;
6856 
6857 	    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6858 		xmlParseConditionalSections(ctxt);
6859 	    } else if (IS_BLANK_CH(CUR)) {
6860 		NEXT;
6861 	    } else if (RAW == '%') {
6862 		xmlParsePEReference(ctxt);
6863 	    } else
6864 		xmlParseMarkupDecl(ctxt);
6865 
6866 	    /*
6867 	     * Pop-up of finished entities.
6868 	     */
6869 	    while ((RAW == 0) && (ctxt->inputNr > 1))
6870 		xmlPopInput(ctxt);
6871 
6872 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6873 		xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6874 		xmlHaltParser(ctxt);
6875 		break;
6876 	    }
6877 	}
6878 	if (xmlParserDebugEntities) {
6879 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6880 		xmlGenericError(xmlGenericErrorContext,
6881 			"%s(%d): ", ctxt->input->filename,
6882 			ctxt->input->line);
6883 	    xmlGenericError(xmlGenericErrorContext,
6884 		    "Leaving INCLUDE Conditional Section\n");
6885 	}
6886 
6887     } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6888 	int state;
6889 	xmlParserInputState instate;
6890 	int depth = 0;
6891 
6892 	SKIP(6);
6893 	SKIP_BLANKS;
6894 	if (RAW != '[') {
6895 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6896 	    xmlHaltParser(ctxt);
6897 	    return;
6898 	} else {
6899 	    if (ctxt->input->id != id) {
6900 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6901 	    "All markup of the conditional section is not in the same entity\n",
6902 				     NULL, NULL);
6903 	    }
6904 	    NEXT;
6905 	}
6906 	if (xmlParserDebugEntities) {
6907 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6908 		xmlGenericError(xmlGenericErrorContext,
6909 			"%s(%d): ", ctxt->input->filename,
6910 			ctxt->input->line);
6911 	    xmlGenericError(xmlGenericErrorContext,
6912 		    "Entering IGNORE Conditional Section\n");
6913 	}
6914 
6915 	/*
6916 	 * Parse up to the end of the conditional section
6917 	 * But disable SAX event generating DTD building in the meantime
6918 	 */
6919 	state = ctxt->disableSAX;
6920 	instate = ctxt->instate;
6921 	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6922 	ctxt->instate = XML_PARSER_IGNORE;
6923 
6924 	while (((depth >= 0) && (RAW != 0)) &&
6925                (ctxt->instate != XML_PARSER_EOF)) {
6926 	  if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6927 	    depth++;
6928 	    SKIP(3);
6929 	    continue;
6930 	  }
6931 	  if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6932 	    if (--depth >= 0) SKIP(3);
6933 	    continue;
6934 	  }
6935 	  NEXT;
6936 	  continue;
6937 	}
6938 
6939 	ctxt->disableSAX = state;
6940 	ctxt->instate = instate;
6941 
6942 	if (xmlParserDebugEntities) {
6943 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6944 		xmlGenericError(xmlGenericErrorContext,
6945 			"%s(%d): ", ctxt->input->filename,
6946 			ctxt->input->line);
6947 	    xmlGenericError(xmlGenericErrorContext,
6948 		    "Leaving IGNORE Conditional Section\n");
6949 	}
6950 
6951     } else {
6952 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6953 	xmlHaltParser(ctxt);
6954 	return;
6955     }
6956 
6957     if (RAW == 0)
6958         SHRINK;
6959 
6960     if (RAW == 0) {
6961 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6962     } else {
6963 	if (ctxt->input->id != id) {
6964 	    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6965 	"All markup of the conditional section is not in the same entity\n",
6966 				 NULL, NULL);
6967 	}
6968 	if ((ctxt-> instate != XML_PARSER_EOF) &&
6969 	    ((ctxt->input->cur + 3) <= ctxt->input->end))
6970 	    SKIP(3);
6971     }
6972 }
6973 
6974 /**
6975  * xmlParseMarkupDecl:
6976  * @ctxt:  an XML parser context
6977  *
6978  * parse Markup declarations
6979  *
6980  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6981  *                     NotationDecl | PI | Comment
6982  *
6983  * [ VC: Proper Declaration/PE Nesting ]
6984  * Parameter-entity replacement text must be properly nested with
6985  * markup declarations. That is to say, if either the first character
6986  * or the last character of a markup declaration (markupdecl above) is
6987  * contained in the replacement text for a parameter-entity reference,
6988  * both must be contained in the same replacement text.
6989  *
6990  * [ WFC: PEs in Internal Subset ]
6991  * In the internal DTD subset, parameter-entity references can occur
6992  * only where markup declarations can occur, not within markup declarations.
6993  * (This does not apply to references that occur in external parameter
6994  * entities or to the external subset.)
6995  */
6996 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6997 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6998     GROW;
6999     if (CUR == '<') {
7000         if (NXT(1) == '!') {
7001 	    switch (NXT(2)) {
7002 	        case 'E':
7003 		    if (NXT(3) == 'L')
7004 			xmlParseElementDecl(ctxt);
7005 		    else if (NXT(3) == 'N')
7006 			xmlParseEntityDecl(ctxt);
7007 		    break;
7008 	        case 'A':
7009 		    xmlParseAttributeListDecl(ctxt);
7010 		    break;
7011 	        case 'N':
7012 		    xmlParseNotationDecl(ctxt);
7013 		    break;
7014 	        case '-':
7015 		    xmlParseComment(ctxt);
7016 		    break;
7017 		default:
7018 		    /* there is an error but it will be detected later */
7019 		    break;
7020 	    }
7021 	} else if (NXT(1) == '?') {
7022 	    xmlParsePI(ctxt);
7023 	}
7024     }
7025 
7026     /*
7027      * detect requirement to exit there and act accordingly
7028      * and avoid having instate overriden later on
7029      */
7030     if (ctxt->instate == XML_PARSER_EOF)
7031         return;
7032 
7033     /*
7034      * This is only for internal subset. On external entities,
7035      * the replacement is done before parsing stage
7036      */
7037     if ((ctxt->external == 0) && (ctxt->inputNr == 1))
7038 	xmlParsePEReference(ctxt);
7039 
7040     /*
7041      * Conditional sections are allowed from entities included
7042      * by PE References in the internal subset.
7043      */
7044     if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
7045         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7046 	    xmlParseConditionalSections(ctxt);
7047 	}
7048     }
7049 
7050     ctxt->instate = XML_PARSER_DTD;
7051 }
7052 
7053 /**
7054  * xmlParseTextDecl:
7055  * @ctxt:  an XML parser context
7056  *
7057  * parse an XML declaration header for external entities
7058  *
7059  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7060  */
7061 
7062 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)7063 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7064     xmlChar *version;
7065     const xmlChar *encoding;
7066 
7067     /*
7068      * We know that '<?xml' is here.
7069      */
7070     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7071 	SKIP(5);
7072     } else {
7073 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7074 	return;
7075     }
7076 
7077     if (!IS_BLANK_CH(CUR)) {
7078 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7079 		       "Space needed after '<?xml'\n");
7080     }
7081     SKIP_BLANKS;
7082 
7083     /*
7084      * We may have the VersionInfo here.
7085      */
7086     version = xmlParseVersionInfo(ctxt);
7087     if (version == NULL)
7088 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
7089     else {
7090 	if (!IS_BLANK_CH(CUR)) {
7091 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7092 		           "Space needed here\n");
7093 	}
7094     }
7095     ctxt->input->version = version;
7096 
7097     /*
7098      * We must have the encoding declaration
7099      */
7100     encoding = xmlParseEncodingDecl(ctxt);
7101     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7102 	/*
7103 	 * The XML REC instructs us to stop parsing right here
7104 	 */
7105         return;
7106     }
7107     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7108 	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7109 		       "Missing encoding in text declaration\n");
7110     }
7111 
7112     SKIP_BLANKS;
7113     if ((RAW == '?') && (NXT(1) == '>')) {
7114         SKIP(2);
7115     } else if (RAW == '>') {
7116         /* Deprecated old WD ... */
7117 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7118 	NEXT;
7119     } else {
7120 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7121 	MOVETO_ENDTAG(CUR_PTR);
7122 	NEXT;
7123     }
7124 }
7125 
7126 /**
7127  * xmlParseExternalSubset:
7128  * @ctxt:  an XML parser context
7129  * @ExternalID: the external identifier
7130  * @SystemID: the system identifier (or URL)
7131  *
7132  * parse Markup declarations from an external subset
7133  *
7134  * [30] extSubset ::= textDecl? extSubsetDecl
7135  *
7136  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7137  */
7138 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7139 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7140                        const xmlChar *SystemID) {
7141     xmlDetectSAX2(ctxt);
7142     GROW;
7143 
7144     if ((ctxt->encoding == NULL) &&
7145         (ctxt->input->end - ctxt->input->cur >= 4)) {
7146         xmlChar start[4];
7147 	xmlCharEncoding enc;
7148 
7149 	start[0] = RAW;
7150 	start[1] = NXT(1);
7151 	start[2] = NXT(2);
7152 	start[3] = NXT(3);
7153 	enc = xmlDetectCharEncoding(start, 4);
7154 	if (enc != XML_CHAR_ENCODING_NONE)
7155 	    xmlSwitchEncoding(ctxt, enc);
7156     }
7157 
7158     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7159 	xmlParseTextDecl(ctxt);
7160 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7161 	    /*
7162 	     * The XML REC instructs us to stop parsing right here
7163 	     */
7164 	    xmlHaltParser(ctxt);
7165 	    return;
7166 	}
7167     }
7168     if (ctxt->myDoc == NULL) {
7169         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7170 	if (ctxt->myDoc == NULL) {
7171 	    xmlErrMemory(ctxt, "New Doc failed");
7172 	    return;
7173 	}
7174 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
7175     }
7176     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7177         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7178 
7179     ctxt->instate = XML_PARSER_DTD;
7180     ctxt->external = 1;
7181     while (((RAW == '<') && (NXT(1) == '?')) ||
7182            ((RAW == '<') && (NXT(1) == '!')) ||
7183 	   (RAW == '%') || IS_BLANK_CH(CUR)) {
7184 	const xmlChar *check = CUR_PTR;
7185 	unsigned int cons = ctxt->input->consumed;
7186 
7187 	GROW;
7188         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7189 	    xmlParseConditionalSections(ctxt);
7190 	} else if (IS_BLANK_CH(CUR)) {
7191 	    NEXT;
7192 	} else if (RAW == '%') {
7193             xmlParsePEReference(ctxt);
7194 	} else
7195 	    xmlParseMarkupDecl(ctxt);
7196 
7197 	/*
7198 	 * Pop-up of finished entities.
7199 	 */
7200 	while ((RAW == 0) && (ctxt->inputNr > 1))
7201 	    xmlPopInput(ctxt);
7202 
7203 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7204 	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7205 	    break;
7206 	}
7207     }
7208 
7209     if (RAW != 0) {
7210 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7211     }
7212 
7213 }
7214 
7215 /**
7216  * xmlParseReference:
7217  * @ctxt:  an XML parser context
7218  *
7219  * parse and handle entity references in content, depending on the SAX
7220  * interface, this may end-up in a call to character() if this is a
7221  * CharRef, a predefined entity, if there is no reference() callback.
7222  * or if the parser was asked to switch to that mode.
 *
 * Does nothing unless the current character is '&'. For a general
 * entity, the first reference may trigger parsing of the entity content
 * (stored in ent->children when a node list is produced) and records
 * the outcome in ent->checked; later references either add the recorded
 * cost to ctxt->nbentities, report the reference through the SAX
 * reference() callback, or insert the entity nodes (or copies of them)
 * under ctxt->node.
7223  *
7224  * [67] Reference ::= EntityRef | CharRef
7225  */
7226 void
xmlParseReference(xmlParserCtxtPtr ctxt)7227 xmlParseReference(xmlParserCtxtPtr ctxt) {
7228     xmlEntityPtr ent;
7229     xmlChar *val;
    /* value of ent->checked before this call possibly updates it */
7230     int was_checked;
    /* node list produced by the first-reference parse, if any */
7231     xmlNodePtr list = NULL;
7232     xmlParserErrors ret = XML_ERR_OK;
7233 
7234 
7235     if (RAW != '&')
7236         return;
7237 
7238     /*
7239      * Simple case of a CharRef
7240      */
7241     if (NXT(1) == '#') {
7242 	int i = 0;
7243 	xmlChar out[10];
	/* character after "&#": 'x'/'X' means the reference is hexadecimal */
7244 	int hex = NXT(2);
7245 	int value = xmlParseCharRef(ctxt);
7246 
	/* nothing is delivered for a zero value */
7247 	if (value == 0)
7248 	    return;
7249 	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7250 	    /*
7251 	     * So we are using non-UTF-8 buffers
7252 	     * Check that the char fit on 8bits, if not
7253 	     * generate a CharRef.
7254 	     */
7255 	    if (value <= 0xFF) {
7256 		out[0] = value;
7257 		out[1] = 0;
7258 		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7259 		    (!ctxt->disableSAX))
7260 		    ctxt->sax->characters(ctxt->userData, out, 1);
7261 	    } else {
		/* rebuild the reference name in the notation it was written in */
7262 		if ((hex == 'x') || (hex == 'X'))
7263 		    snprintf((char *)out, sizeof(out), "#x%X", value);
7264 		else
7265 		    snprintf((char *)out, sizeof(out), "#%d", value);
7266 		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7267 		    (!ctxt->disableSAX))
7268 		    ctxt->sax->reference(ctxt->userData, out);
7269 	    }
7270 	} else {
7271 	    /*
7272 	     * Just encode the value in UTF-8
7273 	     */
7274 	    COPY_BUF(0 ,out, i, value);
7275 	    out[i] = 0;
7276 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7277 		(!ctxt->disableSAX))
7278 		ctxt->sax->characters(ctxt->userData, out, i);
7279 	}
7280 	return;
7281     }
7282 
7283     /*
7284      * We are seeing an entity reference
7285      */
7286     ent = xmlParseEntityRef(ctxt);
7287     if (ent == NULL) return;
7288     if (!ctxt->wellFormed)
7289 	return;
7290     was_checked = ent->checked;
7291 
7292     /* special case of predefined entities */
7293     if ((ent->name == NULL) ||
7294         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7295 	val = ent->content;
7296 	if (val == NULL) return;
7297 	/*
7298 	 * inline the entity.
7299 	 */
7300 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7301 	    (!ctxt->disableSAX))
7302 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7303 	return;
7304     }
7305 
7306     /*
7307      * The first reference to the entity trigger a parsing phase
7308      * where the ent->children is filled with the result from
7309      * the parsing.
7310      * Note: external parsed entities will not be loaded, it is not
7311      * required for a non-validating parser, unless the parsing option
7312      * of validating, or substituting entities were given. Doing so is
7313      * far more secure as the parser will only process data coming from
7314      * the document entity by default.
7315      */
7316     if (((ent->checked == 0) ||
7317          ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7318         ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7319          (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
	/* snapshot used below to count entities met while parsing this one */
7320 	unsigned long oldnbent = ctxt->nbentities;
7321 
7322 	/*
7323 	 * This is a bit hackish but this seems the best
7324 	 * way to make sure both SAX and DOM entity support
7325 	 * behaves okay.
7326 	 */
7327 	void *user_data;
7328 	if (ctxt->userData == ctxt)
7329 	    user_data = NULL;
7330 	else
7331 	    user_data = ctxt->userData;
7332 
7333 	/*
7334 	 * Check that this entity is well formed
7335 	 * 4.3.2: An internal general parsed entity is well-formed
7336 	 * if its replacement text matches the production labeled
7337 	 * content.
7338 	 */
7339 	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7340 	    ctxt->depth++;
7341 	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7342 	                                              user_data, &list);
7343 	    ctxt->depth--;
7344 
7345 	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7346 	    ctxt->depth++;
7347 	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7348 	                                   user_data, ctxt->depth, ent->URI,
7349 					   ent->ExternalID, &list);
7350 	    ctxt->depth--;
7351 	} else {
7352 	    ret = XML_ERR_ENTITY_PE_INTERNAL;
7353 	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7354 			 "invalid entity type found\n", NULL);
7355 	}
7356 
7357 	/*
7358 	 * Store the number of entities needing parsing for this entity
7359 	 * content and do checkings
	 * The count (plus one) is kept doubled, which leaves the low bit
	 * free to flag content containing a '<'.
7360 	 */
7361 	ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7362 	if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7363 	    ent->checked |= 1;
7364 	if (ret == XML_ERR_ENTITY_LOOP) {
7365 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7366 	    xmlFreeNodeList(list);
7367 	    return;
7368 	}
7369 	if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7370 	    xmlFreeNodeList(list);
7371 	    return;
7372 	}
7373 
7374 	if ((ret == XML_ERR_OK) && (list != NULL)) {
7375 	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7376 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7377 		(ent->children == NULL)) {
7378 		ent->children = list;
7379 		if (ctxt->replaceEntities) {
7380 		    /*
7381 		     * Prune it directly in the generated document
7382 		     * except for single text nodes.
7383 		     */
7384 		    if (((list->type == XML_TEXT_NODE) &&
7385 			 (list->next == NULL)) ||
7386 			(ctxt->parseMode == XML_PARSE_READER)) {
			/* the entity keeps the nodes; list is cleared */
7387 			list->parent = (xmlNodePtr) ent;
7388 			list = NULL;
7389 			ent->owner = 1;
7390 		    } else {
			/* reparent every node to the current element and
			 * record the last one in ent->last */
7391 			ent->owner = 0;
7392 			while (list != NULL) {
7393 			    list->parent = (xmlNodePtr) ctxt->node;
7394 			    list->doc = ctxt->myDoc;
7395 			    if (list->next == NULL)
7396 				ent->last = list;
7397 			    list = list->next;
7398 			}
7399 			list = ent->children;
7400 #ifdef LIBXML_LEGACY_ENABLED
7401 			if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7402 			  xmlAddEntityReference(ent, list, NULL);
7403 #endif /* LIBXML_LEGACY_ENABLED */
7404 		    }
7405 		} else {
		    /* not substituting: the nodes are parented to the entity;
		     * the walk leaves list NULL */
7406 		    ent->owner = 1;
7407 		    while (list != NULL) {
7408 			list->parent = (xmlNodePtr) ent;
7409 			xmlSetTreeDoc(list, ent->doc);
7410 			if (list->next == NULL)
7411 			    ent->last = list;
7412 			list = list->next;
7413 		    }
7414 		}
7415 	    } else {
7416 		xmlFreeNodeList(list);
7417 		list = NULL;
7418 	    }
7419 	} else if ((ret != XML_ERR_OK) &&
7420 		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
7421 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7422 		     "Entity '%s' failed to parse\n", ent->name);
7423 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
7424 	} else if (list != NULL) {
7425 	    xmlFreeNodeList(list);
7426 	    list = NULL;
7427 	}
7428 	if (ent->checked == 0)
7429 	    ent->checked = 2;
7430     } else if (ent->checked != 1) {
	/* no parse this time: add the cost recorded in ent->checked */
7431 	ctxt->nbentities += ent->checked / 2;
7432     }
7433 
7434     /*
7435      * Now that the entity content has been gathered
7436      * provide it to the application, this can take different forms based
7437      * on the parsing modes.
7438      */
7439     if (ent->children == NULL) {
7440 	/*
7441 	 * Probably running in SAX mode and the callbacks don't
7442 	 * build the entity content. So unless we already went
7443 	 * though parsing for first checking go though the entity
7444 	 * content to generate callbacks associated to the entity
7445 	 */
7446 	if (was_checked != 0) {
7447 	    void *user_data;
7448 	    /*
7449 	     * This is a bit hackish but this seems the best
7450 	     * way to make sure both SAX and DOM entity support
7451 	     * behaves okay.
7452 	     */
7453 	    if (ctxt->userData == ctxt)
7454 		user_data = NULL;
7455 	    else
7456 		user_data = ctxt->userData;
7457 
	    /* same dispatch as above, but no node list is requested (NULL) */
7458 	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7459 		ctxt->depth++;
7460 		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7461 				   ent->content, user_data, NULL);
7462 		ctxt->depth--;
7463 	    } else if (ent->etype ==
7464 		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7465 		ctxt->depth++;
7466 		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7467 			   ctxt->sax, user_data, ctxt->depth,
7468 			   ent->URI, ent->ExternalID, NULL);
7469 		ctxt->depth--;
7470 	    } else {
7471 		ret = XML_ERR_ENTITY_PE_INTERNAL;
7472 		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7473 			     "invalid entity type found\n", NULL);
7474 	    }
7475 	    if (ret == XML_ERR_ENTITY_LOOP) {
7476 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7477 		return;
7478 	    }
7479 	}
7480 	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7481 	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7482 	    /*
7483 	     * Entity reference callback comes second, it's somewhat
7484 	     * superfluous but a compatibility to historical behaviour
7485 	     */
7486 	    ctxt->sax->reference(ctxt->userData, ent->name);
7487 	}
7488 	return;
7489     }
7490 
7491     /*
7492      * From here on ent->children is not NULL (the NULL case returned
7493      * above). When entities are not substituted, only report the reference.
7494      */
7494     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7495 	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7496 	/*
7497 	 * Create a node.
7498 	 */
7499 	ctxt->sax->reference(ctxt->userData, ent->name);
7500 	return;
7501     }
7502 
    /*
     * NOTE(review): the (ent->children == NULL) alternative looks
     * unreachable here, since that case returned earlier and nothing
     * in between assigns ent->children - confirm before simplifying.
     */
7503     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7504 	/*
7505 	 * There is a problem on the handling of _private for entities
7506 	 * (bug 155816): Should we copy the content of the field from
7507 	 * the entity (possibly overwriting some value set by the user
7508 	 * when a copy is created), should we leave it alone, or should
7509 	 * we try to take care of different situations?  The problem
7510 	 * is exacerbated by the usage of this field by the xmlReader.
7511 	 * To fix this bug, we look at _private on the created node
7512 	 * and, if it's NULL, we copy in whatever was in the entity.
7513 	 * If it's not NULL we leave it alone.  This is somewhat of a
7514 	 * hack - maybe we should have further tests to determine
7515 	 * what to do.
7516 	 */
7517 	if ((ctxt->node != NULL) && (ent->children != NULL)) {
7518 	    /*
7519 	     * Seems we are generating the DOM content, do
7520 	     * a simple tree copy for all references except the first
7521 	     * In the first occurrence list contains the replacement.
7522 	     */
7523 	    if (((list == NULL) && (ent->owner == 0)) ||
7524 		(ctxt->parseMode == XML_PARSE_READER)) {
7525 		xmlNodePtr nw = NULL, cur, firstChild = NULL;
7526 
7527 		/*
7528 		 * We are copying here, make sure there is no abuse
7529 		 */
7530 		ctxt->sizeentcopy += ent->length + 5;
7531 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7532 		    return;
7533 
7534 		/*
7535 		 * when operating on a reader, the entities definitions
7536 		 * are always owning the entities subtree.
7537 		if (ctxt->parseMode == XML_PARSE_READER)
7538 		    ent->owner = 1;
7539 		 */
7540 
		/* add a copy of each entity child, up to ent->last, under
		 * ctxt->node; the entity's own nodes are left in place */
7541 		cur = ent->children;
7542 		while (cur != NULL) {
7543 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7544 		    if (nw != NULL) {
7545 			if (nw->_private == NULL)
7546 			    nw->_private = cur->_private;
7547 			if (firstChild == NULL){
7548 			    firstChild = nw;
7549 			}
7550 			nw = xmlAddChild(ctxt->node, nw);
7551 		    }
7552 		    if (cur == ent->last) {
7553 			/*
7554 			 * needed to detect some strange empty
7555 			 * node cases in the reader tests
7556 			 */
7557 			if ((ctxt->parseMode == XML_PARSE_READER) &&
7558 			    (nw != NULL) &&
7559 			    (nw->type == XML_ELEMENT_NODE) &&
7560 			    (nw->children == NULL))
7561 			    nw->extra = 1;
7562 
7563 			break;
7564 		    }
7565 		    cur = cur->next;
7566 		}
7567 #ifdef LIBXML_LEGACY_ENABLED
7568 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7569 		  xmlAddEntityReference(ent, firstChild, nw);
7570 #endif /* LIBXML_LEGACY_ENABLED */
7571 	    } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7572 		xmlNodePtr nw = NULL, cur, next, last,
7573 			   firstChild = NULL;
7574 
7575 		/*
7576 		 * We are copying here, make sure there is no abuse
7577 		 */
7578 		ctxt->sizeentcopy += ent->length + 5;
7579 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7580 		    return;
7581 
7582 		/*
7583 		 * Copy the entity child list and make it the new
7584 		 * entity child list. The goal is to make sure any
7585 		 * ID or REF referenced will be the one from the
7586 		 * document content and not the entity copy.
7587 		 */
7588 		cur = ent->children;
7589 		ent->children = NULL;
7590 		last = ent->last;
7591 		ent->last = NULL;
7592 		while (cur != NULL) {
		    /* detach cur first; next is saved to continue the walk */
7593 		    next = cur->next;
7594 		    cur->next = NULL;
7595 		    cur->parent = NULL;
7596 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7597 		    if (nw != NULL) {
7598 			if (nw->_private == NULL)
7599 			    nw->_private = cur->_private;
7600 			if (firstChild == NULL){
7601 			    firstChild = cur;
7602 			}
			/* the copy goes to the entity, the original node
			 * goes into the document tree */
7603 			xmlAddChild((xmlNodePtr) ent, nw);
7604 			xmlAddChild(ctxt->node, cur);
7605 		    }
7606 		    if (cur == last)
7607 			break;
7608 		    cur = next;
7609 		}
7610 		if (ent->owner == 0)
7611 		    ent->owner = 1;
7612 #ifdef LIBXML_LEGACY_ENABLED
7613 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7614 		  xmlAddEntityReference(ent, firstChild, nw);
7615 #endif /* LIBXML_LEGACY_ENABLED */
7616 	    } else {
7617 		const xmlChar *nbktext;
7618 
7619 		/*
7620 		 * the name change is to avoid coalescing of the
7621 		 * node with a possible previous text one which
7622 		 * would make ent->children a dangling pointer
7623 		 */
7624 		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7625 					-1);
7626 		if (ent->children->type == XML_TEXT_NODE)
7627 		    ent->children->name = nbktext;
7628 		if ((ent->last != ent->children) &&
7629 		    (ent->last->type == XML_TEXT_NODE))
7630 		    ent->last->name = nbktext;
7631 		xmlAddChildList(ctxt->node, ent->children);
7632 	    }
7633 
7634 	    /*
7635 	     * This is to avoid a nasty side effect, see
7636 	     * characters() in SAX.c
7637 	     */
7638 	    ctxt->nodemem = 0;
7639 	    ctxt->nodelen = 0;
7640 	    return;
7641 	}
7642     }
7643 }
7644 
7645 /**
7646  * xmlParseEntityRef:
7647  * @ctxt:  an XML parser context
7648  *
7649  * parse ENTITY references declarations
7650  *
7651  * [68] EntityRef ::= '&' Name ';'
7652  *
7653  * [ WFC: Entity Declared ]
7654  * In a document without any DTD, a document with only an internal DTD
7655  * subset which contains no parameter entity references, or a document
7656  * with "standalone='yes'", the Name given in the entity reference
7657  * must match that in an entity declaration, except that well-formed
7658  * documents need not declare any of the following entities: amp, lt,
7659  * gt, apos, quot.  The declaration of a parameter entity must precede
7660  * any reference to it.  Similarly, the declaration of a general entity
7661  * must precede any reference to it which appears in a default value in an
7662  * attribute-list declaration. Note that if entities are declared in the
7663  * external subset or in external parameter entities, a non-validating
7664  * processor is not obligated to read and process their declarations;
7665  * for such documents, the rule that an entity must be declared is a
7666  * well-formedness constraint only if standalone='yes'.
7667  *
7668  * [ WFC: Parsed Entity ]
7669  * An entity reference must not contain the name of an unparsed entity
7670  *
7671  * Returns the xmlEntityPtr if found, or NULL otherwise.
7672  */
7673 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7674 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7675     const xmlChar *name;
7676     xmlEntityPtr ent = NULL;
7677 
7678     GROW;
7679     if (ctxt->instate == XML_PARSER_EOF)
7680         return(NULL);
7681 
7682     if (RAW != '&')
7683         return(NULL);
7684     NEXT;
7685     name = xmlParseName(ctxt);
7686     if (name == NULL) {
7687 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7688 		       "xmlParseEntityRef: no name\n");
7689         return(NULL);
7690     }
7691     if (RAW != ';') {
7692 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7693 	return(NULL);
7694     }
7695     NEXT;
7696 
7697     /*
7698      * Predefined entities override any extra definition
7699      */
7700     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7701         ent = xmlGetPredefinedEntity(name);
7702         if (ent != NULL)
7703             return(ent);
7704     }
7705 
7706     /*
7707      * Increase the number of entity references parsed
7708      */
7709     ctxt->nbentities++;
7710 
7711     /*
7712      * Ask first SAX for entity resolution, otherwise try the
7713      * entities which may have stored in the parser context.
7714      */
7715     if (ctxt->sax != NULL) {
7716 	if (ctxt->sax->getEntity != NULL)
7717 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7718 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7719 	    (ctxt->options & XML_PARSE_OLDSAX))
7720 	    ent = xmlGetPredefinedEntity(name);
7721 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7722 	    (ctxt->userData==ctxt)) {
7723 	    ent = xmlSAX2GetEntity(ctxt, name);
7724 	}
7725     }
7726     if (ctxt->instate == XML_PARSER_EOF)
7727 	return(NULL);
7728     /*
7729      * [ WFC: Entity Declared ]
7730      * In a document without any DTD, a document with only an
7731      * internal DTD subset which contains no parameter entity
7732      * references, or a document with "standalone='yes'", the
7733      * Name given in the entity reference must match that in an
7734      * entity declaration, except that well-formed documents
7735      * need not declare any of the following entities: amp, lt,
7736      * gt, apos, quot.
7737      * The declaration of a parameter entity must precede any
7738      * reference to it.
7739      * Similarly, the declaration of a general entity must
7740      * precede any reference to it which appears in a default
7741      * value in an attribute-list declaration. Note that if
7742      * entities are declared in the external subset or in
7743      * external parameter entities, a non-validating processor
7744      * is not obligated to read and process their declarations;
7745      * for such documents, the rule that an entity must be
7746      * declared is a well-formedness constraint only if
7747      * standalone='yes'.
7748      */
7749     if (ent == NULL) {
7750 	if ((ctxt->standalone == 1) ||
7751 	    ((ctxt->hasExternalSubset == 0) &&
7752 	     (ctxt->hasPErefs == 0))) {
7753 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7754 		     "Entity '%s' not defined\n", name);
7755 	} else {
7756 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7757 		     "Entity '%s' not defined\n", name);
7758 	    if ((ctxt->inSubset == 0) &&
7759 		(ctxt->sax != NULL) &&
7760 		(ctxt->sax->reference != NULL)) {
7761 		ctxt->sax->reference(ctxt->userData, name);
7762 	    }
7763 	}
7764 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7765 	ctxt->valid = 0;
7766     }
7767 
7768     /*
7769      * [ WFC: Parsed Entity ]
7770      * An entity reference must not contain the name of an
7771      * unparsed entity
7772      */
7773     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7774 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7775 		 "Entity reference to unparsed entity %s\n", name);
7776     }
7777 
7778     /*
7779      * [ WFC: No External Entity References ]
7780      * Attribute values cannot contain direct or indirect
7781      * entity references to external entities.
7782      */
7783     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7784 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7785 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7786 	     "Attribute references external entity '%s'\n", name);
7787     }
7788     /*
7789      * [ WFC: No < in Attribute Values ]
7790      * The replacement text of any entity referred to directly or
7791      * indirectly in an attribute value (other than "&lt;") must
7792      * not contain a <.
7793      */
7794     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7795 	     (ent != NULL) &&
7796 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7797 	if (((ent->checked & 1) || (ent->checked == 0)) &&
7798 	     (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7799 	    xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7800 	"'<' in entity '%s' is not allowed in attributes values\n", name);
7801         }
7802     }
7803 
7804     /*
7805      * Internal check, no parameter entities here ...
7806      */
7807     else {
7808 	switch (ent->etype) {
7809 	    case XML_INTERNAL_PARAMETER_ENTITY:
7810 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7811 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7812 	     "Attempt to reference the parameter entity '%s'\n",
7813 			      name);
7814 	    break;
7815 	    default:
7816 	    break;
7817 	}
7818     }
7819 
7820     /*
7821      * [ WFC: No Recursion ]
7822      * A parsed entity must not contain a recursive reference
7823      * to itself, either directly or indirectly.
7824      * Done somewhere else
7825      */
7826     return(ent);
7827 }
7828 
7829 /**
7830  * xmlParseStringEntityRef:
7831  * @ctxt:  an XML parser context
7832  * @str:  a pointer to an index in the string
7833  *
7834  * parse ENTITY references declarations, but this version parses it from
7835  * a string value.
7836  *
7837  * [68] EntityRef ::= '&' Name ';'
7838  *
7839  * [ WFC: Entity Declared ]
7840  * In a document without any DTD, a document with only an internal DTD
7841  * subset which contains no parameter entity references, or a document
7842  * with "standalone='yes'", the Name given in the entity reference
7843  * must match that in an entity declaration, except that well-formed
7844  * documents need not declare any of the following entities: amp, lt,
7845  * gt, apos, quot.  The declaration of a parameter entity must precede
7846  * any reference to it.  Similarly, the declaration of a general entity
7847  * must precede any reference to it which appears in a default value in an
7848  * attribute-list declaration. Note that if entities are declared in the
7849  * external subset or in external parameter entities, a non-validating
7850  * processor is not obligated to read and process their declarations;
7851  * for such documents, the rule that an entity must be declared is a
7852  * well-formedness constraint only if standalone='yes'.
7853  *
7854  * [ WFC: Parsed Entity ]
7855  * An entity reference must not contain the name of an unparsed entity
7856  *
7857  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7858  * is updated to the current location in the string.
7859  */
7860 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7861 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7862     xmlChar *name;
7863     const xmlChar *ptr;
7864     xmlChar cur;
7865     xmlEntityPtr ent = NULL;
7866 
7867     if ((str == NULL) || (*str == NULL))
7868         return(NULL);
7869     ptr = *str;
7870     cur = *ptr;
7871     if (cur != '&')
7872 	return(NULL);
7873 
7874     ptr++;
7875     name = xmlParseStringName(ctxt, &ptr);
7876     if (name == NULL) {
7877 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7878 		       "xmlParseStringEntityRef: no name\n");
7879 	*str = ptr;
7880 	return(NULL);
7881     }
7882     if (*ptr != ';') {
7883 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7884         xmlFree(name);
7885 	*str = ptr;
7886 	return(NULL);
7887     }
7888     ptr++;
7889 
7890 
7891     /*
7892      * Predefined entities override any extra definition
7893      */
7894     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7895         ent = xmlGetPredefinedEntity(name);
7896         if (ent != NULL) {
7897             xmlFree(name);
7898             *str = ptr;
7899             return(ent);
7900         }
7901     }
7902 
7903     /*
7904      * Increate the number of entity references parsed
7905      */
7906     ctxt->nbentities++;
7907 
7908     /*
7909      * Ask first SAX for entity resolution, otherwise try the
7910      * entities which may have stored in the parser context.
7911      */
7912     if (ctxt->sax != NULL) {
7913 	if (ctxt->sax->getEntity != NULL)
7914 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7915 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7916 	    ent = xmlGetPredefinedEntity(name);
7917 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7918 	    ent = xmlSAX2GetEntity(ctxt, name);
7919 	}
7920     }
7921     if (ctxt->instate == XML_PARSER_EOF) {
7922 	xmlFree(name);
7923 	return(NULL);
7924     }
7925 
7926     /*
7927      * [ WFC: Entity Declared ]
7928      * In a document without any DTD, a document with only an
7929      * internal DTD subset which contains no parameter entity
7930      * references, or a document with "standalone='yes'", the
7931      * Name given in the entity reference must match that in an
7932      * entity declaration, except that well-formed documents
7933      * need not declare any of the following entities: amp, lt,
7934      * gt, apos, quot.
7935      * The declaration of a parameter entity must precede any
7936      * reference to it.
7937      * Similarly, the declaration of a general entity must
7938      * precede any reference to it which appears in a default
7939      * value in an attribute-list declaration. Note that if
7940      * entities are declared in the external subset or in
7941      * external parameter entities, a non-validating processor
7942      * is not obligated to read and process their declarations;
7943      * for such documents, the rule that an entity must be
7944      * declared is a well-formedness constraint only if
7945      * standalone='yes'.
7946      */
7947     if (ent == NULL) {
7948 	if ((ctxt->standalone == 1) ||
7949 	    ((ctxt->hasExternalSubset == 0) &&
7950 	     (ctxt->hasPErefs == 0))) {
7951 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7952 		     "Entity '%s' not defined\n", name);
7953 	} else {
7954 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7955 			  "Entity '%s' not defined\n",
7956 			  name);
7957 	}
7958 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7959 	/* TODO ? check regressions ctxt->valid = 0; */
7960     }
7961 
7962     /*
7963      * [ WFC: Parsed Entity ]
7964      * An entity reference must not contain the name of an
7965      * unparsed entity
7966      */
7967     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7968 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7969 		 "Entity reference to unparsed entity %s\n", name);
7970     }
7971 
7972     /*
7973      * [ WFC: No External Entity References ]
7974      * Attribute values cannot contain direct or indirect
7975      * entity references to external entities.
7976      */
7977     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7978 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7979 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7980 	 "Attribute references external entity '%s'\n", name);
7981     }
7982     /*
7983      * [ WFC: No < in Attribute Values ]
7984      * The replacement text of any entity referred to directly or
7985      * indirectly in an attribute value (other than "&lt;") must
7986      * not contain a <.
7987      */
7988     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7989 	     (ent != NULL) && (ent->content != NULL) &&
7990 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7991 	     (xmlStrchr(ent->content, '<'))) {
7992 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7993      "'<' in entity '%s' is not allowed in attributes values\n",
7994 			  name);
7995     }
7996 
7997     /*
7998      * Internal check, no parameter entities here ...
7999      */
8000     else {
8001 	switch (ent->etype) {
8002 	    case XML_INTERNAL_PARAMETER_ENTITY:
8003 	    case XML_EXTERNAL_PARAMETER_ENTITY:
8004 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
8005 	     "Attempt to reference the parameter entity '%s'\n",
8006 				  name);
8007 	    break;
8008 	    default:
8009 	    break;
8010 	}
8011     }
8012 
8013     /*
8014      * [ WFC: No Recursion ]
8015      * A parsed entity must not contain a recursive reference
8016      * to itself, either directly or indirectly.
8017      * Done somewhere else
8018      */
8019 
8020     xmlFree(name);
8021     *str = ptr;
8022     return(ent);
8023 }
8024 
8025 /**
8026  * xmlParsePEReference:
8027  * @ctxt:  an XML parser context
8028  *
8029  * parse PEReference declarations
8030  * The entity content is handled directly by pushing it's content as
8031  * a new input stream.
8032  *
8033  * [69] PEReference ::= '%' Name ';'
8034  *
8035  * [ WFC: No Recursion ]
8036  * A parsed entity must not contain a recursive
8037  * reference to itself, either directly or indirectly.
8038  *
8039  * [ WFC: Entity Declared ]
8040  * In a document without any DTD, a document with only an internal DTD
8041  * subset which contains no parameter entity references, or a document
8042  * with "standalone='yes'", ...  ... The declaration of a parameter
8043  * entity must precede any reference to it...
8044  *
8045  * [ VC: Entity Declared ]
8046  * In a document with an external subset or external parameter entities
8047  * with "standalone='no'", ...  ... The declaration of a parameter entity
8048  * must precede any reference to it...
8049  *
8050  * [ WFC: In DTD ]
8051  * Parameter-entity references may only appear in the DTD.
8052  * NOTE: misleading but this is handled.
8053  */
8054 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)8055 xmlParsePEReference(xmlParserCtxtPtr ctxt)
8056 {
8057     const xmlChar *name;
8058     xmlEntityPtr entity = NULL;
8059     xmlParserInputPtr input;
8060 
8061     if (RAW != '%')
8062         return;
8063     NEXT;
8064     name = xmlParseName(ctxt);
8065     if (name == NULL) {
8066 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8067 		       "xmlParsePEReference: no name\n");
8068 	return;
8069     }
8070     if (RAW != ';') {
8071 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8072         return;
8073     }
8074 
8075     NEXT;
8076 
8077     /*
8078      * Increate the number of entity references parsed
8079      */
8080     ctxt->nbentities++;
8081 
8082     /*
8083      * Request the entity from SAX
8084      */
8085     if ((ctxt->sax != NULL) &&
8086 	(ctxt->sax->getParameterEntity != NULL))
8087 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8088     if (ctxt->instate == XML_PARSER_EOF)
8089 	return;
8090     if (entity == NULL) {
8091 	/*
8092 	 * [ WFC: Entity Declared ]
8093 	 * In a document without any DTD, a document with only an
8094 	 * internal DTD subset which contains no parameter entity
8095 	 * references, or a document with "standalone='yes'", ...
8096 	 * ... The declaration of a parameter entity must precede
8097 	 * any reference to it...
8098 	 */
8099 	if ((ctxt->standalone == 1) ||
8100 	    ((ctxt->hasExternalSubset == 0) &&
8101 	     (ctxt->hasPErefs == 0))) {
8102 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8103 			      "PEReference: %%%s; not found\n",
8104 			      name);
8105 	} else {
8106 	    /*
8107 	     * [ VC: Entity Declared ]
8108 	     * In a document with an external subset or external
8109 	     * parameter entities with "standalone='no'", ...
8110 	     * ... The declaration of a parameter entity must
8111 	     * precede any reference to it...
8112 	     */
8113 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8114 			  "PEReference: %%%s; not found\n",
8115 			  name, NULL);
8116 	    ctxt->valid = 0;
8117 	}
8118 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
8119     } else {
8120 	/*
8121 	 * Internal checking in case the entity quest barfed
8122 	 */
8123 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8124 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8125 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8126 		  "Internal: %%%s; is not a parameter entity\n",
8127 			  name, NULL);
8128 	} else if (ctxt->input->free != deallocblankswrapper) {
8129 	    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8130 	    if (xmlPushInput(ctxt, input) < 0)
8131 		return;
8132 	} else {
8133 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8134 	        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8135 	        ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8136 	        ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8137 	        ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8138 	        (ctxt->replaceEntities == 0) &&
8139 	        (ctxt->validate == 0))
8140 	        return;
8141 	    /*
8142 	     * TODO !!!
8143 	     * handle the extra spaces added before and after
8144 	     * c.f. http://www.w3.org/TR/REC-xml#as-PE
8145 	     */
8146 	    input = xmlNewEntityInputStream(ctxt, entity);
8147 	    if (xmlPushInput(ctxt, input) < 0)
8148 		return;
8149 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8150 		(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8151 		(IS_BLANK_CH(NXT(5)))) {
8152 		xmlParseTextDecl(ctxt);
8153 		if (ctxt->errNo ==
8154 		    XML_ERR_UNSUPPORTED_ENCODING) {
8155 		    /*
8156 		     * The XML REC instructs us to stop parsing
8157 		     * right here
8158 		     */
8159 		    xmlHaltParser(ctxt);
8160 		    return;
8161 		}
8162 	    }
8163 	}
8164     }
8165     ctxt->hasPErefs = 1;
8166 }
8167 
8168 /**
8169  * xmlLoadEntityContent:
8170  * @ctxt:  an XML parser context
8171  * @entity: an unloaded system entity
8172  *
8173  * Load the original content of the given system entity from the
8174  * ExternalID/SystemID given. This is to be used for Included in Literal
8175  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8176  *
8177  * Returns 0 in case of success and -1 in case of failure
8178  */
8179 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8180 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8181     xmlParserInputPtr input;
8182     xmlBufferPtr buf;
8183     int l, c;
8184     int count = 0;
8185 
8186     if ((ctxt == NULL) || (entity == NULL) ||
8187         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8188 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8189 	(entity->content != NULL)) {
8190 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8191 	            "xmlLoadEntityContent parameter error");
8192         return(-1);
8193     }
8194 
8195     if (xmlParserDebugEntities)
8196 	xmlGenericError(xmlGenericErrorContext,
8197 		"Reading %s entity content input\n", entity->name);
8198 
8199     buf = xmlBufferCreate();
8200     if (buf == NULL) {
8201 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8202 	            "xmlLoadEntityContent parameter error");
8203         return(-1);
8204     }
8205 
8206     input = xmlNewEntityInputStream(ctxt, entity);
8207     if (input == NULL) {
8208 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8209 	            "xmlLoadEntityContent input error");
8210 	xmlBufferFree(buf);
8211         return(-1);
8212     }
8213 
8214     /*
8215      * Push the entity as the current input, read char by char
8216      * saving to the buffer until the end of the entity or an error
8217      */
8218     if (xmlPushInput(ctxt, input) < 0) {
8219         xmlBufferFree(buf);
8220 	return(-1);
8221     }
8222 
8223     GROW;
8224     c = CUR_CHAR(l);
8225     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8226            (IS_CHAR(c))) {
8227         xmlBufferAdd(buf, ctxt->input->cur, l);
8228 	if (count++ > XML_PARSER_CHUNK_SIZE) {
8229 	    count = 0;
8230 	    GROW;
8231             if (ctxt->instate == XML_PARSER_EOF) {
8232                 xmlBufferFree(buf);
8233                 return(-1);
8234             }
8235 	}
8236 	NEXTL(l);
8237 	c = CUR_CHAR(l);
8238 	if (c == 0) {
8239 	    count = 0;
8240 	    GROW;
8241             if (ctxt->instate == XML_PARSER_EOF) {
8242                 xmlBufferFree(buf);
8243                 return(-1);
8244             }
8245 	    c = CUR_CHAR(l);
8246 	}
8247     }
8248 
8249     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8250         xmlPopInput(ctxt);
8251     } else if (!IS_CHAR(c)) {
8252         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8253                           "xmlLoadEntityContent: invalid char value %d\n",
8254 	                  c);
8255 	xmlBufferFree(buf);
8256 	return(-1);
8257     }
8258     entity->content = buf->content;
8259     buf->content = NULL;
8260     xmlBufferFree(buf);
8261 
8262     return(0);
8263 }
8264 
8265 /**
8266  * xmlParseStringPEReference:
8267  * @ctxt:  an XML parser context
8268  * @str:  a pointer to an index in the string
8269  *
8270  * parse PEReference declarations
8271  *
8272  * [69] PEReference ::= '%' Name ';'
8273  *
8274  * [ WFC: No Recursion ]
8275  * A parsed entity must not contain a recursive
8276  * reference to itself, either directly or indirectly.
8277  *
8278  * [ WFC: Entity Declared ]
8279  * In a document without any DTD, a document with only an internal DTD
8280  * subset which contains no parameter entity references, or a document
8281  * with "standalone='yes'", ...  ... The declaration of a parameter
8282  * entity must precede any reference to it...
8283  *
8284  * [ VC: Entity Declared ]
8285  * In a document with an external subset or external parameter entities
8286  * with "standalone='no'", ...  ... The declaration of a parameter entity
8287  * must precede any reference to it...
8288  *
8289  * [ WFC: In DTD ]
8290  * Parameter-entity references may only appear in the DTD.
8291  * NOTE: misleading but this is handled.
8292  *
8293  * Returns the string of the entity content.
8294  *         str is updated to the current value of the index
8295  */
8296 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8297 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8298     const xmlChar *ptr;
8299     xmlChar cur;
8300     xmlChar *name;
8301     xmlEntityPtr entity = NULL;
8302 
8303     if ((str == NULL) || (*str == NULL)) return(NULL);
8304     ptr = *str;
8305     cur = *ptr;
8306     if (cur != '%')
8307         return(NULL);
8308     ptr++;
8309     name = xmlParseStringName(ctxt, &ptr);
8310     if (name == NULL) {
8311 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8312 		       "xmlParseStringPEReference: no name\n");
8313 	*str = ptr;
8314 	return(NULL);
8315     }
8316     cur = *ptr;
8317     if (cur != ';') {
8318 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8319 	xmlFree(name);
8320 	*str = ptr;
8321 	return(NULL);
8322     }
8323     ptr++;
8324 
8325     /*
8326      * Increate the number of entity references parsed
8327      */
8328     ctxt->nbentities++;
8329 
8330     /*
8331      * Request the entity from SAX
8332      */
8333     if ((ctxt->sax != NULL) &&
8334 	(ctxt->sax->getParameterEntity != NULL))
8335 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8336     if (ctxt->instate == XML_PARSER_EOF) {
8337 	xmlFree(name);
8338 	return(NULL);
8339     }
8340     if (entity == NULL) {
8341 	/*
8342 	 * [ WFC: Entity Declared ]
8343 	 * In a document without any DTD, a document with only an
8344 	 * internal DTD subset which contains no parameter entity
8345 	 * references, or a document with "standalone='yes'", ...
8346 	 * ... The declaration of a parameter entity must precede
8347 	 * any reference to it...
8348 	 */
8349 	if ((ctxt->standalone == 1) ||
8350 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8351 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8352 		 "PEReference: %%%s; not found\n", name);
8353 	} else {
8354 	    /*
8355 	     * [ VC: Entity Declared ]
8356 	     * In a document with an external subset or external
8357 	     * parameter entities with "standalone='no'", ...
8358 	     * ... The declaration of a parameter entity must
8359 	     * precede any reference to it...
8360 	     */
8361 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8362 			  "PEReference: %%%s; not found\n",
8363 			  name, NULL);
8364 	    ctxt->valid = 0;
8365 	}
8366 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
8367     } else {
8368 	/*
8369 	 * Internal checking in case the entity quest barfed
8370 	 */
8371 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8372 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8373 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8374 			  "%%%s; is not a parameter entity\n",
8375 			  name, NULL);
8376 	}
8377     }
8378     ctxt->hasPErefs = 1;
8379     xmlFree(name);
8380     *str = ptr;
8381     return(entity);
8382 }
8383 
8384 /**
8385  * xmlParseDocTypeDecl:
8386  * @ctxt:  an XML parser context
8387  *
8388  * parse a DOCTYPE declaration
8389  *
8390  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8391  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8392  *
8393  * [ VC: Root Element Type ]
8394  * The Name in the document type declaration must match the element
8395  * type of the root element.
8396  */
8397 
8398 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8399 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8400     const xmlChar *name = NULL;
8401     xmlChar *ExternalID = NULL;
8402     xmlChar *URI = NULL;
8403 
8404     /*
8405      * We know that '<!DOCTYPE' has been detected.
8406      */
8407     SKIP(9);
8408 
8409     SKIP_BLANKS;
8410 
8411     /*
8412      * Parse the DOCTYPE name.
8413      */
8414     name = xmlParseName(ctxt);
8415     if (name == NULL) {
8416 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8417 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8418     }
8419     ctxt->intSubName = name;
8420 
8421     SKIP_BLANKS;
8422 
8423     /*
8424      * Check for SystemID and ExternalID
8425      */
8426     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8427 
8428     if ((URI != NULL) || (ExternalID != NULL)) {
8429         ctxt->hasExternalSubset = 1;
8430     }
8431     ctxt->extSubURI = URI;
8432     ctxt->extSubSystem = ExternalID;
8433 
8434     SKIP_BLANKS;
8435 
8436     /*
8437      * Create and update the internal subset.
8438      */
8439     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8440 	(!ctxt->disableSAX))
8441 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8442     if (ctxt->instate == XML_PARSER_EOF)
8443 	return;
8444 
8445     /*
8446      * Is there any internal subset declarations ?
8447      * they are handled separately in xmlParseInternalSubset()
8448      */
8449     if (RAW == '[')
8450 	return;
8451 
8452     /*
8453      * We should be at the end of the DOCTYPE declaration.
8454      */
8455     if (RAW != '>') {
8456 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8457     }
8458     NEXT;
8459 }
8460 
8461 /**
8462  * xmlParseInternalSubset:
8463  * @ctxt:  an XML parser context
8464  *
8465  * parse the internal subset declaration
8466  *
8467  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8468  */
8469 
8470 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8471 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8472     /*
8473      * Is there any DTD definition ?
8474      */
8475     if (RAW == '[') {
8476         ctxt->instate = XML_PARSER_DTD;
8477         NEXT;
8478 	/*
8479 	 * Parse the succession of Markup declarations and
8480 	 * PEReferences.
8481 	 * Subsequence (markupdecl | PEReference | S)*
8482 	 */
8483 	while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8484 	    const xmlChar *check = CUR_PTR;
8485 	    unsigned int cons = ctxt->input->consumed;
8486 
8487 	    SKIP_BLANKS;
8488 	    xmlParseMarkupDecl(ctxt);
8489 	    xmlParsePEReference(ctxt);
8490 
8491 	    /*
8492 	     * Pop-up of finished entities.
8493 	     */
8494 	    while ((RAW == 0) && (ctxt->inputNr > 1))
8495 		xmlPopInput(ctxt);
8496 
8497 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8498 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8499 	     "xmlParseInternalSubset: error detected in Markup declaration\n");
8500 		break;
8501 	    }
8502 	}
8503 	if (RAW == ']') {
8504 	    NEXT;
8505 	    SKIP_BLANKS;
8506 	}
8507     }
8508 
8509     /*
8510      * We should be at the end of the DOCTYPE declaration.
8511      */
8512     if (RAW != '>') {
8513 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8514 	return;
8515     }
8516     NEXT;
8517 }
8518 
8519 #ifdef LIBXML_SAX1_ENABLED
8520 /**
8521  * xmlParseAttribute:
8522  * @ctxt:  an XML parser context
8523  * @value:  a xmlChar ** used to store the value of the attribute
8524  *
8525  * parse an attribute
8526  *
8527  * [41] Attribute ::= Name Eq AttValue
8528  *
8529  * [ WFC: No External Entity References ]
8530  * Attribute values cannot contain direct or indirect entity references
8531  * to external entities.
8532  *
8533  * [ WFC: No < in Attribute Values ]
8534  * The replacement text of any entity referred to directly or indirectly in
8535  * an attribute value (other than "&lt;") must not contain a <.
8536  *
8537  * [ VC: Attribute Value Type ]
8538  * The attribute must have been declared; the value must be of the type
8539  * declared for it.
8540  *
8541  * [25] Eq ::= S? '=' S?
8542  *
8543  * With namespace:
8544  *
8545  * [NS 11] Attribute ::= QName Eq AttValue
8546  *
8547  * Also the case QName == xmlns:??? is handled independently as a namespace
8548  * definition.
8549  *
8550  * Returns the attribute name, and the value in *value.
8551  */
8552 
8553 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8554 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8555     const xmlChar *name;
8556     xmlChar *val;
8557 
8558     *value = NULL;
8559     GROW;
8560     name = xmlParseName(ctxt);
8561     if (name == NULL) {
8562 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8563 	               "error parsing attribute name\n");
8564         return(NULL);
8565     }
8566 
8567     /*
8568      * read the value
8569      */
8570     SKIP_BLANKS;
8571     if (RAW == '=') {
8572         NEXT;
8573 	SKIP_BLANKS;
8574 	val = xmlParseAttValue(ctxt);
8575 	ctxt->instate = XML_PARSER_CONTENT;
8576     } else {
8577 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8578 	       "Specification mandate value for attribute %s\n", name);
8579 	return(NULL);
8580     }
8581 
8582     /*
8583      * Check that xml:lang conforms to the specification
8584      * No more registered as an error, just generate a warning now
8585      * since this was deprecated in XML second edition
8586      */
8587     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8588 	if (!xmlCheckLanguageID(val)) {
8589 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8590 		          "Malformed value for xml:lang : %s\n",
8591 			  val, NULL);
8592 	}
8593     }
8594 
8595     /*
8596      * Check that xml:space conforms to the specification
8597      */
8598     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8599 	if (xmlStrEqual(val, BAD_CAST "default"))
8600 	    *(ctxt->space) = 0;
8601 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8602 	    *(ctxt->space) = 1;
8603 	else {
8604 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8605 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8606                                  val, NULL);
8607 	}
8608     }
8609 
8610     *value = val;
8611     return(name);
8612 }
8613 
8614 /**
8615  * xmlParseStartTag:
8616  * @ctxt:  an XML parser context
8617  *
8618  * parse a start of tag either for rule element or
8619  * EmptyElement. In both case we don't parse the tag closing chars.
8620  *
8621  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8622  *
8623  * [ WFC: Unique Att Spec ]
8624  * No attribute name may appear more than once in the same start-tag or
8625  * empty-element tag.
8626  *
8627  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8628  *
8629  * [ WFC: Unique Att Spec ]
8630  * No attribute name may appear more than once in the same start-tag or
8631  * empty-element tag.
8632  *
8633  * With namespace:
8634  *
8635  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8636  *
8637  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8638  *
8639  * Returns the element name parsed
8640  */
8641 
8642 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8643 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8644     const xmlChar *name;
8645     const xmlChar *attname;
8646     xmlChar *attvalue;
8647     const xmlChar **atts = ctxt->atts;
8648     int nbatts = 0;
8649     int maxatts = ctxt->maxatts;
8650     int i;
8651 
8652     if (RAW != '<') return(NULL);
8653     NEXT1;
8654 
8655     name = xmlParseName(ctxt);
8656     if (name == NULL) {
8657 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8658 	     "xmlParseStartTag: invalid element name\n");
8659         return(NULL);
8660     }
8661 
8662     /*
8663      * Now parse the attributes, it ends up with the ending
8664      *
8665      * (S Attribute)* S?
8666      */
8667     SKIP_BLANKS;
8668     GROW;
8669 
8670     while (((RAW != '>') &&
8671 	   ((RAW != '/') || (NXT(1) != '>')) &&
8672 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8673 	const xmlChar *q = CUR_PTR;
8674 	unsigned int cons = ctxt->input->consumed;
8675 
8676 	attname = xmlParseAttribute(ctxt, &attvalue);
8677         if ((attname != NULL) && (attvalue != NULL)) {
8678 	    /*
8679 	     * [ WFC: Unique Att Spec ]
8680 	     * No attribute name may appear more than once in the same
8681 	     * start-tag or empty-element tag.
8682 	     */
8683 	    for (i = 0; i < nbatts;i += 2) {
8684 	        if (xmlStrEqual(atts[i], attname)) {
8685 		    xmlErrAttributeDup(ctxt, NULL, attname);
8686 		    xmlFree(attvalue);
8687 		    goto failed;
8688 		}
8689 	    }
8690 	    /*
8691 	     * Add the pair to atts
8692 	     */
8693 	    if (atts == NULL) {
8694 	        maxatts = 22; /* allow for 10 attrs by default */
8695 	        atts = (const xmlChar **)
8696 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8697 		if (atts == NULL) {
8698 		    xmlErrMemory(ctxt, NULL);
8699 		    if (attvalue != NULL)
8700 			xmlFree(attvalue);
8701 		    goto failed;
8702 		}
8703 		ctxt->atts = atts;
8704 		ctxt->maxatts = maxatts;
8705 	    } else if (nbatts + 4 > maxatts) {
8706 	        const xmlChar **n;
8707 
8708 	        maxatts *= 2;
8709 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8710 					     maxatts * sizeof(const xmlChar *));
8711 		if (n == NULL) {
8712 		    xmlErrMemory(ctxt, NULL);
8713 		    if (attvalue != NULL)
8714 			xmlFree(attvalue);
8715 		    goto failed;
8716 		}
8717 		atts = n;
8718 		ctxt->atts = atts;
8719 		ctxt->maxatts = maxatts;
8720 	    }
8721 	    atts[nbatts++] = attname;
8722 	    atts[nbatts++] = attvalue;
8723 	    atts[nbatts] = NULL;
8724 	    atts[nbatts + 1] = NULL;
8725 	} else {
8726 	    if (attvalue != NULL)
8727 		xmlFree(attvalue);
8728 	}
8729 
8730 failed:
8731 
8732 	GROW
8733 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8734 	    break;
8735 	if (!IS_BLANK_CH(RAW)) {
8736 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8737 			   "attributes construct error\n");
8738 	}
8739 	SKIP_BLANKS;
8740         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8741             (attname == NULL) && (attvalue == NULL)) {
8742 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8743 			   "xmlParseStartTag: problem parsing attributes\n");
8744 	    break;
8745 	}
8746 	SHRINK;
8747         GROW;
8748     }
8749 
8750     /*
8751      * SAX: Start of Element !
8752      */
8753     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8754 	(!ctxt->disableSAX)) {
8755 	if (nbatts > 0)
8756 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8757 	else
8758 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8759     }
8760 
8761     if (atts != NULL) {
8762         /* Free only the content strings */
8763         for (i = 1;i < nbatts;i+=2)
8764 	    if (atts[i] != NULL)
8765 	       xmlFree((xmlChar *) atts[i]);
8766     }
8767     return(name);
8768 }
8769 
8770 /**
8771  * xmlParseEndTag1:
8772  * @ctxt:  an XML parser context
8773  * @line:  line of the start tag
8774  * @nsNr:  number of namespaces on the start tag
8775  *
8776  * parse an end of tag
8777  *
8778  * [42] ETag ::= '</' Name S? '>'
8779  *
8780  * With namespace
8781  *
8782  * [NS 9] ETag ::= '</' QName S? '>'
8783  */
8784 
8785 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8786 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8787     const xmlChar *name;
8788 
8789     GROW;
8790     if ((RAW != '<') || (NXT(1) != '/')) {
8791 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8792 		       "xmlParseEndTag: '</' not found\n");
8793 	return;
8794     }
8795     SKIP(2);
8796 
8797     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8798 
8799     /*
8800      * We should definitely be at the ending "S? '>'" part
8801      */
8802     GROW;
8803     SKIP_BLANKS;
8804     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8805 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8806     } else
8807 	NEXT1;
8808 
8809     /*
8810      * [ WFC: Element Type Match ]
8811      * The Name in an element's end-tag must match the element type in the
8812      * start-tag.
8813      *
8814      */
8815     if (name != (xmlChar*)1) {
8816         if (name == NULL) name = BAD_CAST "unparseable";
8817         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8818 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8819 		                ctxt->name, line, name);
8820     }
8821 
8822     /*
8823      * SAX: End of Tag
8824      */
8825     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8826 	(!ctxt->disableSAX))
8827         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8828 
8829     namePop(ctxt);
8830     spacePop(ctxt);
8831     return;
8832 }
8833 
8834 /**
8835  * xmlParseEndTag:
8836  * @ctxt:  an XML parser context
8837  *
8838  * parse an end of tag
8839  *
8840  * [42] ETag ::= '</' Name S? '>'
8841  *
8842  * With namespace
8843  *
8844  * [NS 9] ETag ::= '</' QName S? '>'
8845  */
8846 
8847 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8848 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8849     xmlParseEndTag1(ctxt, 0);
8850 }
8851 #endif /* LIBXML_SAX1_ENABLED */
8852 
8853 /************************************************************************
8854  *									*
8855  *		      SAX 2 specific operations				*
8856  *									*
8857  ************************************************************************/
8858 
8859 /*
8860  * xmlGetNamespace:
8861  * @ctxt:  an XML parser context
8862  * @prefix:  the prefix to lookup
8863  *
8864  * Lookup the namespace name for the @prefix (which ca be NULL)
8865  * The prefix must come from the @ctxt->dict dictionary
8866  *
8867  * Returns the namespace name or NULL if not bound
8868  */
8869 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8870 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8871     int i;
8872 
8873     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8874     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8875         if (ctxt->nsTab[i] == prefix) {
8876 	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8877 	        return(NULL);
8878 	    return(ctxt->nsTab[i + 1]);
8879 	}
8880     return(NULL);
8881 }
8882 
8883 /**
8884  * xmlParseQName:
8885  * @ctxt:  an XML parser context
8886  * @prefix:  pointer to store the prefix part
8887  *
8888  * parse an XML Namespace QName
8889  *
8890  * [6]  QName  ::= (Prefix ':')? LocalPart
8891  * [7]  Prefix  ::= NCName
8892  * [8]  LocalPart  ::= NCName
8893  *
8894  * Returns the Name parsed or NULL
8895  */
8896 
8897 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8898 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8899     const xmlChar *l, *p;
8900 
8901     GROW;
8902 
8903     l = xmlParseNCName(ctxt);
8904     if (l == NULL) {
8905         if (CUR == ':') {
8906 	    l = xmlParseName(ctxt);
8907 	    if (l != NULL) {
8908 	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8909 		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8910 		*prefix = NULL;
8911 		return(l);
8912 	    }
8913 	}
8914         return(NULL);
8915     }
8916     if (CUR == ':') {
8917         NEXT;
8918 	p = l;
8919 	l = xmlParseNCName(ctxt);
8920 	if (l == NULL) {
8921 	    xmlChar *tmp;
8922 
8923             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8924 	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8925 	    l = xmlParseNmtoken(ctxt);
8926 	    if (l == NULL)
8927 		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8928 	    else {
8929 		tmp = xmlBuildQName(l, p, NULL, 0);
8930 		xmlFree((char *)l);
8931 	    }
8932 	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8933 	    if (tmp != NULL) xmlFree(tmp);
8934 	    *prefix = NULL;
8935 	    return(p);
8936 	}
8937 	if (CUR == ':') {
8938 	    xmlChar *tmp;
8939 
8940             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8941 	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8942 	    NEXT;
8943 	    tmp = (xmlChar *) xmlParseName(ctxt);
8944 	    if (tmp != NULL) {
8945 	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8946 		l = xmlDictLookup(ctxt->dict, tmp, -1);
8947 		if (tmp != NULL) xmlFree(tmp);
8948 		*prefix = p;
8949 		return(l);
8950 	    }
8951 	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8952 	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8953 	    if (tmp != NULL) xmlFree(tmp);
8954 	    *prefix = p;
8955 	    return(l);
8956 	}
8957 	*prefix = p;
8958     } else
8959         *prefix = NULL;
8960     return(l);
8961 }
8962 
8963 /**
8964  * xmlParseQNameAndCompare:
8965  * @ctxt:  an XML parser context
8966  * @name:  the localname
8967  * @prefix:  the prefix, if any.
8968  *
8969  * parse an XML name and compares for match
8970  * (specialized for endtag parsing)
8971  *
8972  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8973  * and the name for mismatch
8974  */
8975 
8976 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8977 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8978                         xmlChar const *prefix) {
8979     const xmlChar *cmp;
8980     const xmlChar *in;
8981     const xmlChar *ret;
8982     const xmlChar *prefix2;
8983 
8984     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8985 
8986     GROW;
8987     in = ctxt->input->cur;
8988 
8989     cmp = prefix;
8990     while (*in != 0 && *in == *cmp) {
8991 	++in;
8992 	++cmp;
8993     }
8994     if ((*cmp == 0) && (*in == ':')) {
8995         in++;
8996 	cmp = name;
8997 	while (*in != 0 && *in == *cmp) {
8998 	    ++in;
8999 	    ++cmp;
9000 	}
9001 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
9002 	    /* success */
9003 	    ctxt->input->cur = in;
9004 	    return((const xmlChar*) 1);
9005 	}
9006     }
9007     /*
9008      * all strings coms from the dictionary, equality can be done directly
9009      */
9010     ret = xmlParseQName (ctxt, &prefix2);
9011     if ((ret == name) && (prefix == prefix2))
9012 	return((const xmlChar*) 1);
9013     return ret;
9014 }
9015 
9016 /**
9017  * xmlParseAttValueInternal:
9018  * @ctxt:  an XML parser context
9019  * @len:  attribute len result
9020  * @alloc:  whether the attribute was reallocated as a new string
9021  * @normalize:  if 1 then further non-CDATA normalization must be done
9022  *
9023  * parse a value for an attribute.
9024  * NOTE: if no normalization is needed, the routine will return pointers
9025  *       directly from the data buffer.
9026  *
9027  * 3.3.3 Attribute-Value Normalization:
9028  * Before the value of an attribute is passed to the application or
9029  * checked for validity, the XML processor must normalize it as follows:
9030  * - a character reference is processed by appending the referenced
9031  *   character to the attribute value
9032  * - an entity reference is processed by recursively processing the
9033  *   replacement text of the entity
9034  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9035  *   appending #x20 to the normalized value, except that only a single
9036  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9037  *   parsed entity or the literal entity value of an internal parsed entity
9038  * - other characters are processed by appending them to the normalized value
9039  * If the declared value is not CDATA, then the XML processor must further
9040  * process the normalized attribute value by discarding any leading and
9041  * trailing space (#x20) characters, and by replacing sequences of space
9042  * (#x20) characters by a single space (#x20) character.
9043  * All attributes for which no declaration has been read should be treated
9044  * by a non-validating parser as if declared CDATA.
9045  *
9046  * Returns the AttValue parsed or NULL. The value has to be freed by the
9047  *     caller if it was copied, this can be detected by val[*len] == 0.
9048  */
9049 
9050 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)9051 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9052                          int normalize)
9053 {
9054     xmlChar limit = 0;
9055     const xmlChar *in = NULL, *start, *end, *last;
9056     xmlChar *ret = NULL;
9057     int line, col;
9058 
9059     GROW;
9060     in = (xmlChar *) CUR_PTR;
9061     line = ctxt->input->line;
9062     col = ctxt->input->col;
9063     if (*in != '"' && *in != '\'') {
9064         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9065         return (NULL);
9066     }
9067     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9068 
9069     /*
9070      * try to handle in this routine the most common case where no
9071      * allocation of a new string is required and where content is
9072      * pure ASCII.
9073      */
9074     limit = *in++;
9075     col++;
9076     end = ctxt->input->end;
9077     start = in;
9078     if (in >= end) {
9079         const xmlChar *oldbase = ctxt->input->base;
9080 	GROW;
9081 	if (oldbase != ctxt->input->base) {
9082 	    long delta = ctxt->input->base - oldbase;
9083 	    start = start + delta;
9084 	    in = in + delta;
9085 	}
9086 	end = ctxt->input->end;
9087     }
9088     if (normalize) {
9089         /*
9090 	 * Skip any leading spaces
9091 	 */
9092 	while ((in < end) && (*in != limit) &&
9093 	       ((*in == 0x20) || (*in == 0x9) ||
9094 	        (*in == 0xA) || (*in == 0xD))) {
9095 	    if (*in == 0xA) {
9096 	        line++; col = 1;
9097 	    } else {
9098 	        col++;
9099 	    }
9100 	    in++;
9101 	    start = in;
9102 	    if (in >= end) {
9103 		const xmlChar *oldbase = ctxt->input->base;
9104 		GROW;
9105                 if (ctxt->instate == XML_PARSER_EOF)
9106                     return(NULL);
9107 		if (oldbase != ctxt->input->base) {
9108 		    long delta = ctxt->input->base - oldbase;
9109 		    start = start + delta;
9110 		    in = in + delta;
9111 		}
9112 		end = ctxt->input->end;
9113                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9114                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9115                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9116                                    "AttValue length too long\n");
9117                     return(NULL);
9118                 }
9119 	    }
9120 	}
9121 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9122 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9123 	    col++;
9124 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
9125 	    if (in >= end) {
9126 		const xmlChar *oldbase = ctxt->input->base;
9127 		GROW;
9128                 if (ctxt->instate == XML_PARSER_EOF)
9129                     return(NULL);
9130 		if (oldbase != ctxt->input->base) {
9131 		    long delta = ctxt->input->base - oldbase;
9132 		    start = start + delta;
9133 		    in = in + delta;
9134 		}
9135 		end = ctxt->input->end;
9136                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9137                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9138                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9139                                    "AttValue length too long\n");
9140                     return(NULL);
9141                 }
9142 	    }
9143 	}
9144 	last = in;
9145 	/*
9146 	 * skip the trailing blanks
9147 	 */
9148 	while ((last[-1] == 0x20) && (last > start)) last--;
9149 	while ((in < end) && (*in != limit) &&
9150 	       ((*in == 0x20) || (*in == 0x9) ||
9151 	        (*in == 0xA) || (*in == 0xD))) {
9152 	    if (*in == 0xA) {
9153 	        line++, col = 1;
9154 	    } else {
9155 	        col++;
9156 	    }
9157 	    in++;
9158 	    if (in >= end) {
9159 		const xmlChar *oldbase = ctxt->input->base;
9160 		GROW;
9161                 if (ctxt->instate == XML_PARSER_EOF)
9162                     return(NULL);
9163 		if (oldbase != ctxt->input->base) {
9164 		    long delta = ctxt->input->base - oldbase;
9165 		    start = start + delta;
9166 		    in = in + delta;
9167 		    last = last + delta;
9168 		}
9169 		end = ctxt->input->end;
9170                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9171                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9172                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173                                    "AttValue length too long\n");
9174                     return(NULL);
9175                 }
9176 	    }
9177 	}
9178         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9179             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9180             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181                            "AttValue length too long\n");
9182             return(NULL);
9183         }
9184 	if (*in != limit) goto need_complex;
9185     } else {
9186 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9187 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9188 	    in++;
9189 	    col++;
9190 	    if (in >= end) {
9191 		const xmlChar *oldbase = ctxt->input->base;
9192 		GROW;
9193                 if (ctxt->instate == XML_PARSER_EOF)
9194                     return(NULL);
9195 		if (oldbase != ctxt->input->base) {
9196 		    long delta = ctxt->input->base - oldbase;
9197 		    start = start + delta;
9198 		    in = in + delta;
9199 		}
9200 		end = ctxt->input->end;
9201                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9202                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9203                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9204                                    "AttValue length too long\n");
9205                     return(NULL);
9206                 }
9207 	    }
9208 	}
9209 	last = in;
9210         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9211             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9212             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9213                            "AttValue length too long\n");
9214             return(NULL);
9215         }
9216 	if (*in != limit) goto need_complex;
9217     }
9218     in++;
9219     col++;
9220     if (len != NULL) {
9221         *len = last - start;
9222         ret = (xmlChar *) start;
9223     } else {
9224         if (alloc) *alloc = 1;
9225         ret = xmlStrndup(start, last - start);
9226     }
9227     CUR_PTR = in;
9228     ctxt->input->line = line;
9229     ctxt->input->col = col;
9230     if (alloc) *alloc = 0;
9231     return ret;
9232 need_complex:
9233     if (alloc) *alloc = 1;
9234     return xmlParseAttValueComplex(ctxt, len, normalize);
9235 }
9236 
9237 /**
9238  * xmlParseAttribute2:
9239  * @ctxt:  an XML parser context
9240  * @pref:  the element prefix
9241  * @elem:  the element name
9242  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9243  * @value:  a xmlChar ** used to store the value of the attribute
9244  * @len:  an int * to save the length of the attribute
9245  * @alloc:  an int * to indicate if the attribute was allocated
9246  *
9247  * parse an attribute in the new SAX2 framework.
9248  *
9249  * Returns the attribute name, and the value in *value, .
9250  */
9251 
9252 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9253 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9254                    const xmlChar * pref, const xmlChar * elem,
9255                    const xmlChar ** prefix, xmlChar ** value,
9256                    int *len, int *alloc)
9257 {
9258     const xmlChar *name;
9259     xmlChar *val, *internal_val = NULL;
9260     int normalize = 0;
9261 
9262     *value = NULL;
9263     GROW;
9264     name = xmlParseQName(ctxt, prefix);
9265     if (name == NULL) {
9266         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9267                        "error parsing attribute name\n");
9268         return (NULL);
9269     }
9270 
9271     /*
9272      * get the type if needed
9273      */
9274     if (ctxt->attsSpecial != NULL) {
9275         int type;
9276 
9277         type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9278                                             pref, elem, *prefix, name);
9279         if (type != 0)
9280             normalize = 1;
9281     }
9282 
9283     /*
9284      * read the value
9285      */
9286     SKIP_BLANKS;
9287     if (RAW == '=') {
9288         NEXT;
9289         SKIP_BLANKS;
9290         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9291 	if (normalize) {
9292 	    /*
9293 	     * Sometimes a second normalisation pass for spaces is needed
9294 	     * but that only happens if charrefs or entities refernces
9295 	     * have been used in the attribute value, i.e. the attribute
9296 	     * value have been extracted in an allocated string already.
9297 	     */
9298 	    if (*alloc) {
9299 	        const xmlChar *val2;
9300 
9301 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9302 		if ((val2 != NULL) && (val2 != val)) {
9303 		    xmlFree(val);
9304 		    val = (xmlChar *) val2;
9305 		}
9306 	    }
9307 	}
9308         ctxt->instate = XML_PARSER_CONTENT;
9309     } else {
9310         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9311                           "Specification mandate value for attribute %s\n",
9312                           name);
9313         return (NULL);
9314     }
9315 
9316     if (*prefix == ctxt->str_xml) {
9317         /*
9318          * Check that xml:lang conforms to the specification
9319          * No more registered as an error, just generate a warning now
9320          * since this was deprecated in XML second edition
9321          */
9322         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9323             internal_val = xmlStrndup(val, *len);
9324             if (!xmlCheckLanguageID(internal_val)) {
9325                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9326                               "Malformed value for xml:lang : %s\n",
9327                               internal_val, NULL);
9328             }
9329         }
9330 
9331         /*
9332          * Check that xml:space conforms to the specification
9333          */
9334         if (xmlStrEqual(name, BAD_CAST "space")) {
9335             internal_val = xmlStrndup(val, *len);
9336             if (xmlStrEqual(internal_val, BAD_CAST "default"))
9337                 *(ctxt->space) = 0;
9338             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9339                 *(ctxt->space) = 1;
9340             else {
9341                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9342                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9343                               internal_val, NULL);
9344             }
9345         }
9346         if (internal_val) {
9347             xmlFree(internal_val);
9348         }
9349     }
9350 
9351     *value = val;
9352     return (name);
9353 }
9354 /**
9355  * xmlParseStartTag2:
9356  * @ctxt:  an XML parser context
9357  *
9358  * parse a start of tag either for rule element or
9359  * EmptyElement. In both case we don't parse the tag closing chars.
9360  * This routine is called when running SAX2 parsing
9361  *
9362  * [40] STag ::= '<' Name (S Attribute)* S? '>'
9363  *
9364  * [ WFC: Unique Att Spec ]
9365  * No attribute name may appear more than once in the same start-tag or
9366  * empty-element tag.
9367  *
9368  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9369  *
9370  * [ WFC: Unique Att Spec ]
9371  * No attribute name may appear more than once in the same start-tag or
9372  * empty-element tag.
9373  *
9374  * With namespace:
9375  *
9376  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9377  *
9378  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9379  *
9380  * Returns the element name parsed
9381  */
9382 
9383 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9384 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9385                   const xmlChar **URI, int *tlen) {
9386     const xmlChar *localname;
9387     const xmlChar *prefix;
9388     const xmlChar *attname;
9389     const xmlChar *aprefix;
9390     const xmlChar *nsname;
9391     xmlChar *attvalue;
9392     const xmlChar **atts = ctxt->atts;
9393     int maxatts = ctxt->maxatts;
9394     int nratts, nbatts, nbdef;
9395     int i, j, nbNs, attval, oldline, oldcol, inputNr;
9396     const xmlChar *base;
9397     unsigned long cur;
9398     int nsNr = ctxt->nsNr;
9399 
9400     if (RAW != '<') return(NULL);
9401     NEXT1;
9402 
9403     /*
9404      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9405      *       point since the attribute values may be stored as pointers to
9406      *       the buffer and calling SHRINK would destroy them !
9407      *       The Shrinking is only possible once the full set of attribute
9408      *       callbacks have been done.
9409      */
9410 reparse:
9411     SHRINK;
9412     base = ctxt->input->base;
9413     cur = ctxt->input->cur - ctxt->input->base;
9414     inputNr = ctxt->inputNr;
9415     oldline = ctxt->input->line;
9416     oldcol = ctxt->input->col;
9417     nbatts = 0;
9418     nratts = 0;
9419     nbdef = 0;
9420     nbNs = 0;
9421     attval = 0;
9422     /* Forget any namespaces added during an earlier parse of this element. */
9423     ctxt->nsNr = nsNr;
9424 
9425     localname = xmlParseQName(ctxt, &prefix);
9426     if (localname == NULL) {
9427 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9428 		       "StartTag: invalid element name\n");
9429         return(NULL);
9430     }
9431     *tlen = ctxt->input->cur - ctxt->input->base - cur;
9432 
9433     /*
9434      * Now parse the attributes, it ends up with the ending
9435      *
9436      * (S Attribute)* S?
9437      */
9438     SKIP_BLANKS;
9439     GROW;
9440     if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9441         goto base_changed;
9442 
9443     while (((RAW != '>') &&
9444 	   ((RAW != '/') || (NXT(1) != '>')) &&
9445 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9446 	const xmlChar *q = CUR_PTR;
9447 	unsigned int cons = ctxt->input->consumed;
9448 	int len = -1, alloc = 0;
9449 
9450 	attname = xmlParseAttribute2(ctxt, prefix, localname,
9451 	                             &aprefix, &attvalue, &len, &alloc);
9452 	if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) {
9453 	    if ((attvalue != NULL) && (alloc != 0))
9454 	        xmlFree(attvalue);
9455 	    attvalue = NULL;
9456 	    goto base_changed;
9457 	}
9458         if ((attname != NULL) && (attvalue != NULL)) {
9459 	    if (len < 0) len = xmlStrlen(attvalue);
9460             if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9461 	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9462 		xmlURIPtr uri;
9463 
9464                 if (URL == NULL) {
9465 		    xmlErrMemory(ctxt, "dictionary allocation failure");
9466 		    if ((attvalue != NULL) && (alloc != 0))
9467 			xmlFree(attvalue);
9468 		    return(NULL);
9469 		}
9470                 if (*URL != 0) {
9471 		    uri = xmlParseURI((const char *) URL);
9472 		    if (uri == NULL) {
9473 			xmlNsErr(ctxt, XML_WAR_NS_URI,
9474 			         "xmlns: '%s' is not a valid URI\n",
9475 					   URL, NULL, NULL);
9476 		    } else {
9477 			if (uri->scheme == NULL) {
9478 			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9479 				      "xmlns: URI %s is not absolute\n",
9480 				      URL, NULL, NULL);
9481 			}
9482 			xmlFreeURI(uri);
9483 		    }
9484 		    if (URL == ctxt->str_xml_ns) {
9485 			if (attname != ctxt->str_xml) {
9486 			    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9487 			 "xml namespace URI cannot be the default namespace\n",
9488 				     NULL, NULL, NULL);
9489 			}
9490 			goto skip_default_ns;
9491 		    }
9492 		    if ((len == 29) &&
9493 			(xmlStrEqual(URL,
9494 				 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9495 			xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9496 			     "reuse of the xmlns namespace name is forbidden\n",
9497 				 NULL, NULL, NULL);
9498 			goto skip_default_ns;
9499 		    }
9500 		}
9501 		/*
9502 		 * check that it's not a defined namespace
9503 		 */
9504 		for (j = 1;j <= nbNs;j++)
9505 		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9506 			break;
9507 		if (j <= nbNs)
9508 		    xmlErrAttributeDup(ctxt, NULL, attname);
9509 		else
9510 		    if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9511 skip_default_ns:
9512 		if ((attvalue != NULL) && (alloc != 0)) {
9513 		    xmlFree(attvalue);
9514 		    attvalue = NULL;
9515 		}
9516 		if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9517 		    break;
9518 		if (!IS_BLANK_CH(RAW)) {
9519 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9520 				   "attributes construct error\n");
9521 		    break;
9522 		}
9523 		SKIP_BLANKS;
9524 		if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9525 		    goto base_changed;
9526 		continue;
9527 	    }
9528             if (aprefix == ctxt->str_xmlns) {
9529 	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9530 		xmlURIPtr uri;
9531 
9532                 if (attname == ctxt->str_xml) {
9533 		    if (URL != ctxt->str_xml_ns) {
9534 		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9535 			         "xml namespace prefix mapped to wrong URI\n",
9536 			         NULL, NULL, NULL);
9537 		    }
9538 		    /*
9539 		     * Do not keep a namespace definition node
9540 		     */
9541 		    goto skip_ns;
9542 		}
9543                 if (URL == ctxt->str_xml_ns) {
9544 		    if (attname != ctxt->str_xml) {
9545 		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9546 			         "xml namespace URI mapped to wrong prefix\n",
9547 			         NULL, NULL, NULL);
9548 		    }
9549 		    goto skip_ns;
9550 		}
9551                 if (attname == ctxt->str_xmlns) {
9552 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9553 			     "redefinition of the xmlns prefix is forbidden\n",
9554 			     NULL, NULL, NULL);
9555 		    goto skip_ns;
9556 		}
9557 		if ((len == 29) &&
9558 		    (xmlStrEqual(URL,
9559 		                 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9560 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9561 			     "reuse of the xmlns namespace name is forbidden\n",
9562 			     NULL, NULL, NULL);
9563 		    goto skip_ns;
9564 		}
9565 		if ((URL == NULL) || (URL[0] == 0)) {
9566 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9567 		             "xmlns:%s: Empty XML namespace is not allowed\n",
9568 			          attname, NULL, NULL);
9569 		    goto skip_ns;
9570 		} else {
9571 		    uri = xmlParseURI((const char *) URL);
9572 		    if (uri == NULL) {
9573 			xmlNsErr(ctxt, XML_WAR_NS_URI,
9574 			     "xmlns:%s: '%s' is not a valid URI\n",
9575 					   attname, URL, NULL);
9576 		    } else {
9577 			if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9578 			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9579 				      "xmlns:%s: URI %s is not absolute\n",
9580 				      attname, URL, NULL);
9581 			}
9582 			xmlFreeURI(uri);
9583 		    }
9584 		}
9585 
9586 		/*
9587 		 * check that it's not a defined namespace
9588 		 */
9589 		for (j = 1;j <= nbNs;j++)
9590 		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9591 			break;
9592 		if (j <= nbNs)
9593 		    xmlErrAttributeDup(ctxt, aprefix, attname);
9594 		else
9595 		    if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9596 skip_ns:
9597 		if ((attvalue != NULL) && (alloc != 0)) {
9598 		    xmlFree(attvalue);
9599 		    attvalue = NULL;
9600 		}
9601 		if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9602 		    break;
9603 		if (!IS_BLANK_CH(RAW)) {
9604 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9605 				   "attributes construct error\n");
9606 		    break;
9607 		}
9608 		SKIP_BLANKS;
9609 		if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9610 		    goto base_changed;
9611 		continue;
9612 	    }
9613 
9614 	    /*
9615 	     * Add the pair to atts
9616 	     */
9617 	    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9618 	        if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9619 		    if (attvalue[len] == 0)
9620 			xmlFree(attvalue);
9621 		    goto failed;
9622 		}
9623 	        maxatts = ctxt->maxatts;
9624 		atts = ctxt->atts;
9625 	    }
9626 	    ctxt->attallocs[nratts++] = alloc;
9627 	    atts[nbatts++] = attname;
9628 	    atts[nbatts++] = aprefix;
9629 	    atts[nbatts++] = NULL; /* the URI will be fetched later */
9630 	    atts[nbatts++] = attvalue;
9631 	    attvalue += len;
9632 	    atts[nbatts++] = attvalue;
9633 	    /*
9634 	     * tag if some deallocation is needed
9635 	     */
9636 	    if (alloc != 0) attval = 1;
9637 	} else {
9638 	    if ((attvalue != NULL) && (attvalue[len] == 0))
9639 		xmlFree(attvalue);
9640 	}
9641 
9642 failed:
9643 
9644 	GROW
9645         if (ctxt->instate == XML_PARSER_EOF)
9646             break;
9647 	if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9648 	    goto base_changed;
9649 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9650 	    break;
9651 	if (!IS_BLANK_CH(RAW)) {
9652 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9653 			   "attributes construct error\n");
9654 	    break;
9655 	}
9656 	SKIP_BLANKS;
9657         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9658             (attname == NULL) && (attvalue == NULL)) {
9659 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9660 	         "xmlParseStartTag: problem parsing attributes\n");
9661 	    break;
9662 	}
9663         GROW;
9664 	if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9665 	    goto base_changed;
9666     }
9667 
9668     /*
9669      * The attributes defaulting
9670      */
9671     if (ctxt->attsDefault != NULL) {
9672         xmlDefAttrsPtr defaults;
9673 
9674 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9675 	if (defaults != NULL) {
9676 	    for (i = 0;i < defaults->nbAttrs;i++) {
9677 	        attname = defaults->values[5 * i];
9678 		aprefix = defaults->values[5 * i + 1];
9679 
9680                 /*
9681 		 * special work for namespaces defaulted defs
9682 		 */
9683 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9684 		    /*
9685 		     * check that it's not a defined namespace
9686 		     */
9687 		    for (j = 1;j <= nbNs;j++)
9688 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9689 			    break;
9690 	            if (j <= nbNs) continue;
9691 
9692 		    nsname = xmlGetNamespace(ctxt, NULL);
9693 		    if (nsname != defaults->values[5 * i + 2]) {
9694 			if (nsPush(ctxt, NULL,
9695 			           defaults->values[5 * i + 2]) > 0)
9696 			    nbNs++;
9697 		    }
9698 		} else if (aprefix == ctxt->str_xmlns) {
9699 		    /*
9700 		     * check that it's not a defined namespace
9701 		     */
9702 		    for (j = 1;j <= nbNs;j++)
9703 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9704 			    break;
9705 	            if (j <= nbNs) continue;
9706 
9707 		    nsname = xmlGetNamespace(ctxt, attname);
9708 		    if (nsname != defaults->values[2]) {
9709 			if (nsPush(ctxt, attname,
9710 			           defaults->values[5 * i + 2]) > 0)
9711 			    nbNs++;
9712 		    }
9713 		} else {
9714 		    /*
9715 		     * check that it's not a defined attribute
9716 		     */
9717 		    for (j = 0;j < nbatts;j+=5) {
9718 			if ((attname == atts[j]) && (aprefix == atts[j+1]))
9719 			    break;
9720 		    }
9721 		    if (j < nbatts) continue;
9722 
9723 		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9724 			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9725 			    return(NULL);
9726 			}
9727 			maxatts = ctxt->maxatts;
9728 			atts = ctxt->atts;
9729 		    }
9730 		    atts[nbatts++] = attname;
9731 		    atts[nbatts++] = aprefix;
9732 		    if (aprefix == NULL)
9733 			atts[nbatts++] = NULL;
9734 		    else
9735 		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9736 		    atts[nbatts++] = defaults->values[5 * i + 2];
9737 		    atts[nbatts++] = defaults->values[5 * i + 3];
9738 		    if ((ctxt->standalone == 1) &&
9739 		        (defaults->values[5 * i + 4] != NULL)) {
9740 			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9741 	  "standalone: attribute %s on %s defaulted from external subset\n",
9742 	                                 attname, localname);
9743 		    }
9744 		    nbdef++;
9745 		}
9746 	    }
9747 	}
9748     }
9749 
9750     /*
9751      * The attributes checkings
9752      */
9753     for (i = 0; i < nbatts;i += 5) {
9754         /*
9755 	* The default namespace does not apply to attribute names.
9756 	*/
9757 	if (atts[i + 1] != NULL) {
9758 	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9759 	    if (nsname == NULL) {
9760 		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9761 		    "Namespace prefix %s for %s on %s is not defined\n",
9762 		    atts[i + 1], atts[i], localname);
9763 	    }
9764 	    atts[i + 2] = nsname;
9765 	} else
9766 	    nsname = NULL;
9767 	/*
9768 	 * [ WFC: Unique Att Spec ]
9769 	 * No attribute name may appear more than once in the same
9770 	 * start-tag or empty-element tag.
9771 	 * As extended by the Namespace in XML REC.
9772 	 */
9773         for (j = 0; j < i;j += 5) {
9774 	    if (atts[i] == atts[j]) {
9775 	        if (atts[i+1] == atts[j+1]) {
9776 		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9777 		    break;
9778 		}
9779 		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9780 		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9781 			     "Namespaced Attribute %s in '%s' redefined\n",
9782 			     atts[i], nsname, NULL);
9783 		    break;
9784 		}
9785 	    }
9786 	}
9787     }
9788 
9789     nsname = xmlGetNamespace(ctxt, prefix);
9790     if ((prefix != NULL) && (nsname == NULL)) {
9791 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9792 	         "Namespace prefix %s on %s is not defined\n",
9793 		 prefix, localname, NULL);
9794     }
9795     *pref = prefix;
9796     *URI = nsname;
9797 
9798     /*
9799      * SAX: Start of Element !
9800      */
9801     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9802 	(!ctxt->disableSAX)) {
9803 	if (nbNs > 0)
9804 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9805 			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9806 			  nbatts / 5, nbdef, atts);
9807 	else
9808 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9809 	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9810     }
9811 
9812     /*
9813      * Free up attribute allocated strings if needed
9814      */
9815     if (attval != 0) {
9816 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9817 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9818 	        xmlFree((xmlChar *) atts[i]);
9819     }
9820 
9821     return(localname);
9822 
9823 base_changed:
9824     /*
9825      * the attribute strings are valid iif the base didn't changed
9826      */
9827     if (attval != 0) {
9828 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9829 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9830 	        xmlFree((xmlChar *) atts[i]);
9831     }
9832 
9833     /*
9834      * We can't switch from one entity to another in the middle
9835      * of a start tag
9836      */
9837     if (inputNr != ctxt->inputNr) {
9838         xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
9839 		    "Start tag doesn't start and stop in the same entity\n");
9840 	return(NULL);
9841     }
9842 
9843     ctxt->input->cur = ctxt->input->base + cur;
9844     ctxt->input->line = oldline;
9845     ctxt->input->col = oldcol;
9846     if (ctxt->wellFormed == 1) {
9847 	goto reparse;
9848     }
9849     return(NULL);
9850 }
9851 
9852 /**
9853  * xmlParseEndTag2:
9854  * @ctxt:  an XML parser context
9855  * @line:  line of the start tag
9856  * @nsNr:  number of namespaces on the start tag
9857  *
9858  * parse an end of tag
9859  *
9860  * [42] ETag ::= '</' Name S? '>'
9861  *
9862  * With namespace
9863  *
9864  * [NS 9] ETag ::= '</' QName S? '>'
9865  */
9866 
9867 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9868 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9869                 const xmlChar *URI, int line, int nsNr, int tlen) {
9870     const xmlChar *name;
9871     size_t curLength;
9872 
9873     GROW;
9874     if ((RAW != '<') || (NXT(1) != '/')) {
9875 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9876 	return;
9877     }
9878     SKIP(2);
9879 
9880     curLength = ctxt->input->end - ctxt->input->cur;
9881     if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9882         (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9883         if ((curLength >= (size_t)(tlen + 1)) &&
9884 	    (ctxt->input->cur[tlen] == '>')) {
9885 	    ctxt->input->cur += tlen + 1;
9886 	    ctxt->input->col += tlen + 1;
9887 	    goto done;
9888 	}
9889 	ctxt->input->cur += tlen;
9890 	ctxt->input->col += tlen;
9891 	name = (xmlChar*)1;
9892     } else {
9893 	if (prefix == NULL)
9894 	    name = xmlParseNameAndCompare(ctxt, ctxt->name);
9895 	else
9896 	    name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9897     }
9898 
9899     /*
9900      * We should definitely be at the ending "S? '>'" part
9901      */
9902     GROW;
9903     if (ctxt->instate == XML_PARSER_EOF)
9904         return;
9905     SKIP_BLANKS;
9906     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9907 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9908     } else
9909 	NEXT1;
9910 
9911     /*
9912      * [ WFC: Element Type Match ]
9913      * The Name in an element's end-tag must match the element type in the
9914      * start-tag.
9915      *
9916      */
9917     if (name != (xmlChar*)1) {
9918         if (name == NULL) name = BAD_CAST "unparseable";
9919         if ((line == 0) && (ctxt->node != NULL))
9920             line = ctxt->node->line;
9921         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9922 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9923 		                ctxt->name, line, name);
9924     }
9925 
9926     /*
9927      * SAX: End of Tag
9928      */
9929 done:
9930     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9931 	(!ctxt->disableSAX))
9932 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9933 
9934     spacePop(ctxt);
9935     if (nsNr != 0)
9936 	nsPop(ctxt, nsNr);
9937     return;
9938 }
9939 
9940 /**
9941  * xmlParseCDSect:
9942  * @ctxt:  an XML parser context
9943  *
9944  * Parse escaped pure raw content.
9945  *
9946  * [18] CDSect ::= CDStart CData CDEnd
9947  *
9948  * [19] CDStart ::= '<![CDATA['
9949  *
9950  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9951  *
9952  * [21] CDEnd ::= ']]>'
9953  */
9954 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9955 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9956     xmlChar *buf = NULL;
9957     int len = 0;
9958     int size = XML_PARSER_BUFFER_SIZE;
9959     int r, rl;
9960     int	s, sl;
9961     int cur, l;
9962     int count = 0;
9963 
9964     /* Check 2.6.0 was NXT(0) not RAW */
9965     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9966 	SKIP(9);
9967     } else
9968         return;
9969 
9970     ctxt->instate = XML_PARSER_CDATA_SECTION;
9971     r = CUR_CHAR(rl);
9972     if (!IS_CHAR(r)) {
9973 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9974 	ctxt->instate = XML_PARSER_CONTENT;
9975         return;
9976     }
9977     NEXTL(rl);
9978     s = CUR_CHAR(sl);
9979     if (!IS_CHAR(s)) {
9980 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9981 	ctxt->instate = XML_PARSER_CONTENT;
9982         return;
9983     }
9984     NEXTL(sl);
9985     cur = CUR_CHAR(l);
9986     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9987     if (buf == NULL) {
9988 	xmlErrMemory(ctxt, NULL);
9989 	return;
9990     }
9991     while (IS_CHAR(cur) &&
9992            ((r != ']') || (s != ']') || (cur != '>'))) {
9993 	if (len + 5 >= size) {
9994 	    xmlChar *tmp;
9995 
9996             if ((size > XML_MAX_TEXT_LENGTH) &&
9997                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9998                 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9999                              "CData section too big found", NULL);
10000                 xmlFree (buf);
10001                 return;
10002             }
10003 	    tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
10004 	    if (tmp == NULL) {
10005 	        xmlFree(buf);
10006 		xmlErrMemory(ctxt, NULL);
10007 		return;
10008 	    }
10009 	    buf = tmp;
10010 	    size *= 2;
10011 	}
10012 	COPY_BUF(rl,buf,len,r);
10013 	r = s;
10014 	rl = sl;
10015 	s = cur;
10016 	sl = l;
10017 	count++;
10018 	if (count > 50) {
10019 	    GROW;
10020             if (ctxt->instate == XML_PARSER_EOF) {
10021 		xmlFree(buf);
10022 		return;
10023             }
10024 	    count = 0;
10025 	}
10026 	NEXTL(l);
10027 	cur = CUR_CHAR(l);
10028     }
10029     buf[len] = 0;
10030     ctxt->instate = XML_PARSER_CONTENT;
10031     if (cur != '>') {
10032 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
10033 	                     "CData section not finished\n%.50s\n", buf);
10034 	xmlFree(buf);
10035         return;
10036     }
10037     NEXTL(l);
10038 
10039     /*
10040      * OK the buffer is to be consumed as cdata.
10041      */
10042     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10043 	if (ctxt->sax->cdataBlock != NULL)
10044 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
10045 	else if (ctxt->sax->characters != NULL)
10046 	    ctxt->sax->characters(ctxt->userData, buf, len);
10047     }
10048     xmlFree(buf);
10049 }
10050 
10051 /**
10052  * xmlParseContent:
10053  * @ctxt:  an XML parser context
10054  *
10055  * Parse a content:
10056  *
10057  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10058  */
10059 
10060 void
xmlParseContent(xmlParserCtxtPtr ctxt)10061 xmlParseContent(xmlParserCtxtPtr ctxt) {
10062     GROW;
10063     while ((RAW != 0) &&
10064 	   ((RAW != '<') || (NXT(1) != '/')) &&
10065 	   (ctxt->instate != XML_PARSER_EOF)) {
10066 	const xmlChar *test = CUR_PTR;
10067 	unsigned int cons = ctxt->input->consumed;
10068 	const xmlChar *cur = ctxt->input->cur;
10069 
10070 	/*
10071 	 * First case : a Processing Instruction.
10072 	 */
10073 	if ((*cur == '<') && (cur[1] == '?')) {
10074 	    xmlParsePI(ctxt);
10075 	}
10076 
10077 	/*
10078 	 * Second case : a CDSection
10079 	 */
10080 	/* 2.6.0 test was *cur not RAW */
10081 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
10082 	    xmlParseCDSect(ctxt);
10083 	}
10084 
10085 	/*
10086 	 * Third case :  a comment
10087 	 */
10088 	else if ((*cur == '<') && (NXT(1) == '!') &&
10089 		 (NXT(2) == '-') && (NXT(3) == '-')) {
10090 	    xmlParseComment(ctxt);
10091 	    ctxt->instate = XML_PARSER_CONTENT;
10092 	}
10093 
10094 	/*
10095 	 * Fourth case :  a sub-element.
10096 	 */
10097 	else if (*cur == '<') {
10098 	    xmlParseElement(ctxt);
10099 	}
10100 
10101 	/*
10102 	 * Fifth case : a reference. If if has not been resolved,
10103 	 *    parsing returns it's Name, create the node
10104 	 */
10105 
10106 	else if (*cur == '&') {
10107 	    xmlParseReference(ctxt);
10108 	}
10109 
10110 	/*
10111 	 * Last case, text. Note that References are handled directly.
10112 	 */
10113 	else {
10114 	    xmlParseCharData(ctxt, 0);
10115 	}
10116 
10117 	GROW;
10118 	/*
10119 	 * Pop-up of finished entities.
10120 	 */
10121 	while ((RAW == 0) && (ctxt->inputNr > 1))
10122 	    xmlPopInput(ctxt);
10123 	SHRINK;
10124 
10125 	if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10126 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10127 	                "detected an error in element content\n");
10128 	    xmlHaltParser(ctxt);
10129             break;
10130 	}
10131     }
10132 }
10133 
10134 /**
10135  * xmlParseElement:
10136  * @ctxt:  an XML parser context
10137  *
10138  * parse an XML element, this is highly recursive
10139  *
10140  * [39] element ::= EmptyElemTag | STag content ETag
10141  *
10142  * [ WFC: Element Type Match ]
10143  * The Name in an element's end-tag must match the element type in the
10144  * start-tag.
10145  *
10146  */
10147 
10148 void
xmlParseElement(xmlParserCtxtPtr ctxt)10149 xmlParseElement(xmlParserCtxtPtr ctxt) {
10150     const xmlChar *name;
10151     const xmlChar *prefix = NULL;
10152     const xmlChar *URI = NULL;
10153     xmlParserNodeInfo node_info;
10154     int line, tlen = 0;
10155     xmlNodePtr ret;
10156     int nsNr = ctxt->nsNr;
10157 
10158     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10159         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10160 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10161 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10162 			  xmlParserMaxDepth);
10163 	xmlHaltParser(ctxt);
10164 	return;
10165     }
10166 
10167     /* Capture start position */
10168     if (ctxt->record_info) {
10169         node_info.begin_pos = ctxt->input->consumed +
10170                           (CUR_PTR - ctxt->input->base);
10171 	node_info.begin_line = ctxt->input->line;
10172     }
10173 
10174     if (ctxt->spaceNr == 0)
10175 	spacePush(ctxt, -1);
10176     else if (*ctxt->space == -2)
10177 	spacePush(ctxt, -1);
10178     else
10179 	spacePush(ctxt, *ctxt->space);
10180 
10181     line = ctxt->input->line;
10182 #ifdef LIBXML_SAX1_ENABLED
10183     if (ctxt->sax2)
10184 #endif /* LIBXML_SAX1_ENABLED */
10185         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10186 #ifdef LIBXML_SAX1_ENABLED
10187     else
10188 	name = xmlParseStartTag(ctxt);
10189 #endif /* LIBXML_SAX1_ENABLED */
10190     if (ctxt->instate == XML_PARSER_EOF)
10191 	return;
10192     if (name == NULL) {
10193 	spacePop(ctxt);
10194         return;
10195     }
10196     namePush(ctxt, name);
10197     ret = ctxt->node;
10198 
10199 #ifdef LIBXML_VALID_ENABLED
10200     /*
10201      * [ VC: Root Element Type ]
10202      * The Name in the document type declaration must match the element
10203      * type of the root element.
10204      */
10205     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10206         ctxt->node && (ctxt->node == ctxt->myDoc->children))
10207         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10208 #endif /* LIBXML_VALID_ENABLED */
10209 
10210     /*
10211      * Check for an Empty Element.
10212      */
10213     if ((RAW == '/') && (NXT(1) == '>')) {
10214         SKIP(2);
10215 	if (ctxt->sax2) {
10216 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10217 		(!ctxt->disableSAX))
10218 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10219 #ifdef LIBXML_SAX1_ENABLED
10220 	} else {
10221 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10222 		(!ctxt->disableSAX))
10223 		ctxt->sax->endElement(ctxt->userData, name);
10224 #endif /* LIBXML_SAX1_ENABLED */
10225 	}
10226 	namePop(ctxt);
10227 	spacePop(ctxt);
10228 	if (nsNr != ctxt->nsNr)
10229 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10230 	if ( ret != NULL && ctxt->record_info ) {
10231 	   node_info.end_pos = ctxt->input->consumed +
10232 			      (CUR_PTR - ctxt->input->base);
10233 	   node_info.end_line = ctxt->input->line;
10234 	   node_info.node = ret;
10235 	   xmlParserAddNodeInfo(ctxt, &node_info);
10236 	}
10237 	return;
10238     }
10239     if (RAW == '>') {
10240         NEXT1;
10241     } else {
10242         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10243 		     "Couldn't find end of Start Tag %s line %d\n",
10244 		                name, line, NULL);
10245 
10246 	/*
10247 	 * end of parsing of this node.
10248 	 */
10249 	nodePop(ctxt);
10250 	namePop(ctxt);
10251 	spacePop(ctxt);
10252 	if (nsNr != ctxt->nsNr)
10253 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10254 
10255 	/*
10256 	 * Capture end position and add node
10257 	 */
10258 	if ( ret != NULL && ctxt->record_info ) {
10259 	   node_info.end_pos = ctxt->input->consumed +
10260 			      (CUR_PTR - ctxt->input->base);
10261 	   node_info.end_line = ctxt->input->line;
10262 	   node_info.node = ret;
10263 	   xmlParserAddNodeInfo(ctxt, &node_info);
10264 	}
10265 	return;
10266     }
10267 
10268     /*
10269      * Parse the content of the element:
10270      */
10271     xmlParseContent(ctxt);
10272     if (ctxt->instate == XML_PARSER_EOF)
10273 	return;
10274     if (!IS_BYTE_CHAR(RAW)) {
10275         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10276 	 "Premature end of data in tag %s line %d\n",
10277 		                name, line, NULL);
10278 
10279 	/*
10280 	 * end of parsing of this node.
10281 	 */
10282 	nodePop(ctxt);
10283 	namePop(ctxt);
10284 	spacePop(ctxt);
10285 	if (nsNr != ctxt->nsNr)
10286 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10287 	return;
10288     }
10289 
10290     /*
10291      * parse the end of tag: '</' should be here.
10292      */
10293     if (ctxt->sax2) {
10294 	xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10295 	namePop(ctxt);
10296     }
10297 #ifdef LIBXML_SAX1_ENABLED
10298       else
10299 	xmlParseEndTag1(ctxt, line);
10300 #endif /* LIBXML_SAX1_ENABLED */
10301 
10302     /*
10303      * Capture end position and add node
10304      */
10305     if ( ret != NULL && ctxt->record_info ) {
10306        node_info.end_pos = ctxt->input->consumed +
10307                           (CUR_PTR - ctxt->input->base);
10308        node_info.end_line = ctxt->input->line;
10309        node_info.node = ret;
10310        xmlParserAddNodeInfo(ctxt, &node_info);
10311     }
10312 }
10313 
10314 /**
10315  * xmlParseVersionNum:
10316  * @ctxt:  an XML parser context
10317  *
10318  * parse the XML version value.
10319  *
10320  * [26] VersionNum ::= '1.' [0-9]+
10321  *
10322  * In practice allow [0-9].[0-9]+ at that level
10323  *
10324  * Returns the string giving the XML version number, or NULL
10325  */
10326 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10327 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10328     xmlChar *buf = NULL;
10329     int len = 0;
10330     int size = 10;
10331     xmlChar cur;
10332 
10333     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10334     if (buf == NULL) {
10335 	xmlErrMemory(ctxt, NULL);
10336 	return(NULL);
10337     }
10338     cur = CUR;
10339     if (!((cur >= '0') && (cur <= '9'))) {
10340 	xmlFree(buf);
10341 	return(NULL);
10342     }
10343     buf[len++] = cur;
10344     NEXT;
10345     cur=CUR;
10346     if (cur != '.') {
10347 	xmlFree(buf);
10348 	return(NULL);
10349     }
10350     buf[len++] = cur;
10351     NEXT;
10352     cur=CUR;
10353     while ((cur >= '0') && (cur <= '9')) {
10354 	if (len + 1 >= size) {
10355 	    xmlChar *tmp;
10356 
10357 	    size *= 2;
10358 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10359 	    if (tmp == NULL) {
10360 	        xmlFree(buf);
10361 		xmlErrMemory(ctxt, NULL);
10362 		return(NULL);
10363 	    }
10364 	    buf = tmp;
10365 	}
10366 	buf[len++] = cur;
10367 	NEXT;
10368 	cur=CUR;
10369     }
10370     buf[len] = 0;
10371     return(buf);
10372 }
10373 
10374 /**
10375  * xmlParseVersionInfo:
10376  * @ctxt:  an XML parser context
10377  *
10378  * parse the XML version.
10379  *
10380  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10381  *
10382  * [25] Eq ::= S? '=' S?
10383  *
10384  * Returns the version string, e.g. "1.0"
10385  */
10386 
10387 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10388 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10389     xmlChar *version = NULL;
10390 
10391     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10392 	SKIP(7);
10393 	SKIP_BLANKS;
10394 	if (RAW != '=') {
10395 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10396 	    return(NULL);
10397         }
10398 	NEXT;
10399 	SKIP_BLANKS;
10400 	if (RAW == '"') {
10401 	    NEXT;
10402 	    version = xmlParseVersionNum(ctxt);
10403 	    if (RAW != '"') {
10404 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10405 	    } else
10406 	        NEXT;
10407 	} else if (RAW == '\''){
10408 	    NEXT;
10409 	    version = xmlParseVersionNum(ctxt);
10410 	    if (RAW != '\'') {
10411 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10412 	    } else
10413 	        NEXT;
10414 	} else {
10415 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10416 	}
10417     }
10418     return(version);
10419 }
10420 
10421 /**
10422  * xmlParseEncName:
10423  * @ctxt:  an XML parser context
10424  *
10425  * parse the XML encoding name
10426  *
10427  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10428  *
10429  * Returns the encoding name value or NULL
10430  */
10431 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10432 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10433     xmlChar *buf = NULL;
10434     int len = 0;
10435     int size = 10;
10436     xmlChar cur;
10437 
10438     cur = CUR;
10439     if (((cur >= 'a') && (cur <= 'z')) ||
10440         ((cur >= 'A') && (cur <= 'Z'))) {
10441 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10442 	if (buf == NULL) {
10443 	    xmlErrMemory(ctxt, NULL);
10444 	    return(NULL);
10445 	}
10446 
10447 	buf[len++] = cur;
10448 	NEXT;
10449 	cur = CUR;
10450 	while (((cur >= 'a') && (cur <= 'z')) ||
10451 	       ((cur >= 'A') && (cur <= 'Z')) ||
10452 	       ((cur >= '0') && (cur <= '9')) ||
10453 	       (cur == '.') || (cur == '_') ||
10454 	       (cur == '-')) {
10455 	    if (len + 1 >= size) {
10456 	        xmlChar *tmp;
10457 
10458 		size *= 2;
10459 		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10460 		if (tmp == NULL) {
10461 		    xmlErrMemory(ctxt, NULL);
10462 		    xmlFree(buf);
10463 		    return(NULL);
10464 		}
10465 		buf = tmp;
10466 	    }
10467 	    buf[len++] = cur;
10468 	    NEXT;
10469 	    cur = CUR;
10470 	    if (cur == 0) {
10471 	        SHRINK;
10472 		GROW;
10473 		cur = CUR;
10474 	    }
10475         }
10476 	buf[len] = 0;
10477     } else {
10478 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10479     }
10480     return(buf);
10481 }
10482 
10483 /**
10484  * xmlParseEncodingDecl:
10485  * @ctxt:  an XML parser context
10486  *
10487  * parse the XML encoding declaration
10488  *
10489  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10490  *
10491  * this setups the conversion filters.
10492  *
10493  * Returns the encoding value or NULL
10494  */
10495 
10496 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10497 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10498     xmlChar *encoding = NULL;
10499 
10500     SKIP_BLANKS;
10501     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10502 	SKIP(8);
10503 	SKIP_BLANKS;
10504 	if (RAW != '=') {
10505 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10506 	    return(NULL);
10507         }
10508 	NEXT;
10509 	SKIP_BLANKS;
10510 	if (RAW == '"') {
10511 	    NEXT;
10512 	    encoding = xmlParseEncName(ctxt);
10513 	    if (RAW != '"') {
10514 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10515 		xmlFree((xmlChar *) encoding);
10516 		return(NULL);
10517 	    } else
10518 	        NEXT;
10519 	} else if (RAW == '\''){
10520 	    NEXT;
10521 	    encoding = xmlParseEncName(ctxt);
10522 	    if (RAW != '\'') {
10523 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10524 		xmlFree((xmlChar *) encoding);
10525 		return(NULL);
10526 	    } else
10527 	        NEXT;
10528 	} else {
10529 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10530 	}
10531 
10532         /*
10533          * Non standard parsing, allowing the user to ignore encoding
10534          */
10535         if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10536 	    xmlFree((xmlChar *) encoding);
10537             return(NULL);
10538 	}
10539 
10540 	/*
10541 	 * UTF-16 encoding stwich has already taken place at this stage,
10542 	 * more over the little-endian/big-endian selection is already done
10543 	 */
10544         if ((encoding != NULL) &&
10545 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10546 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10547 	    /*
10548 	     * If no encoding was passed to the parser, that we are
10549 	     * using UTF-16 and no decoder is present i.e. the
10550 	     * document is apparently UTF-8 compatible, then raise an
10551 	     * encoding mismatch fatal error
10552 	     */
10553 	    if ((ctxt->encoding == NULL) &&
10554 	        (ctxt->input->buf != NULL) &&
10555 	        (ctxt->input->buf->encoder == NULL)) {
10556 		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10557 		  "Document labelled UTF-16 but has UTF-8 content\n");
10558 	    }
10559 	    if (ctxt->encoding != NULL)
10560 		xmlFree((xmlChar *) ctxt->encoding);
10561 	    ctxt->encoding = encoding;
10562 	}
10563 	/*
10564 	 * UTF-8 encoding is handled natively
10565 	 */
10566         else if ((encoding != NULL) &&
10567 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10568 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10569 	    if (ctxt->encoding != NULL)
10570 		xmlFree((xmlChar *) ctxt->encoding);
10571 	    ctxt->encoding = encoding;
10572 	}
10573 	else if (encoding != NULL) {
10574 	    xmlCharEncodingHandlerPtr handler;
10575 
10576 	    if (ctxt->input->encoding != NULL)
10577 		xmlFree((xmlChar *) ctxt->input->encoding);
10578 	    ctxt->input->encoding = encoding;
10579 
10580             handler = xmlFindCharEncodingHandler((const char *) encoding);
10581 	    if (handler != NULL) {
10582 		if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10583 		    /* failed to convert */
10584 		    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10585 		    return(NULL);
10586 		}
10587 	    } else {
10588 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10589 			"Unsupported encoding %s\n", encoding);
10590 		return(NULL);
10591 	    }
10592 	}
10593     }
10594     return(encoding);
10595 }
10596 
10597 /**
10598  * xmlParseSDDecl:
10599  * @ctxt:  an XML parser context
10600  *
10601  * parse the XML standalone declaration
10602  *
10603  * [32] SDDecl ::= S 'standalone' Eq
10604  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10605  *
10606  * [ VC: Standalone Document Declaration ]
10607  * TODO The standalone document declaration must have the value "no"
10608  * if any external markup declarations contain declarations of:
10609  *  - attributes with default values, if elements to which these
10610  *    attributes apply appear in the document without specifications
10611  *    of values for these attributes, or
10612  *  - entities (other than amp, lt, gt, apos, quot), if references
10613  *    to those entities appear in the document, or
10614  *  - attributes with values subject to normalization, where the
10615  *    attribute appears in the document with a value which will change
10616  *    as a result of normalization, or
10617  *  - element types with element content, if white space occurs directly
10618  *    within any instance of those types.
10619  *
10620  * Returns:
10621  *   1 if standalone="yes"
10622  *   0 if standalone="no"
10623  *  -2 if standalone attribute is missing or invalid
10624  *	  (A standalone value of -2 means that the XML declaration was found,
10625  *	   but no value was specified for the standalone attribute).
10626  */
10627 
10628 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10629 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10630     int standalone = -2;
10631 
10632     SKIP_BLANKS;
10633     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10634 	SKIP(10);
10635         SKIP_BLANKS;
10636 	if (RAW != '=') {
10637 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10638 	    return(standalone);
10639         }
10640 	NEXT;
10641 	SKIP_BLANKS;
10642         if (RAW == '\''){
10643 	    NEXT;
10644 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10645 	        standalone = 0;
10646                 SKIP(2);
10647 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10648 	               (NXT(2) == 's')) {
10649 	        standalone = 1;
10650 		SKIP(3);
10651             } else {
10652 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10653 	    }
10654 	    if (RAW != '\'') {
10655 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10656 	    } else
10657 	        NEXT;
10658 	} else if (RAW == '"'){
10659 	    NEXT;
10660 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10661 	        standalone = 0;
10662 		SKIP(2);
10663 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10664 	               (NXT(2) == 's')) {
10665 	        standalone = 1;
10666                 SKIP(3);
10667             } else {
10668 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10669 	    }
10670 	    if (RAW != '"') {
10671 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10672 	    } else
10673 	        NEXT;
10674 	} else {
10675 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10676         }
10677     }
10678     return(standalone);
10679 }
10680 
10681 /**
10682  * xmlParseXMLDecl:
10683  * @ctxt:  an XML parser context
10684  *
10685  * parse an XML declaration header
10686  *
10687  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10688  */
10689 
10690 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10691 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10692     xmlChar *version;
10693 
10694     /*
10695      * This value for standalone indicates that the document has an
10696      * XML declaration but it does not have a standalone attribute.
10697      * It will be overwritten later if a standalone attribute is found.
10698      */
10699     ctxt->input->standalone = -2;
10700 
10701     /*
10702      * We know that '<?xml' is here.
10703      */
10704     SKIP(5);
10705 
10706     if (!IS_BLANK_CH(RAW)) {
10707 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10708 	               "Blank needed after '<?xml'\n");
10709     }
10710     SKIP_BLANKS;
10711 
10712     /*
10713      * We must have the VersionInfo here.
10714      */
10715     version = xmlParseVersionInfo(ctxt);
10716     if (version == NULL) {
10717 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10718     } else {
10719 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10720 	    /*
10721 	     * Changed here for XML-1.0 5th edition
10722 	     */
10723 	    if (ctxt->options & XML_PARSE_OLD10) {
10724 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10725 			          "Unsupported version '%s'\n",
10726 			          version);
10727 	    } else {
10728 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10729 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10730 		                  "Unsupported version '%s'\n",
10731 				  version, NULL);
10732 		} else {
10733 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10734 				      "Unsupported version '%s'\n",
10735 				      version);
10736 		}
10737 	    }
10738 	}
10739 	if (ctxt->version != NULL)
10740 	    xmlFree((void *) ctxt->version);
10741 	ctxt->version = version;
10742     }
10743 
10744     /*
10745      * We may have the encoding declaration
10746      */
10747     if (!IS_BLANK_CH(RAW)) {
10748         if ((RAW == '?') && (NXT(1) == '>')) {
10749 	    SKIP(2);
10750 	    return;
10751 	}
10752 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10753     }
10754     xmlParseEncodingDecl(ctxt);
10755     if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10756          (ctxt->instate == XML_PARSER_EOF)) {
10757 	/*
10758 	 * The XML REC instructs us to stop parsing right here
10759 	 */
10760         return;
10761     }
10762 
10763     /*
10764      * We may have the standalone status.
10765      */
10766     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10767         if ((RAW == '?') && (NXT(1) == '>')) {
10768 	    SKIP(2);
10769 	    return;
10770 	}
10771 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10772     }
10773 
10774     /*
10775      * We can grow the input buffer freely at that point
10776      */
10777     GROW;
10778 
10779     SKIP_BLANKS;
10780     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10781 
10782     SKIP_BLANKS;
10783     if ((RAW == '?') && (NXT(1) == '>')) {
10784         SKIP(2);
10785     } else if (RAW == '>') {
10786         /* Deprecated old WD ... */
10787 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10788 	NEXT;
10789     } else {
10790 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10791 	MOVETO_ENDTAG(CUR_PTR);
10792 	NEXT;
10793     }
10794 }
10795 
10796 /**
10797  * xmlParseMisc:
10798  * @ctxt:  an XML parser context
10799  *
10800  * parse an XML Misc* optional field.
10801  *
10802  * [27] Misc ::= Comment | PI |  S
10803  */
10804 
10805 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10806 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10807     while ((ctxt->instate != XML_PARSER_EOF) &&
10808            (((RAW == '<') && (NXT(1) == '?')) ||
10809             (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10810             IS_BLANK_CH(CUR))) {
10811         if ((RAW == '<') && (NXT(1) == '?')) {
10812 	    xmlParsePI(ctxt);
10813 	} else if (IS_BLANK_CH(CUR)) {
10814 	    NEXT;
10815 	} else
10816 	    xmlParseComment(ctxt);
10817     }
10818 }
10819 
10820 /**
10821  * xmlParseDocument:
10822  * @ctxt:  an XML parser context
10823  *
10824  * parse an XML document (and build a tree if using the standard SAX
10825  * interface).
10826  *
10827  * [1] document ::= prolog element Misc*
10828  *
10829  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10830  *
10831  * Returns 0, -1 in case of error. the parser context is augmented
10832  *                as a result of the parsing.
10833  */
10834 
10835 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10836 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10837     xmlChar start[4];
10838     xmlCharEncoding enc;
10839 
10840     xmlInitParser();
10841 
10842     if ((ctxt == NULL) || (ctxt->input == NULL))
10843         return(-1);
10844 
10845     GROW;
10846 
10847     /*
10848      * SAX: detecting the level.
10849      */
10850     xmlDetectSAX2(ctxt);
10851 
10852     /*
10853      * SAX: beginning of the document processing.
10854      */
10855     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10856         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10857     if (ctxt->instate == XML_PARSER_EOF)
10858 	return(-1);
10859 
10860     if ((ctxt->encoding == NULL) &&
10861         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10862 	/*
10863 	 * Get the 4 first bytes and decode the charset
10864 	 * if enc != XML_CHAR_ENCODING_NONE
10865 	 * plug some encoding conversion routines.
10866 	 */
10867 	start[0] = RAW;
10868 	start[1] = NXT(1);
10869 	start[2] = NXT(2);
10870 	start[3] = NXT(3);
10871 	enc = xmlDetectCharEncoding(&start[0], 4);
10872 	if (enc != XML_CHAR_ENCODING_NONE) {
10873 	    xmlSwitchEncoding(ctxt, enc);
10874 	}
10875     }
10876 
10877 
10878     if (CUR == 0) {
10879 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10880 	return(-1);
10881     }
10882 
10883     /*
10884      * Check for the XMLDecl in the Prolog.
10885      * do not GROW here to avoid the detected encoder to decode more
10886      * than just the first line, unless the amount of data is really
10887      * too small to hold "<?xml version="1.0" encoding="foo"
10888      */
10889     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10890        GROW;
10891     }
10892     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10893 
10894 	/*
10895 	 * Note that we will switch encoding on the fly.
10896 	 */
10897 	xmlParseXMLDecl(ctxt);
10898 	if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10899 	    (ctxt->instate == XML_PARSER_EOF)) {
10900 	    /*
10901 	     * The XML REC instructs us to stop parsing right here
10902 	     */
10903 	    return(-1);
10904 	}
10905 	ctxt->standalone = ctxt->input->standalone;
10906 	SKIP_BLANKS;
10907     } else {
10908 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10909     }
10910     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10911         ctxt->sax->startDocument(ctxt->userData);
10912     if (ctxt->instate == XML_PARSER_EOF)
10913 	return(-1);
10914     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10915         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10916 	ctxt->myDoc->compression = ctxt->input->buf->compressed;
10917     }
10918 
10919     /*
10920      * The Misc part of the Prolog
10921      */
10922     GROW;
10923     xmlParseMisc(ctxt);
10924 
10925     /*
10926      * Then possibly doc type declaration(s) and more Misc
10927      * (doctypedecl Misc*)?
10928      */
10929     GROW;
10930     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10931 
10932 	ctxt->inSubset = 1;
10933 	xmlParseDocTypeDecl(ctxt);
10934 	if (RAW == '[') {
10935 	    ctxt->instate = XML_PARSER_DTD;
10936 	    xmlParseInternalSubset(ctxt);
10937 	    if (ctxt->instate == XML_PARSER_EOF)
10938 		return(-1);
10939 	}
10940 
10941 	/*
10942 	 * Create and update the external subset.
10943 	 */
10944 	ctxt->inSubset = 2;
10945 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10946 	    (!ctxt->disableSAX))
10947 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10948 	                              ctxt->extSubSystem, ctxt->extSubURI);
10949 	if (ctxt->instate == XML_PARSER_EOF)
10950 	    return(-1);
10951 	ctxt->inSubset = 0;
10952 
10953         xmlCleanSpecialAttr(ctxt);
10954 
10955 	ctxt->instate = XML_PARSER_PROLOG;
10956 	xmlParseMisc(ctxt);
10957     }
10958 
10959     /*
10960      * Time to start parsing the tree itself
10961      */
10962     GROW;
10963     if (RAW != '<') {
10964 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10965 		       "Start tag expected, '<' not found\n");
10966     } else {
10967 	ctxt->instate = XML_PARSER_CONTENT;
10968 	xmlParseElement(ctxt);
10969 	ctxt->instate = XML_PARSER_EPILOG;
10970 
10971 
10972 	/*
10973 	 * The Misc part at the end
10974 	 */
10975 	xmlParseMisc(ctxt);
10976 
10977 	if (RAW != 0) {
10978 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10979 	}
10980 	ctxt->instate = XML_PARSER_EOF;
10981     }
10982 
10983     /*
10984      * SAX: end of the document processing.
10985      */
10986     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10987         ctxt->sax->endDocument(ctxt->userData);
10988 
10989     /*
10990      * Remove locally kept entity definitions if the tree was not built
10991      */
10992     if ((ctxt->myDoc != NULL) &&
10993 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10994 	xmlFreeDoc(ctxt->myDoc);
10995 	ctxt->myDoc = NULL;
10996     }
10997 
10998     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10999         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
11000 	if (ctxt->valid)
11001 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
11002 	if (ctxt->nsWellFormed)
11003 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
11004 	if (ctxt->options & XML_PARSE_OLD10)
11005 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
11006     }
11007     if (! ctxt->wellFormed) {
11008 	ctxt->valid = 0;
11009 	return(-1);
11010     }
11011     return(0);
11012 }
11013 
11014 /**
11015  * xmlParseExtParsedEnt:
11016  * @ctxt:  an XML parser context
11017  *
11018  * parse a general parsed entity
11019  * An external general parsed entity is well-formed if it matches the
11020  * production labeled extParsedEnt.
11021  *
11022  * [78] extParsedEnt ::= TextDecl? content
11023  *
11024  * Returns 0, -1 in case of error. the parser context is augmented
11025  *                as a result of the parsing.
11026  */
11027 
11028 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)11029 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11030     xmlChar start[4];
11031     xmlCharEncoding enc;
11032 
11033     if ((ctxt == NULL) || (ctxt->input == NULL))
11034         return(-1);
11035 
11036     xmlDefaultSAXHandlerInit();
11037 
11038     xmlDetectSAX2(ctxt);
11039 
11040     GROW;
11041 
11042     /*
11043      * SAX: beginning of the document processing.
11044      */
11045     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11046         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11047 
11048     /*
11049      * Get the 4 first bytes and decode the charset
11050      * if enc != XML_CHAR_ENCODING_NONE
11051      * plug some encoding conversion routines.
11052      */
11053     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11054 	start[0] = RAW;
11055 	start[1] = NXT(1);
11056 	start[2] = NXT(2);
11057 	start[3] = NXT(3);
11058 	enc = xmlDetectCharEncoding(start, 4);
11059 	if (enc != XML_CHAR_ENCODING_NONE) {
11060 	    xmlSwitchEncoding(ctxt, enc);
11061 	}
11062     }
11063 
11064 
11065     if (CUR == 0) {
11066 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11067     }
11068 
11069     /*
11070      * Check for the XMLDecl in the Prolog.
11071      */
11072     GROW;
11073     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11074 
11075 	/*
11076 	 * Note that we will switch encoding on the fly.
11077 	 */
11078 	xmlParseXMLDecl(ctxt);
11079 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11080 	    /*
11081 	     * The XML REC instructs us to stop parsing right here
11082 	     */
11083 	    return(-1);
11084 	}
11085 	SKIP_BLANKS;
11086     } else {
11087 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11088     }
11089     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11090         ctxt->sax->startDocument(ctxt->userData);
11091     if (ctxt->instate == XML_PARSER_EOF)
11092 	return(-1);
11093 
11094     /*
11095      * Doing validity checking on chunk doesn't make sense
11096      */
11097     ctxt->instate = XML_PARSER_CONTENT;
11098     ctxt->validate = 0;
11099     ctxt->loadsubset = 0;
11100     ctxt->depth = 0;
11101 
11102     xmlParseContent(ctxt);
11103     if (ctxt->instate == XML_PARSER_EOF)
11104 	return(-1);
11105 
11106     if ((RAW == '<') && (NXT(1) == '/')) {
11107 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11108     } else if (RAW != 0) {
11109 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11110     }
11111 
11112     /*
11113      * SAX: end of the document processing.
11114      */
11115     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11116         ctxt->sax->endDocument(ctxt->userData);
11117 
11118     if (! ctxt->wellFormed) return(-1);
11119     return(0);
11120 }
11121 
11122 #ifdef LIBXML_PUSH_ENABLED
11123 /************************************************************************
11124  *									*
11125  *		Progressive parsing interfaces				*
11126  *									*
11127  ************************************************************************/
11128 
11129 /**
11130  * xmlParseLookupSequence:
11131  * @ctxt:  an XML parser context
11132  * @first:  the first char to lookup
11133  * @next:  the next char to lookup or zero
11134  * @third:  the next char to lookup or zero
11135  *
11136  * Try to find if a sequence (first, next, third) or  just (first next) or
11137  * (first) is available in the input stream.
11138  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11139  * to avoid rescanning sequences of bytes, it DOES change the state of the
11140  * parser, do not use liberally.
11141  *
11142  * Returns the index to the current parsing point if the full sequence
11143  *      is available, -1 otherwise.
11144  */
11145 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)11146 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11147                        xmlChar next, xmlChar third) {
11148     int base, len;
11149     xmlParserInputPtr in;
11150     const xmlChar *buf;
11151 
11152     in = ctxt->input;
11153     if (in == NULL) return(-1);
11154     base = in->cur - in->base;
11155     if (base < 0) return(-1);
11156     if (ctxt->checkIndex > base)
11157         base = ctxt->checkIndex;
11158     if (in->buf == NULL) {
11159 	buf = in->base;
11160 	len = in->length;
11161     } else {
11162 	buf = xmlBufContent(in->buf->buffer);
11163 	len = xmlBufUse(in->buf->buffer);
11164     }
11165     /* take into account the sequence length */
11166     if (third) len -= 2;
11167     else if (next) len --;
11168     for (;base < len;base++) {
11169         if (buf[base] == first) {
11170 	    if (third != 0) {
11171 		if ((buf[base + 1] != next) ||
11172 		    (buf[base + 2] != third)) continue;
11173 	    } else if (next != 0) {
11174 		if (buf[base + 1] != next) continue;
11175 	    }
11176 	    ctxt->checkIndex = 0;
11177 #ifdef DEBUG_PUSH
11178 	    if (next == 0)
11179 		xmlGenericError(xmlGenericErrorContext,
11180 			"PP: lookup '%c' found at %d\n",
11181 			first, base);
11182 	    else if (third == 0)
11183 		xmlGenericError(xmlGenericErrorContext,
11184 			"PP: lookup '%c%c' found at %d\n",
11185 			first, next, base);
11186 	    else
11187 		xmlGenericError(xmlGenericErrorContext,
11188 			"PP: lookup '%c%c%c' found at %d\n",
11189 			first, next, third, base);
11190 #endif
11191 	    return(base - (in->cur - in->base));
11192 	}
11193     }
11194     ctxt->checkIndex = base;
11195 #ifdef DEBUG_PUSH
11196     if (next == 0)
11197 	xmlGenericError(xmlGenericErrorContext,
11198 		"PP: lookup '%c' failed\n", first);
11199     else if (third == 0)
11200 	xmlGenericError(xmlGenericErrorContext,
11201 		"PP: lookup '%c%c' failed\n", first, next);
11202     else
11203 	xmlGenericError(xmlGenericErrorContext,
11204 		"PP: lookup '%c%c%c' failed\n", first, next, third);
11205 #endif
11206     return(-1);
11207 }
11208 
11209 /**
11210  * xmlParseGetLasts:
11211  * @ctxt:  an XML parser context
11212  * @lastlt:  pointer to store the last '<' from the input
11213  * @lastgt:  pointer to store the last '>' from the input
11214  *
11215  * Lookup the last < and > in the current chunk
11216  */
11217 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)11218 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11219                  const xmlChar **lastgt) {
11220     const xmlChar *tmp;
11221 
11222     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11223 	xmlGenericError(xmlGenericErrorContext,
11224 		    "Internal error: xmlParseGetLasts\n");
11225 	return;
11226     }
11227     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11228         tmp = ctxt->input->end;
11229 	tmp--;
11230 	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11231 	if (tmp < ctxt->input->base) {
11232 	    *lastlt = NULL;
11233 	    *lastgt = NULL;
11234 	} else {
11235 	    *lastlt = tmp;
11236 	    tmp++;
11237 	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11238 	        if (*tmp == '\'') {
11239 		    tmp++;
11240 		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11241 		    if (tmp < ctxt->input->end) tmp++;
11242 		} else if (*tmp == '"') {
11243 		    tmp++;
11244 		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11245 		    if (tmp < ctxt->input->end) tmp++;
11246 		} else
11247 		    tmp++;
11248 	    }
11249 	    if (tmp < ctxt->input->end)
11250 	        *lastgt = tmp;
11251 	    else {
11252 	        tmp = *lastlt;
11253 		tmp--;
11254 		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11255 		if (tmp >= ctxt->input->base)
11256 		    *lastgt = tmp;
11257 		else
11258 		    *lastgt = NULL;
11259 	    }
11260 	}
11261     } else {
11262         *lastlt = NULL;
11263 	*lastgt = NULL;
11264     }
11265 }
11266 /**
11267  * xmlCheckCdataPush:
11268  * @cur: pointer to the block of characters
11269  * @len: length of the block in bytes
11270  * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11271  *
11272  * Check that the block of characters is okay as SCdata content [20]
11273  *
11274  * Returns the number of bytes to pass if okay, a negative index where an
11275  *         UTF-8 error occured otherwise
11276  */
11277 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11278 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11279     int ix;
11280     unsigned char c;
11281     int codepoint;
11282 
11283     if ((utf == NULL) || (len <= 0))
11284         return(0);
11285 
11286     for (ix = 0; ix < len;) {      /* string is 0-terminated */
11287         c = utf[ix];
11288         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
11289 	    if (c >= 0x20)
11290 		ix++;
11291 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11292 	        ix++;
11293 	    else
11294 	        return(-ix);
11295 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11296 	    if (ix + 2 > len) return(complete ? -ix : ix);
11297 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
11298 	        return(-ix);
11299 	    codepoint = (utf[ix] & 0x1f) << 6;
11300 	    codepoint |= utf[ix+1] & 0x3f;
11301 	    if (!xmlIsCharQ(codepoint))
11302 	        return(-ix);
11303 	    ix += 2;
11304 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11305 	    if (ix + 3 > len) return(complete ? -ix : ix);
11306 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11307 	        ((utf[ix+2] & 0xc0) != 0x80))
11308 		    return(-ix);
11309 	    codepoint = (utf[ix] & 0xf) << 12;
11310 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
11311 	    codepoint |= utf[ix+2] & 0x3f;
11312 	    if (!xmlIsCharQ(codepoint))
11313 	        return(-ix);
11314 	    ix += 3;
11315 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11316 	    if (ix + 4 > len) return(complete ? -ix : ix);
11317 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11318 	        ((utf[ix+2] & 0xc0) != 0x80) ||
11319 		((utf[ix+3] & 0xc0) != 0x80))
11320 		    return(-ix);
11321 	    codepoint = (utf[ix] & 0x7) << 18;
11322 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
11323 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
11324 	    codepoint |= utf[ix+3] & 0x3f;
11325 	    if (!xmlIsCharQ(codepoint))
11326 	        return(-ix);
11327 	    ix += 4;
11328 	} else				/* unknown encoding */
11329 	    return(-ix);
11330       }
11331       return(ix);
11332 }
11333 
11334 /**
11335  * xmlParseTryOrFinish:
11336  * @ctxt:  an XML parser context
11337  * @terminate:  last chunk indicator
11338  *
11339  * Try to progress on parsing
11340  *
11341  * Returns zero if no parsing was possible
11342  */
11343 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11344 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11345     int ret = 0;
11346     int avail, tlen;
11347     xmlChar cur, next;
11348     const xmlChar *lastlt, *lastgt;
11349 
11350     if (ctxt->input == NULL)
11351         return(0);
11352 
11353 #ifdef DEBUG_PUSH
11354     switch (ctxt->instate) {
11355 	case XML_PARSER_EOF:
11356 	    xmlGenericError(xmlGenericErrorContext,
11357 		    "PP: try EOF\n"); break;
11358 	case XML_PARSER_START:
11359 	    xmlGenericError(xmlGenericErrorContext,
11360 		    "PP: try START\n"); break;
11361 	case XML_PARSER_MISC:
11362 	    xmlGenericError(xmlGenericErrorContext,
11363 		    "PP: try MISC\n");break;
11364 	case XML_PARSER_COMMENT:
11365 	    xmlGenericError(xmlGenericErrorContext,
11366 		    "PP: try COMMENT\n");break;
11367 	case XML_PARSER_PROLOG:
11368 	    xmlGenericError(xmlGenericErrorContext,
11369 		    "PP: try PROLOG\n");break;
11370 	case XML_PARSER_START_TAG:
11371 	    xmlGenericError(xmlGenericErrorContext,
11372 		    "PP: try START_TAG\n");break;
11373 	case XML_PARSER_CONTENT:
11374 	    xmlGenericError(xmlGenericErrorContext,
11375 		    "PP: try CONTENT\n");break;
11376 	case XML_PARSER_CDATA_SECTION:
11377 	    xmlGenericError(xmlGenericErrorContext,
11378 		    "PP: try CDATA_SECTION\n");break;
11379 	case XML_PARSER_END_TAG:
11380 	    xmlGenericError(xmlGenericErrorContext,
11381 		    "PP: try END_TAG\n");break;
11382 	case XML_PARSER_ENTITY_DECL:
11383 	    xmlGenericError(xmlGenericErrorContext,
11384 		    "PP: try ENTITY_DECL\n");break;
11385 	case XML_PARSER_ENTITY_VALUE:
11386 	    xmlGenericError(xmlGenericErrorContext,
11387 		    "PP: try ENTITY_VALUE\n");break;
11388 	case XML_PARSER_ATTRIBUTE_VALUE:
11389 	    xmlGenericError(xmlGenericErrorContext,
11390 		    "PP: try ATTRIBUTE_VALUE\n");break;
11391 	case XML_PARSER_DTD:
11392 	    xmlGenericError(xmlGenericErrorContext,
11393 		    "PP: try DTD\n");break;
11394 	case XML_PARSER_EPILOG:
11395 	    xmlGenericError(xmlGenericErrorContext,
11396 		    "PP: try EPILOG\n");break;
11397 	case XML_PARSER_PI:
11398 	    xmlGenericError(xmlGenericErrorContext,
11399 		    "PP: try PI\n");break;
11400         case XML_PARSER_IGNORE:
11401             xmlGenericError(xmlGenericErrorContext,
11402 		    "PP: try IGNORE\n");break;
11403     }
11404 #endif
11405 
11406     if ((ctxt->input != NULL) &&
11407         (ctxt->input->cur - ctxt->input->base > 4096)) {
11408 	xmlSHRINK(ctxt);
11409 	ctxt->checkIndex = 0;
11410     }
11411     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11412 
11413     while (ctxt->instate != XML_PARSER_EOF) {
11414 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11415 	    return(0);
11416 
11417 
11418 	/*
11419 	 * Pop-up of finished entities.
11420 	 */
11421 	while ((RAW == 0) && (ctxt->inputNr > 1))
11422 	    xmlPopInput(ctxt);
11423 
11424 	if (ctxt->input == NULL) break;
11425 	if (ctxt->input->buf == NULL)
11426 	    avail = ctxt->input->length -
11427 	            (ctxt->input->cur - ctxt->input->base);
11428 	else {
11429 	    /*
11430 	     * If we are operating on converted input, try to flush
11431 	     * remainng chars to avoid them stalling in the non-converted
11432 	     * buffer. But do not do this in document start where
11433 	     * encoding="..." may not have been read and we work on a
11434 	     * guessed encoding.
11435 	     */
11436 	    if ((ctxt->instate != XML_PARSER_START) &&
11437 	        (ctxt->input->buf->raw != NULL) &&
11438 		(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11439                 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11440                                                  ctxt->input);
11441 		size_t current = ctxt->input->cur - ctxt->input->base;
11442 
11443 		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11444                 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11445                                       base, current);
11446 	    }
11447 	    avail = xmlBufUse(ctxt->input->buf->buffer) -
11448 		    (ctxt->input->cur - ctxt->input->base);
11449 	}
11450         if (avail < 1)
11451 	    goto done;
11452         switch (ctxt->instate) {
11453             case XML_PARSER_EOF:
11454 	        /*
11455 		 * Document parsing is done !
11456 		 */
11457 	        goto done;
11458             case XML_PARSER_START:
11459 		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11460 		    xmlChar start[4];
11461 		    xmlCharEncoding enc;
11462 
11463 		    /*
11464 		     * Very first chars read from the document flow.
11465 		     */
11466 		    if (avail < 4)
11467 			goto done;
11468 
11469 		    /*
11470 		     * Get the 4 first bytes and decode the charset
11471 		     * if enc != XML_CHAR_ENCODING_NONE
11472 		     * plug some encoding conversion routines,
11473 		     * else xmlSwitchEncoding will set to (default)
11474 		     * UTF8.
11475 		     */
11476 		    start[0] = RAW;
11477 		    start[1] = NXT(1);
11478 		    start[2] = NXT(2);
11479 		    start[3] = NXT(3);
11480 		    enc = xmlDetectCharEncoding(start, 4);
11481 		    xmlSwitchEncoding(ctxt, enc);
11482 		    break;
11483 		}
11484 
11485 		if (avail < 2)
11486 		    goto done;
11487 		cur = ctxt->input->cur[0];
11488 		next = ctxt->input->cur[1];
11489 		if (cur == 0) {
11490 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11491 			ctxt->sax->setDocumentLocator(ctxt->userData,
11492 						      &xmlDefaultSAXLocator);
11493 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11494 		    xmlHaltParser(ctxt);
11495 #ifdef DEBUG_PUSH
11496 		    xmlGenericError(xmlGenericErrorContext,
11497 			    "PP: entering EOF\n");
11498 #endif
11499 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11500 			ctxt->sax->endDocument(ctxt->userData);
11501 		    goto done;
11502 		}
11503 	        if ((cur == '<') && (next == '?')) {
11504 		    /* PI or XML decl */
11505 		    if (avail < 5) return(ret);
11506 		    if ((!terminate) &&
11507 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11508 			return(ret);
11509 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11510 			ctxt->sax->setDocumentLocator(ctxt->userData,
11511 						      &xmlDefaultSAXLocator);
11512 		    if ((ctxt->input->cur[2] == 'x') &&
11513 			(ctxt->input->cur[3] == 'm') &&
11514 			(ctxt->input->cur[4] == 'l') &&
11515 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11516 			ret += 5;
11517 #ifdef DEBUG_PUSH
11518 			xmlGenericError(xmlGenericErrorContext,
11519 				"PP: Parsing XML Decl\n");
11520 #endif
11521 			xmlParseXMLDecl(ctxt);
11522 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11523 			    /*
11524 			     * The XML REC instructs us to stop parsing right
11525 			     * here
11526 			     */
11527 			    xmlHaltParser(ctxt);
11528 			    return(0);
11529 			}
11530 			ctxt->standalone = ctxt->input->standalone;
11531 			if ((ctxt->encoding == NULL) &&
11532 			    (ctxt->input->encoding != NULL))
11533 			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11534 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11535 			    (!ctxt->disableSAX))
11536 			    ctxt->sax->startDocument(ctxt->userData);
11537 			ctxt->instate = XML_PARSER_MISC;
11538 #ifdef DEBUG_PUSH
11539 			xmlGenericError(xmlGenericErrorContext,
11540 				"PP: entering MISC\n");
11541 #endif
11542 		    } else {
11543 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11544 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11545 			    (!ctxt->disableSAX))
11546 			    ctxt->sax->startDocument(ctxt->userData);
11547 			ctxt->instate = XML_PARSER_MISC;
11548 #ifdef DEBUG_PUSH
11549 			xmlGenericError(xmlGenericErrorContext,
11550 				"PP: entering MISC\n");
11551 #endif
11552 		    }
11553 		} else {
11554 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11555 			ctxt->sax->setDocumentLocator(ctxt->userData,
11556 						      &xmlDefaultSAXLocator);
11557 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11558 		    if (ctxt->version == NULL) {
11559 		        xmlErrMemory(ctxt, NULL);
11560 			break;
11561 		    }
11562 		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11563 		        (!ctxt->disableSAX))
11564 			ctxt->sax->startDocument(ctxt->userData);
11565 		    ctxt->instate = XML_PARSER_MISC;
11566 #ifdef DEBUG_PUSH
11567 		    xmlGenericError(xmlGenericErrorContext,
11568 			    "PP: entering MISC\n");
11569 #endif
11570 		}
11571 		break;
11572             case XML_PARSER_START_TAG: {
11573 	        const xmlChar *name;
11574 		const xmlChar *prefix = NULL;
11575 		const xmlChar *URI = NULL;
11576 		int nsNr = ctxt->nsNr;
11577 
11578 		if ((avail < 2) && (ctxt->inputNr == 1))
11579 		    goto done;
11580 		cur = ctxt->input->cur[0];
11581 	        if (cur != '<') {
11582 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11583 		    xmlHaltParser(ctxt);
11584 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11585 			ctxt->sax->endDocument(ctxt->userData);
11586 		    goto done;
11587 		}
11588 		if (!terminate) {
11589 		    if (ctxt->progressive) {
11590 		        /* > can be found unescaped in attribute values */
11591 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11592 			    goto done;
11593 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11594 			goto done;
11595 		    }
11596 		}
11597 		if (ctxt->spaceNr == 0)
11598 		    spacePush(ctxt, -1);
11599 		else if (*ctxt->space == -2)
11600 		    spacePush(ctxt, -1);
11601 		else
11602 		    spacePush(ctxt, *ctxt->space);
11603 #ifdef LIBXML_SAX1_ENABLED
11604 		if (ctxt->sax2)
11605 #endif /* LIBXML_SAX1_ENABLED */
11606 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11607 #ifdef LIBXML_SAX1_ENABLED
11608 		else
11609 		    name = xmlParseStartTag(ctxt);
11610 #endif /* LIBXML_SAX1_ENABLED */
11611 		if (ctxt->instate == XML_PARSER_EOF)
11612 		    goto done;
11613 		if (name == NULL) {
11614 		    spacePop(ctxt);
11615 		    xmlHaltParser(ctxt);
11616 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11617 			ctxt->sax->endDocument(ctxt->userData);
11618 		    goto done;
11619 		}
11620 #ifdef LIBXML_VALID_ENABLED
11621 		/*
11622 		 * [ VC: Root Element Type ]
11623 		 * The Name in the document type declaration must match
11624 		 * the element type of the root element.
11625 		 */
11626 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11627 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11628 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11629 #endif /* LIBXML_VALID_ENABLED */
11630 
11631 		/*
11632 		 * Check for an Empty Element.
11633 		 */
11634 		if ((RAW == '/') && (NXT(1) == '>')) {
11635 		    SKIP(2);
11636 
11637 		    if (ctxt->sax2) {
11638 			if ((ctxt->sax != NULL) &&
11639 			    (ctxt->sax->endElementNs != NULL) &&
11640 			    (!ctxt->disableSAX))
11641 			    ctxt->sax->endElementNs(ctxt->userData, name,
11642 			                            prefix, URI);
11643 			if (ctxt->nsNr - nsNr > 0)
11644 			    nsPop(ctxt, ctxt->nsNr - nsNr);
11645 #ifdef LIBXML_SAX1_ENABLED
11646 		    } else {
11647 			if ((ctxt->sax != NULL) &&
11648 			    (ctxt->sax->endElement != NULL) &&
11649 			    (!ctxt->disableSAX))
11650 			    ctxt->sax->endElement(ctxt->userData, name);
11651 #endif /* LIBXML_SAX1_ENABLED */
11652 		    }
11653 		    if (ctxt->instate == XML_PARSER_EOF)
11654 			goto done;
11655 		    spacePop(ctxt);
11656 		    if (ctxt->nameNr == 0) {
11657 			ctxt->instate = XML_PARSER_EPILOG;
11658 		    } else {
11659 			ctxt->instate = XML_PARSER_CONTENT;
11660 		    }
11661                     ctxt->progressive = 1;
11662 		    break;
11663 		}
11664 		if (RAW == '>') {
11665 		    NEXT;
11666 		} else {
11667 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11668 					 "Couldn't find end of Start Tag %s\n",
11669 					 name);
11670 		    nodePop(ctxt);
11671 		    spacePop(ctxt);
11672 		}
11673 		if (ctxt->sax2)
11674 		    nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11675 #ifdef LIBXML_SAX1_ENABLED
11676 		else
11677 		    namePush(ctxt, name);
11678 #endif /* LIBXML_SAX1_ENABLED */
11679 
11680 		ctxt->instate = XML_PARSER_CONTENT;
11681                 ctxt->progressive = 1;
11682                 break;
11683 	    }
11684             case XML_PARSER_CONTENT: {
11685 		const xmlChar *test;
11686 		unsigned int cons;
11687 		if ((avail < 2) && (ctxt->inputNr == 1))
11688 		    goto done;
11689 		cur = ctxt->input->cur[0];
11690 		next = ctxt->input->cur[1];
11691 
11692 		test = CUR_PTR;
11693 	        cons = ctxt->input->consumed;
11694 		if ((cur == '<') && (next == '/')) {
11695 		    ctxt->instate = XML_PARSER_END_TAG;
11696 		    break;
11697 	        } else if ((cur == '<') && (next == '?')) {
11698 		    if ((!terminate) &&
11699 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11700                         ctxt->progressive = XML_PARSER_PI;
11701 			goto done;
11702                     }
11703 		    xmlParsePI(ctxt);
11704 		    ctxt->instate = XML_PARSER_CONTENT;
11705                     ctxt->progressive = 1;
11706 		} else if ((cur == '<') && (next != '!')) {
11707 		    ctxt->instate = XML_PARSER_START_TAG;
11708 		    break;
11709 		} else if ((cur == '<') && (next == '!') &&
11710 		           (ctxt->input->cur[2] == '-') &&
11711 			   (ctxt->input->cur[3] == '-')) {
11712 		    int term;
11713 
11714 	            if (avail < 4)
11715 		        goto done;
11716 		    ctxt->input->cur += 4;
11717 		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11718 		    ctxt->input->cur -= 4;
11719 		    if ((!terminate) && (term < 0)) {
11720                         ctxt->progressive = XML_PARSER_COMMENT;
11721 			goto done;
11722                     }
11723 		    xmlParseComment(ctxt);
11724 		    ctxt->instate = XML_PARSER_CONTENT;
11725                     ctxt->progressive = 1;
11726 		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11727 		    (ctxt->input->cur[2] == '[') &&
11728 		    (ctxt->input->cur[3] == 'C') &&
11729 		    (ctxt->input->cur[4] == 'D') &&
11730 		    (ctxt->input->cur[5] == 'A') &&
11731 		    (ctxt->input->cur[6] == 'T') &&
11732 		    (ctxt->input->cur[7] == 'A') &&
11733 		    (ctxt->input->cur[8] == '[')) {
11734 		    SKIP(9);
11735 		    ctxt->instate = XML_PARSER_CDATA_SECTION;
11736 		    break;
11737 		} else if ((cur == '<') && (next == '!') &&
11738 		           (avail < 9)) {
11739 		    goto done;
11740 		} else if (cur == '&') {
11741 		    if ((!terminate) &&
11742 		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11743 			goto done;
11744 		    xmlParseReference(ctxt);
11745 		} else {
11746 		    /* TODO Avoid the extra copy, handle directly !!! */
11747 		    /*
11748 		     * Goal of the following test is:
11749 		     *  - minimize calls to the SAX 'character' callback
11750 		     *    when they are mergeable
11751 		     *  - handle an problem for isBlank when we only parse
11752 		     *    a sequence of blank chars and the next one is
11753 		     *    not available to check against '<' presence.
11754 		     *  - tries to homogenize the differences in SAX
11755 		     *    callbacks between the push and pull versions
11756 		     *    of the parser.
11757 		     */
11758 		    if ((ctxt->inputNr == 1) &&
11759 		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11760 			if (!terminate) {
11761 			    if (ctxt->progressive) {
11762 				if ((lastlt == NULL) ||
11763 				    (ctxt->input->cur > lastlt))
11764 				    goto done;
11765 			    } else if (xmlParseLookupSequence(ctxt,
11766 			                                      '<', 0, 0) < 0) {
11767 				goto done;
11768 			    }
11769 			}
11770                     }
11771 		    ctxt->checkIndex = 0;
11772 		    xmlParseCharData(ctxt, 0);
11773 		}
11774 		/*
11775 		 * Pop-up of finished entities.
11776 		 */
11777 		while ((RAW == 0) && (ctxt->inputNr > 1))
11778 		    xmlPopInput(ctxt);
11779 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11780 		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11781 		                "detected an error in element content\n");
11782 		    xmlHaltParser(ctxt);
11783 		    break;
11784 		}
11785 		break;
11786 	    }
11787             case XML_PARSER_END_TAG:
11788 		if (avail < 2)
11789 		    goto done;
11790 		if (!terminate) {
11791 		    if (ctxt->progressive) {
11792 		        /* > can be found unescaped in attribute values */
11793 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11794 			    goto done;
11795 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11796 			goto done;
11797 		    }
11798 		}
11799 		if (ctxt->sax2) {
11800 		    xmlParseEndTag2(ctxt,
11801 		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11802 		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11803 		       (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11804 		    nameNsPop(ctxt);
11805 		}
11806 #ifdef LIBXML_SAX1_ENABLED
11807 		  else
11808 		    xmlParseEndTag1(ctxt, 0);
11809 #endif /* LIBXML_SAX1_ENABLED */
11810 		if (ctxt->instate == XML_PARSER_EOF) {
11811 		    /* Nothing */
11812 		} else if (ctxt->nameNr == 0) {
11813 		    ctxt->instate = XML_PARSER_EPILOG;
11814 		} else {
11815 		    ctxt->instate = XML_PARSER_CONTENT;
11816 		}
11817 		break;
11818             case XML_PARSER_CDATA_SECTION: {
11819 	        /*
11820 		 * The Push mode need to have the SAX callback for
11821 		 * cdataBlock merge back contiguous callbacks.
11822 		 */
11823 		int base;
11824 
11825 		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11826 		if (base < 0) {
11827 		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11828 		        int tmp;
11829 
11830 			tmp = xmlCheckCdataPush(ctxt->input->cur,
11831 			                        XML_PARSER_BIG_BUFFER_SIZE, 0);
11832 			if (tmp < 0) {
11833 			    tmp = -tmp;
11834 			    ctxt->input->cur += tmp;
11835 			    goto encoding_error;
11836 			}
11837 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11838 			    if (ctxt->sax->cdataBlock != NULL)
11839 				ctxt->sax->cdataBlock(ctxt->userData,
11840 				                      ctxt->input->cur, tmp);
11841 			    else if (ctxt->sax->characters != NULL)
11842 				ctxt->sax->characters(ctxt->userData,
11843 				                      ctxt->input->cur, tmp);
11844 			}
11845 			if (ctxt->instate == XML_PARSER_EOF)
11846 			    goto done;
11847 			SKIPL(tmp);
11848 			ctxt->checkIndex = 0;
11849 		    }
11850 		    goto done;
11851 		} else {
11852 		    int tmp;
11853 
11854 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11855 		    if ((tmp < 0) || (tmp != base)) {
11856 			tmp = -tmp;
11857 			ctxt->input->cur += tmp;
11858 			goto encoding_error;
11859 		    }
11860 		    if ((ctxt->sax != NULL) && (base == 0) &&
11861 		        (ctxt->sax->cdataBlock != NULL) &&
11862 		        (!ctxt->disableSAX)) {
11863 			/*
11864 			 * Special case to provide identical behaviour
11865 			 * between pull and push parsers on enpty CDATA
11866 			 * sections
11867 			 */
11868 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11869 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11870 			               "<![CDATA[", 9)))
11871 			     ctxt->sax->cdataBlock(ctxt->userData,
11872 			                           BAD_CAST "", 0);
11873 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11874 			(!ctxt->disableSAX)) {
11875 			if (ctxt->sax->cdataBlock != NULL)
11876 			    ctxt->sax->cdataBlock(ctxt->userData,
11877 						  ctxt->input->cur, base);
11878 			else if (ctxt->sax->characters != NULL)
11879 			    ctxt->sax->characters(ctxt->userData,
11880 						  ctxt->input->cur, base);
11881 		    }
11882 		    if (ctxt->instate == XML_PARSER_EOF)
11883 			goto done;
11884 		    SKIPL(base + 3);
11885 		    ctxt->checkIndex = 0;
11886 		    ctxt->instate = XML_PARSER_CONTENT;
11887 #ifdef DEBUG_PUSH
11888 		    xmlGenericError(xmlGenericErrorContext,
11889 			    "PP: entering CONTENT\n");
11890 #endif
11891 		}
11892 		break;
11893 	    }
11894             case XML_PARSER_MISC:
11895 		SKIP_BLANKS;
11896 		if (ctxt->input->buf == NULL)
11897 		    avail = ctxt->input->length -
11898 		            (ctxt->input->cur - ctxt->input->base);
11899 		else
11900 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11901 		            (ctxt->input->cur - ctxt->input->base);
11902 		if (avail < 2)
11903 		    goto done;
11904 		cur = ctxt->input->cur[0];
11905 		next = ctxt->input->cur[1];
11906 	        if ((cur == '<') && (next == '?')) {
11907 		    if ((!terminate) &&
11908 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11909                         ctxt->progressive = XML_PARSER_PI;
11910 			goto done;
11911                     }
11912 #ifdef DEBUG_PUSH
11913 		    xmlGenericError(xmlGenericErrorContext,
11914 			    "PP: Parsing PI\n");
11915 #endif
11916 		    xmlParsePI(ctxt);
11917 		    if (ctxt->instate == XML_PARSER_EOF)
11918 			goto done;
11919 		    ctxt->instate = XML_PARSER_MISC;
11920                     ctxt->progressive = 1;
11921 		    ctxt->checkIndex = 0;
11922 		} else if ((cur == '<') && (next == '!') &&
11923 		    (ctxt->input->cur[2] == '-') &&
11924 		    (ctxt->input->cur[3] == '-')) {
11925 		    if ((!terminate) &&
11926 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11927                         ctxt->progressive = XML_PARSER_COMMENT;
11928 			goto done;
11929                     }
11930 #ifdef DEBUG_PUSH
11931 		    xmlGenericError(xmlGenericErrorContext,
11932 			    "PP: Parsing Comment\n");
11933 #endif
11934 		    xmlParseComment(ctxt);
11935 		    if (ctxt->instate == XML_PARSER_EOF)
11936 			goto done;
11937 		    ctxt->instate = XML_PARSER_MISC;
11938                     ctxt->progressive = 1;
11939 		    ctxt->checkIndex = 0;
11940 		} else if ((cur == '<') && (next == '!') &&
11941 		    (ctxt->input->cur[2] == 'D') &&
11942 		    (ctxt->input->cur[3] == 'O') &&
11943 		    (ctxt->input->cur[4] == 'C') &&
11944 		    (ctxt->input->cur[5] == 'T') &&
11945 		    (ctxt->input->cur[6] == 'Y') &&
11946 		    (ctxt->input->cur[7] == 'P') &&
11947 		    (ctxt->input->cur[8] == 'E')) {
11948 		    if ((!terminate) &&
11949 		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11950                         ctxt->progressive = XML_PARSER_DTD;
11951 			goto done;
11952                     }
11953 #ifdef DEBUG_PUSH
11954 		    xmlGenericError(xmlGenericErrorContext,
11955 			    "PP: Parsing internal subset\n");
11956 #endif
11957 		    ctxt->inSubset = 1;
11958                     ctxt->progressive = 0;
11959 		    ctxt->checkIndex = 0;
11960 		    xmlParseDocTypeDecl(ctxt);
11961 		    if (ctxt->instate == XML_PARSER_EOF)
11962 			goto done;
11963 		    if (RAW == '[') {
11964 			ctxt->instate = XML_PARSER_DTD;
11965 #ifdef DEBUG_PUSH
11966 			xmlGenericError(xmlGenericErrorContext,
11967 				"PP: entering DTD\n");
11968 #endif
11969 		    } else {
11970 			/*
11971 			 * Create and update the external subset.
11972 			 */
11973 			ctxt->inSubset = 2;
11974 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11975 			    (ctxt->sax->externalSubset != NULL))
11976 			    ctxt->sax->externalSubset(ctxt->userData,
11977 				    ctxt->intSubName, ctxt->extSubSystem,
11978 				    ctxt->extSubURI);
11979 			ctxt->inSubset = 0;
11980 			xmlCleanSpecialAttr(ctxt);
11981 			ctxt->instate = XML_PARSER_PROLOG;
11982 #ifdef DEBUG_PUSH
11983 			xmlGenericError(xmlGenericErrorContext,
11984 				"PP: entering PROLOG\n");
11985 #endif
11986 		    }
11987 		} else if ((cur == '<') && (next == '!') &&
11988 		           (avail < 9)) {
11989 		    goto done;
11990 		} else {
11991 		    ctxt->instate = XML_PARSER_START_TAG;
11992 		    ctxt->progressive = XML_PARSER_START_TAG;
11993 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11994 #ifdef DEBUG_PUSH
11995 		    xmlGenericError(xmlGenericErrorContext,
11996 			    "PP: entering START_TAG\n");
11997 #endif
11998 		}
11999 		break;
12000             case XML_PARSER_PROLOG:
12001 		SKIP_BLANKS;
12002 		if (ctxt->input->buf == NULL)
12003 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12004 		else
12005 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
12006                             (ctxt->input->cur - ctxt->input->base);
12007 		if (avail < 2)
12008 		    goto done;
12009 		cur = ctxt->input->cur[0];
12010 		next = ctxt->input->cur[1];
12011 	        if ((cur == '<') && (next == '?')) {
12012 		    if ((!terminate) &&
12013 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12014                         ctxt->progressive = XML_PARSER_PI;
12015 			goto done;
12016                     }
12017 #ifdef DEBUG_PUSH
12018 		    xmlGenericError(xmlGenericErrorContext,
12019 			    "PP: Parsing PI\n");
12020 #endif
12021 		    xmlParsePI(ctxt);
12022 		    if (ctxt->instate == XML_PARSER_EOF)
12023 			goto done;
12024 		    ctxt->instate = XML_PARSER_PROLOG;
12025                     ctxt->progressive = 1;
12026 		} else if ((cur == '<') && (next == '!') &&
12027 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12028 		    if ((!terminate) &&
12029 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12030                         ctxt->progressive = XML_PARSER_COMMENT;
12031 			goto done;
12032                     }
12033 #ifdef DEBUG_PUSH
12034 		    xmlGenericError(xmlGenericErrorContext,
12035 			    "PP: Parsing Comment\n");
12036 #endif
12037 		    xmlParseComment(ctxt);
12038 		    if (ctxt->instate == XML_PARSER_EOF)
12039 			goto done;
12040 		    ctxt->instate = XML_PARSER_PROLOG;
12041                     ctxt->progressive = 1;
12042 		} else if ((cur == '<') && (next == '!') &&
12043 		           (avail < 4)) {
12044 		    goto done;
12045 		} else {
12046 		    ctxt->instate = XML_PARSER_START_TAG;
12047 		    if (ctxt->progressive == 0)
12048 			ctxt->progressive = XML_PARSER_START_TAG;
12049 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
12050 #ifdef DEBUG_PUSH
12051 		    xmlGenericError(xmlGenericErrorContext,
12052 			    "PP: entering START_TAG\n");
12053 #endif
12054 		}
12055 		break;
12056             case XML_PARSER_EPILOG:
12057 		SKIP_BLANKS;
12058 		if (ctxt->input->buf == NULL)
12059 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12060 		else
12061 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
12062                             (ctxt->input->cur - ctxt->input->base);
12063 		if (avail < 2)
12064 		    goto done;
12065 		cur = ctxt->input->cur[0];
12066 		next = ctxt->input->cur[1];
12067 	        if ((cur == '<') && (next == '?')) {
12068 		    if ((!terminate) &&
12069 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12070                         ctxt->progressive = XML_PARSER_PI;
12071 			goto done;
12072                     }
12073 #ifdef DEBUG_PUSH
12074 		    xmlGenericError(xmlGenericErrorContext,
12075 			    "PP: Parsing PI\n");
12076 #endif
12077 		    xmlParsePI(ctxt);
12078 		    if (ctxt->instate == XML_PARSER_EOF)
12079 			goto done;
12080 		    ctxt->instate = XML_PARSER_EPILOG;
12081                     ctxt->progressive = 1;
12082 		} else if ((cur == '<') && (next == '!') &&
12083 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12084 		    if ((!terminate) &&
12085 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12086                         ctxt->progressive = XML_PARSER_COMMENT;
12087 			goto done;
12088                     }
12089 #ifdef DEBUG_PUSH
12090 		    xmlGenericError(xmlGenericErrorContext,
12091 			    "PP: Parsing Comment\n");
12092 #endif
12093 		    xmlParseComment(ctxt);
12094 		    if (ctxt->instate == XML_PARSER_EOF)
12095 			goto done;
12096 		    ctxt->instate = XML_PARSER_EPILOG;
12097                     ctxt->progressive = 1;
12098 		} else if ((cur == '<') && (next == '!') &&
12099 		           (avail < 4)) {
12100 		    goto done;
12101 		} else {
12102 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12103 		    xmlHaltParser(ctxt);
12104 #ifdef DEBUG_PUSH
12105 		    xmlGenericError(xmlGenericErrorContext,
12106 			    "PP: entering EOF\n");
12107 #endif
12108 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12109 			ctxt->sax->endDocument(ctxt->userData);
12110 		    goto done;
12111 		}
12112 		break;
12113             case XML_PARSER_DTD: {
12114 	        /*
12115 		 * Sorry but progressive parsing of the internal subset
12116 		 * is not expected to be supported. We first check that
12117 		 * the full content of the internal subset is available and
12118 		 * the parsing is launched only at that point.
12119 		 * Internal subset ends up with "']' S? '>'" in an unescaped
12120 		 * section and not in a ']]>' sequence which are conditional
12121 		 * sections (whoever argued to keep that crap in XML deserve
12122 		 * a place in hell !).
12123 		 */
12124 		int base, i;
12125 		xmlChar *buf;
12126 	        xmlChar quote = 0;
12127                 size_t use;
12128 
12129 		base = ctxt->input->cur - ctxt->input->base;
12130 		if (base < 0) return(0);
12131 		if (ctxt->checkIndex > base)
12132 		    base = ctxt->checkIndex;
12133 		buf = xmlBufContent(ctxt->input->buf->buffer);
12134                 use = xmlBufUse(ctxt->input->buf->buffer);
12135 		for (;(unsigned int) base < use; base++) {
12136 		    if (quote != 0) {
12137 		        if (buf[base] == quote)
12138 			    quote = 0;
12139 			continue;
12140 		    }
12141 		    if ((quote == 0) && (buf[base] == '<')) {
12142 		        int found  = 0;
12143 			/* special handling of comments */
12144 		        if (((unsigned int) base + 4 < use) &&
12145 			    (buf[base + 1] == '!') &&
12146 			    (buf[base + 2] == '-') &&
12147 			    (buf[base + 3] == '-')) {
12148 			    for (;(unsigned int) base + 3 < use; base++) {
12149 				if ((buf[base] == '-') &&
12150 				    (buf[base + 1] == '-') &&
12151 				    (buf[base + 2] == '>')) {
12152 				    found = 1;
12153 				    base += 2;
12154 				    break;
12155 				}
12156 		            }
12157 			    if (!found) {
12158 #if 0
12159 			        fprintf(stderr, "unfinished comment\n");
12160 #endif
12161 			        break; /* for */
12162 		            }
12163 		            continue;
12164 			}
12165 		    }
12166 		    if (buf[base] == '"') {
12167 		        quote = '"';
12168 			continue;
12169 		    }
12170 		    if (buf[base] == '\'') {
12171 		        quote = '\'';
12172 			continue;
12173 		    }
12174 		    if (buf[base] == ']') {
12175 #if 0
12176 		        fprintf(stderr, "%c%c%c%c: ", buf[base],
12177 			        buf[base + 1], buf[base + 2], buf[base + 3]);
12178 #endif
12179 		        if ((unsigned int) base +1 >= use)
12180 			    break;
12181 			if (buf[base + 1] == ']') {
12182 			    /* conditional crap, skip both ']' ! */
12183 			    base++;
12184 			    continue;
12185 			}
12186 		        for (i = 1; (unsigned int) base + i < use; i++) {
12187 			    if (buf[base + i] == '>') {
12188 #if 0
12189 			        fprintf(stderr, "found\n");
12190 #endif
12191 			        goto found_end_int_subset;
12192 			    }
12193 			    if (!IS_BLANK_CH(buf[base + i])) {
12194 #if 0
12195 			        fprintf(stderr, "not found\n");
12196 #endif
12197 			        goto not_end_of_int_subset;
12198 			    }
12199 			}
12200 #if 0
12201 			fprintf(stderr, "end of stream\n");
12202 #endif
12203 		        break;
12204 
12205 		    }
12206 not_end_of_int_subset:
12207                     continue; /* for */
12208 		}
12209 		/*
12210 		 * We didn't found the end of the Internal subset
12211 		 */
12212                 if (quote == 0)
12213                     ctxt->checkIndex = base;
12214                 else
12215                     ctxt->checkIndex = 0;
12216 #ifdef DEBUG_PUSH
12217 		if (next == 0)
12218 		    xmlGenericError(xmlGenericErrorContext,
12219 			    "PP: lookup of int subset end filed\n");
12220 #endif
12221 	        goto done;
12222 
12223 found_end_int_subset:
12224                 ctxt->checkIndex = 0;
12225 		xmlParseInternalSubset(ctxt);
12226 		if (ctxt->instate == XML_PARSER_EOF)
12227 		    goto done;
12228 		ctxt->inSubset = 2;
12229 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12230 		    (ctxt->sax->externalSubset != NULL))
12231 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12232 			    ctxt->extSubSystem, ctxt->extSubURI);
12233 		ctxt->inSubset = 0;
12234 		xmlCleanSpecialAttr(ctxt);
12235 		if (ctxt->instate == XML_PARSER_EOF)
12236 		    goto done;
12237 		ctxt->instate = XML_PARSER_PROLOG;
12238 		ctxt->checkIndex = 0;
12239 #ifdef DEBUG_PUSH
12240 		xmlGenericError(xmlGenericErrorContext,
12241 			"PP: entering PROLOG\n");
12242 #endif
12243                 break;
12244 	    }
12245             case XML_PARSER_COMMENT:
12246 		xmlGenericError(xmlGenericErrorContext,
12247 			"PP: internal error, state == COMMENT\n");
12248 		ctxt->instate = XML_PARSER_CONTENT;
12249 #ifdef DEBUG_PUSH
12250 		xmlGenericError(xmlGenericErrorContext,
12251 			"PP: entering CONTENT\n");
12252 #endif
12253 		break;
12254             case XML_PARSER_IGNORE:
12255 		xmlGenericError(xmlGenericErrorContext,
12256 			"PP: internal error, state == IGNORE");
12257 	        ctxt->instate = XML_PARSER_DTD;
12258 #ifdef DEBUG_PUSH
12259 		xmlGenericError(xmlGenericErrorContext,
12260 			"PP: entering DTD\n");
12261 #endif
12262 	        break;
12263             case XML_PARSER_PI:
12264 		xmlGenericError(xmlGenericErrorContext,
12265 			"PP: internal error, state == PI\n");
12266 		ctxt->instate = XML_PARSER_CONTENT;
12267 #ifdef DEBUG_PUSH
12268 		xmlGenericError(xmlGenericErrorContext,
12269 			"PP: entering CONTENT\n");
12270 #endif
12271 		break;
12272             case XML_PARSER_ENTITY_DECL:
12273 		xmlGenericError(xmlGenericErrorContext,
12274 			"PP: internal error, state == ENTITY_DECL\n");
12275 		ctxt->instate = XML_PARSER_DTD;
12276 #ifdef DEBUG_PUSH
12277 		xmlGenericError(xmlGenericErrorContext,
12278 			"PP: entering DTD\n");
12279 #endif
12280 		break;
12281             case XML_PARSER_ENTITY_VALUE:
12282 		xmlGenericError(xmlGenericErrorContext,
12283 			"PP: internal error, state == ENTITY_VALUE\n");
12284 		ctxt->instate = XML_PARSER_CONTENT;
12285 #ifdef DEBUG_PUSH
12286 		xmlGenericError(xmlGenericErrorContext,
12287 			"PP: entering DTD\n");
12288 #endif
12289 		break;
12290             case XML_PARSER_ATTRIBUTE_VALUE:
12291 		xmlGenericError(xmlGenericErrorContext,
12292 			"PP: internal error, state == ATTRIBUTE_VALUE\n");
12293 		ctxt->instate = XML_PARSER_START_TAG;
12294 #ifdef DEBUG_PUSH
12295 		xmlGenericError(xmlGenericErrorContext,
12296 			"PP: entering START_TAG\n");
12297 #endif
12298 		break;
12299             case XML_PARSER_SYSTEM_LITERAL:
12300 		xmlGenericError(xmlGenericErrorContext,
12301 			"PP: internal error, state == SYSTEM_LITERAL\n");
12302 		ctxt->instate = XML_PARSER_START_TAG;
12303 #ifdef DEBUG_PUSH
12304 		xmlGenericError(xmlGenericErrorContext,
12305 			"PP: entering START_TAG\n");
12306 #endif
12307 		break;
12308             case XML_PARSER_PUBLIC_LITERAL:
12309 		xmlGenericError(xmlGenericErrorContext,
12310 			"PP: internal error, state == PUBLIC_LITERAL\n");
12311 		ctxt->instate = XML_PARSER_START_TAG;
12312 #ifdef DEBUG_PUSH
12313 		xmlGenericError(xmlGenericErrorContext,
12314 			"PP: entering START_TAG\n");
12315 #endif
12316 		break;
12317 	}
12318     }
12319 done:
12320 #ifdef DEBUG_PUSH
12321     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12322 #endif
12323     return(ret);
12324 encoding_error:
12325     {
12326         char buffer[150];
12327 
12328 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12329 			ctxt->input->cur[0], ctxt->input->cur[1],
12330 			ctxt->input->cur[2], ctxt->input->cur[3]);
12331 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12332 		     "Input is not proper UTF-8, indicate encoding !\n%s",
12333 		     BAD_CAST buffer, NULL);
12334     }
12335     return(0);
12336 }
12337 
12338 /**
12339  * xmlParseCheckTransition:
12340  * @ctxt:  an XML parser context
12341  * @chunk:  a char array
12342  * @size:  the size in byte of the chunk
12343  *
12344  * Check depending on the current parser state if the chunk given must be
12345  * processed immediately or one need more data to advance on parsing.
12346  *
12347  * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12348  */
12349 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12350 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12351     if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12352         return(-1);
12353     if (ctxt->instate == XML_PARSER_START_TAG) {
12354         if (memchr(chunk, '>', size) != NULL)
12355             return(1);
12356         return(0);
12357     }
12358     if (ctxt->progressive == XML_PARSER_COMMENT) {
12359         if (memchr(chunk, '>', size) != NULL)
12360             return(1);
12361         return(0);
12362     }
12363     if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12364         if (memchr(chunk, '>', size) != NULL)
12365             return(1);
12366         return(0);
12367     }
12368     if (ctxt->progressive == XML_PARSER_PI) {
12369         if (memchr(chunk, '>', size) != NULL)
12370             return(1);
12371         return(0);
12372     }
12373     if (ctxt->instate == XML_PARSER_END_TAG) {
12374         if (memchr(chunk, '>', size) != NULL)
12375             return(1);
12376         return(0);
12377     }
12378     if ((ctxt->progressive == XML_PARSER_DTD) ||
12379         (ctxt->instate == XML_PARSER_DTD)) {
12380         if (memchr(chunk, '>', size) != NULL)
12381             return(1);
12382         return(0);
12383     }
12384     return(1);
12385 }
12386 
12387 /**
12388  * xmlParseChunk:
12389  * @ctxt:  an XML parser context
12390  * @chunk:  an char array
12391  * @size:  the size in byte of the chunk
12392  * @terminate:  last chunk indicator
12393  *
12394  * Parse a Chunk of memory
12395  *
12396  * Returns zero if no error, the xmlParserErrors otherwise.
12397  */
12398 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12399 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12400               int terminate) {
12401     int end_in_lf = 0;
12402     int remain = 0;
12403     size_t old_avail = 0;
12404     size_t avail = 0;
12405 
12406     if (ctxt == NULL)
12407         return(XML_ERR_INTERNAL_ERROR);
12408     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12409         return(ctxt->errNo);
12410     if (ctxt->instate == XML_PARSER_EOF)
12411         return(-1);
12412     if (ctxt->instate == XML_PARSER_START)
12413         xmlDetectSAX2(ctxt);
12414     if ((size > 0) && (chunk != NULL) && (!terminate) &&
12415         (chunk[size - 1] == '\r')) {
12416 	end_in_lf = 1;
12417 	size--;
12418     }
12419 
12420 xmldecl_done:
12421 
12422     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12423         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12424 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12425 	size_t cur = ctxt->input->cur - ctxt->input->base;
12426 	int res;
12427 
12428         old_avail = xmlBufUse(ctxt->input->buf->buffer);
12429         /*
12430          * Specific handling if we autodetected an encoding, we should not
12431          * push more than the first line ... which depend on the encoding
12432          * And only push the rest once the final encoding was detected
12433          */
12434         if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12435             (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12436             unsigned int len = 45;
12437 
12438             if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12439                                BAD_CAST "UTF-16")) ||
12440                 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12441                                BAD_CAST "UTF16")))
12442                 len = 90;
12443             else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12444                                     BAD_CAST "UCS-4")) ||
12445                      (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12446                                     BAD_CAST "UCS4")))
12447                 len = 180;
12448 
12449             if (ctxt->input->buf->rawconsumed < len)
12450                 len -= ctxt->input->buf->rawconsumed;
12451 
12452             /*
12453              * Change size for reading the initial declaration only
12454              * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12455              * will blindly copy extra bytes from memory.
12456              */
12457             if ((unsigned int) size > len) {
12458                 remain = size - len;
12459                 size = len;
12460             } else {
12461                 remain = 0;
12462             }
12463         }
12464 	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12465 	if (res < 0) {
12466 	    ctxt->errNo = XML_PARSER_EOF;
12467 	    xmlHaltParser(ctxt);
12468 	    return (XML_PARSER_EOF);
12469 	}
12470         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12471 #ifdef DEBUG_PUSH
12472 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12473 #endif
12474 
12475     } else if (ctxt->instate != XML_PARSER_EOF) {
12476 	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12477 	    xmlParserInputBufferPtr in = ctxt->input->buf;
12478 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
12479 		    (in->raw != NULL)) {
12480 		int nbchars;
12481 		size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12482 		size_t current = ctxt->input->cur - ctxt->input->base;
12483 
12484 		nbchars = xmlCharEncInput(in, terminate);
12485 		if (nbchars < 0) {
12486 		    /* TODO 2.6.0 */
12487 		    xmlGenericError(xmlGenericErrorContext,
12488 				    "xmlParseChunk: encoder error\n");
12489 		    return(XML_ERR_INVALID_ENCODING);
12490 		}
12491 		xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12492 	    }
12493 	}
12494     }
12495     if (remain != 0) {
12496         xmlParseTryOrFinish(ctxt, 0);
12497     } else {
12498         if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12499             avail = xmlBufUse(ctxt->input->buf->buffer);
12500         /*
12501          * Depending on the current state it may not be such
12502          * a good idea to try parsing if there is nothing in the chunk
12503          * which would be worth doing a parser state transition and we
12504          * need to wait for more data
12505          */
12506         if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12507             (old_avail == 0) || (avail == 0) ||
12508             (xmlParseCheckTransition(ctxt,
12509                        (const char *)&ctxt->input->base[old_avail],
12510                                      avail - old_avail)))
12511             xmlParseTryOrFinish(ctxt, terminate);
12512     }
12513     if (ctxt->instate == XML_PARSER_EOF)
12514         return(ctxt->errNo);
12515 
12516     if ((ctxt->input != NULL) &&
12517          (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12518          ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12519         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12520         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12521         xmlHaltParser(ctxt);
12522     }
12523     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12524         return(ctxt->errNo);
12525 
12526     if (remain != 0) {
12527         chunk += size;
12528         size = remain;
12529         remain = 0;
12530         goto xmldecl_done;
12531     }
12532     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12533         (ctxt->input->buf != NULL)) {
12534 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12535 					 ctxt->input);
12536 	size_t current = ctxt->input->cur - ctxt->input->base;
12537 
12538 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12539 
12540 	xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12541 			      base, current);
12542     }
12543     if (terminate) {
12544 	/*
12545 	 * Check for termination
12546 	 */
12547 	int cur_avail = 0;
12548 
12549 	if (ctxt->input != NULL) {
12550 	    if (ctxt->input->buf == NULL)
12551 		cur_avail = ctxt->input->length -
12552 			    (ctxt->input->cur - ctxt->input->base);
12553 	    else
12554 		cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12555 			              (ctxt->input->cur - ctxt->input->base);
12556 	}
12557 
12558 	if ((ctxt->instate != XML_PARSER_EOF) &&
12559 	    (ctxt->instate != XML_PARSER_EPILOG)) {
12560 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12561 	}
12562 	if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12563 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12564 	}
12565 	if (ctxt->instate != XML_PARSER_EOF) {
12566 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12567 		ctxt->sax->endDocument(ctxt->userData);
12568 	}
12569 	ctxt->instate = XML_PARSER_EOF;
12570     }
12571     if (ctxt->wellFormed == 0)
12572 	return((xmlParserErrors) ctxt->errNo);
12573     else
12574         return(0);
12575 }
12576 
12577 /************************************************************************
12578  *									*
12579  *		I/O front end functions to the parser			*
12580  *									*
12581  ************************************************************************/
12582 
12583 /**
12584  * xmlCreatePushParserCtxt:
12585  * @sax:  a SAX handler
12586  * @user_data:  The user data returned on SAX callbacks
12587  * @chunk:  a pointer to an array of chars
12588  * @size:  number of chars in the array
12589  * @filename:  an optional file name or URI
12590  *
12591  * Create a parser context for using the XML parser in push mode.
12592  * If @buffer and @size are non-NULL, the data is used to detect
12593  * the encoding.  The remaining characters will be parsed so they
12594  * don't need to be fed in again through xmlParseChunk.
12595  * To allow content encoding detection, @size should be >= 4
12596  * The value of @filename is used for fetching external entities
12597  * and error/warning reports.
12598  *
12599  * Returns the new parser context or NULL
12600  */
12601 
12602 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12603 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12604                         const char *chunk, int size, const char *filename) {
12605     xmlParserCtxtPtr ctxt;
12606     xmlParserInputPtr inputStream;
12607     xmlParserInputBufferPtr buf;
12608     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12609 
12610     /*
12611      * plug some encoding conversion routines
12612      */
12613     if ((chunk != NULL) && (size >= 4))
12614 	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12615 
12616     buf = xmlAllocParserInputBuffer(enc);
12617     if (buf == NULL) return(NULL);
12618 
12619     ctxt = xmlNewParserCtxt();
12620     if (ctxt == NULL) {
12621         xmlErrMemory(NULL, "creating parser: out of memory\n");
12622 	xmlFreeParserInputBuffer(buf);
12623 	return(NULL);
12624     }
12625     ctxt->dictNames = 1;
12626     ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12627     if (ctxt->pushTab == NULL) {
12628         xmlErrMemory(ctxt, NULL);
12629 	xmlFreeParserInputBuffer(buf);
12630 	xmlFreeParserCtxt(ctxt);
12631 	return(NULL);
12632     }
12633     if (sax != NULL) {
12634 #ifdef LIBXML_SAX1_ENABLED
12635 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12636 #endif /* LIBXML_SAX1_ENABLED */
12637 	    xmlFree(ctxt->sax);
12638 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12639 	if (ctxt->sax == NULL) {
12640 	    xmlErrMemory(ctxt, NULL);
12641 	    xmlFreeParserInputBuffer(buf);
12642 	    xmlFreeParserCtxt(ctxt);
12643 	    return(NULL);
12644 	}
12645 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12646 	if (sax->initialized == XML_SAX2_MAGIC)
12647 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12648 	else
12649 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12650 	if (user_data != NULL)
12651 	    ctxt->userData = user_data;
12652     }
12653     if (filename == NULL) {
12654 	ctxt->directory = NULL;
12655     } else {
12656         ctxt->directory = xmlParserGetDirectory(filename);
12657     }
12658 
12659     inputStream = xmlNewInputStream(ctxt);
12660     if (inputStream == NULL) {
12661 	xmlFreeParserCtxt(ctxt);
12662 	xmlFreeParserInputBuffer(buf);
12663 	return(NULL);
12664     }
12665 
12666     if (filename == NULL)
12667 	inputStream->filename = NULL;
12668     else {
12669 	inputStream->filename = (char *)
12670 	    xmlCanonicPath((const xmlChar *) filename);
12671 	if (inputStream->filename == NULL) {
12672 	    xmlFreeParserCtxt(ctxt);
12673 	    xmlFreeParserInputBuffer(buf);
12674 	    return(NULL);
12675 	}
12676     }
12677     inputStream->buf = buf;
12678     xmlBufResetInput(inputStream->buf->buffer, inputStream);
12679     inputPush(ctxt, inputStream);
12680 
12681     /*
12682      * If the caller didn't provide an initial 'chunk' for determining
12683      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12684      * that it can be automatically determined later
12685      */
12686     if ((size == 0) || (chunk == NULL)) {
12687 	ctxt->charset = XML_CHAR_ENCODING_NONE;
12688     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12689 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12690 	size_t cur = ctxt->input->cur - ctxt->input->base;
12691 
12692 	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12693 
12694         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12695 #ifdef DEBUG_PUSH
12696 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12697 #endif
12698     }
12699 
12700     if (enc != XML_CHAR_ENCODING_NONE) {
12701         xmlSwitchEncoding(ctxt, enc);
12702     }
12703 
12704     return(ctxt);
12705 }
12706 #endif /* LIBXML_PUSH_ENABLED */
12707 
12708 /**
12709  * xmlHaltParser:
12710  * @ctxt:  an XML parser context
12711  *
12712  * Blocks further parser processing don't override error
12713  * for internal use
12714  */
12715 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12716 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12717     if (ctxt == NULL)
12718         return;
12719     ctxt->instate = XML_PARSER_EOF;
12720     ctxt->disableSAX = 1;
12721     if (ctxt->input != NULL) {
12722         /*
12723 	 * in case there was a specific allocation deallocate before
12724 	 * overriding base
12725 	 */
12726         if (ctxt->input->free != NULL) {
12727 	    ctxt->input->free((xmlChar *) ctxt->input->base);
12728 	    ctxt->input->free = NULL;
12729 	}
12730 	ctxt->input->cur = BAD_CAST"";
12731 	ctxt->input->base = ctxt->input->cur;
12732     }
12733 }
12734 
12735 /**
12736  * xmlStopParser:
12737  * @ctxt:  an XML parser context
12738  *
12739  * Blocks further parser processing
12740  */
12741 void
xmlStopParser(xmlParserCtxtPtr ctxt)12742 xmlStopParser(xmlParserCtxtPtr ctxt) {
12743     if (ctxt == NULL)
12744         return;
12745     xmlHaltParser(ctxt);
12746     ctxt->errNo = XML_ERR_USER_STOP;
12747 }
12748 
12749 /**
12750  * xmlCreateIOParserCtxt:
12751  * @sax:  a SAX handler
12752  * @user_data:  The user data returned on SAX callbacks
12753  * @ioread:  an I/O read function
12754  * @ioclose:  an I/O close function
12755  * @ioctx:  an I/O handler
12756  * @enc:  the charset encoding if known
12757  *
12758  * Create a parser context for using the XML parser with an existing
12759  * I/O stream
12760  *
12761  * Returns the new parser context or NULL
12762  */
12763 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12764 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12765 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12766 	void *ioctx, xmlCharEncoding enc) {
12767     xmlParserCtxtPtr ctxt;
12768     xmlParserInputPtr inputStream;
12769     xmlParserInputBufferPtr buf;
12770 
12771     if (ioread == NULL) return(NULL);
12772 
12773     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12774     if (buf == NULL) {
12775         if (ioclose != NULL)
12776             ioclose(ioctx);
12777         return (NULL);
12778     }
12779 
12780     ctxt = xmlNewParserCtxt();
12781     if (ctxt == NULL) {
12782 	xmlFreeParserInputBuffer(buf);
12783 	return(NULL);
12784     }
12785     if (sax != NULL) {
12786 #ifdef LIBXML_SAX1_ENABLED
12787 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12788 #endif /* LIBXML_SAX1_ENABLED */
12789 	    xmlFree(ctxt->sax);
12790 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12791 	if (ctxt->sax == NULL) {
12792 	    xmlErrMemory(ctxt, NULL);
12793 	    xmlFreeParserCtxt(ctxt);
12794 	    return(NULL);
12795 	}
12796 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12797 	if (sax->initialized == XML_SAX2_MAGIC)
12798 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12799 	else
12800 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12801 	if (user_data != NULL)
12802 	    ctxt->userData = user_data;
12803     }
12804 
12805     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12806     if (inputStream == NULL) {
12807 	xmlFreeParserCtxt(ctxt);
12808 	return(NULL);
12809     }
12810     inputPush(ctxt, inputStream);
12811 
12812     return(ctxt);
12813 }
12814 
12815 #ifdef LIBXML_VALID_ENABLED
12816 /************************************************************************
12817  *									*
12818  *		Front ends when parsing a DTD				*
12819  *									*
12820  ************************************************************************/
12821 
12822 /**
12823  * xmlIOParseDTD:
12824  * @sax:  the SAX handler block or NULL
12825  * @input:  an Input Buffer
12826  * @enc:  the charset encoding if known
12827  *
12828  * Load and parse a DTD
12829  *
12830  * Returns the resulting xmlDtdPtr or NULL in case of error.
12831  * @input will be freed by the function in any case.
12832  */
12833 
12834 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12835 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12836 	      xmlCharEncoding enc) {
12837     xmlDtdPtr ret = NULL;
12838     xmlParserCtxtPtr ctxt;
12839     xmlParserInputPtr pinput = NULL;
12840     xmlChar start[4];
12841 
12842     if (input == NULL)
12843 	return(NULL);
12844 
12845     ctxt = xmlNewParserCtxt();
12846     if (ctxt == NULL) {
12847         xmlFreeParserInputBuffer(input);
12848 	return(NULL);
12849     }
12850 
12851     /* We are loading a DTD */
12852     ctxt->options |= XML_PARSE_DTDLOAD;
12853 
12854     /*
12855      * Set-up the SAX context
12856      */
12857     if (sax != NULL) {
12858 	if (ctxt->sax != NULL)
12859 	    xmlFree(ctxt->sax);
12860         ctxt->sax = sax;
12861         ctxt->userData = ctxt;
12862     }
12863     xmlDetectSAX2(ctxt);
12864 
12865     /*
12866      * generate a parser input from the I/O handler
12867      */
12868 
12869     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12870     if (pinput == NULL) {
12871         if (sax != NULL) ctxt->sax = NULL;
12872         xmlFreeParserInputBuffer(input);
12873 	xmlFreeParserCtxt(ctxt);
12874 	return(NULL);
12875     }
12876 
12877     /*
12878      * plug some encoding conversion routines here.
12879      */
12880     if (xmlPushInput(ctxt, pinput) < 0) {
12881         if (sax != NULL) ctxt->sax = NULL;
12882 	xmlFreeParserCtxt(ctxt);
12883 	return(NULL);
12884     }
12885     if (enc != XML_CHAR_ENCODING_NONE) {
12886         xmlSwitchEncoding(ctxt, enc);
12887     }
12888 
12889     pinput->filename = NULL;
12890     pinput->line = 1;
12891     pinput->col = 1;
12892     pinput->base = ctxt->input->cur;
12893     pinput->cur = ctxt->input->cur;
12894     pinput->free = NULL;
12895 
12896     /*
12897      * let's parse that entity knowing it's an external subset.
12898      */
12899     ctxt->inSubset = 2;
12900     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12901     if (ctxt->myDoc == NULL) {
12902 	xmlErrMemory(ctxt, "New Doc failed");
12903 	return(NULL);
12904     }
12905     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12906     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12907 	                               BAD_CAST "none", BAD_CAST "none");
12908 
12909     if ((enc == XML_CHAR_ENCODING_NONE) &&
12910         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12911 	/*
12912 	 * Get the 4 first bytes and decode the charset
12913 	 * if enc != XML_CHAR_ENCODING_NONE
12914 	 * plug some encoding conversion routines.
12915 	 */
12916 	start[0] = RAW;
12917 	start[1] = NXT(1);
12918 	start[2] = NXT(2);
12919 	start[3] = NXT(3);
12920 	enc = xmlDetectCharEncoding(start, 4);
12921 	if (enc != XML_CHAR_ENCODING_NONE) {
12922 	    xmlSwitchEncoding(ctxt, enc);
12923 	}
12924     }
12925 
12926     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12927 
12928     if (ctxt->myDoc != NULL) {
12929 	if (ctxt->wellFormed) {
12930 	    ret = ctxt->myDoc->extSubset;
12931 	    ctxt->myDoc->extSubset = NULL;
12932 	    if (ret != NULL) {
12933 		xmlNodePtr tmp;
12934 
12935 		ret->doc = NULL;
12936 		tmp = ret->children;
12937 		while (tmp != NULL) {
12938 		    tmp->doc = NULL;
12939 		    tmp = tmp->next;
12940 		}
12941 	    }
12942 	} else {
12943 	    ret = NULL;
12944 	}
12945         xmlFreeDoc(ctxt->myDoc);
12946         ctxt->myDoc = NULL;
12947     }
12948     if (sax != NULL) ctxt->sax = NULL;
12949     xmlFreeParserCtxt(ctxt);
12950 
12951     return(ret);
12952 }
12953 
12954 /**
12955  * xmlSAXParseDTD:
12956  * @sax:  the SAX handler block
12957  * @ExternalID:  a NAME* containing the External ID of the DTD
12958  * @SystemID:  a NAME* containing the URL to the DTD
12959  *
12960  * Load and parse an external subset.
12961  *
12962  * Returns the resulting xmlDtdPtr or NULL in case of error.
12963  */
12964 
12965 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12966 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12967                           const xmlChar *SystemID) {
12968     xmlDtdPtr ret = NULL;
12969     xmlParserCtxtPtr ctxt;
12970     xmlParserInputPtr input = NULL;
12971     xmlCharEncoding enc;
12972     xmlChar* systemIdCanonic;
12973 
12974     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12975 
12976     ctxt = xmlNewParserCtxt();
12977     if (ctxt == NULL) {
12978 	return(NULL);
12979     }
12980 
12981     /* We are loading a DTD */
12982     ctxt->options |= XML_PARSE_DTDLOAD;
12983 
12984     /*
12985      * Set-up the SAX context
12986      */
12987     if (sax != NULL) {
12988 	if (ctxt->sax != NULL)
12989 	    xmlFree(ctxt->sax);
12990         ctxt->sax = sax;
12991         ctxt->userData = ctxt;
12992     }
12993 
12994     /*
12995      * Canonicalise the system ID
12996      */
12997     systemIdCanonic = xmlCanonicPath(SystemID);
12998     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12999 	xmlFreeParserCtxt(ctxt);
13000 	return(NULL);
13001     }
13002 
13003     /*
13004      * Ask the Entity resolver to load the damn thing
13005      */
13006 
13007     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
13008 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
13009 	                                 systemIdCanonic);
13010     if (input == NULL) {
13011         if (sax != NULL) ctxt->sax = NULL;
13012 	xmlFreeParserCtxt(ctxt);
13013 	if (systemIdCanonic != NULL)
13014 	    xmlFree(systemIdCanonic);
13015 	return(NULL);
13016     }
13017 
13018     /*
13019      * plug some encoding conversion routines here.
13020      */
13021     if (xmlPushInput(ctxt, input) < 0) {
13022         if (sax != NULL) ctxt->sax = NULL;
13023 	xmlFreeParserCtxt(ctxt);
13024 	if (systemIdCanonic != NULL)
13025 	    xmlFree(systemIdCanonic);
13026 	return(NULL);
13027     }
13028     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13029 	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
13030 	xmlSwitchEncoding(ctxt, enc);
13031     }
13032 
13033     if (input->filename == NULL)
13034 	input->filename = (char *) systemIdCanonic;
13035     else
13036 	xmlFree(systemIdCanonic);
13037     input->line = 1;
13038     input->col = 1;
13039     input->base = ctxt->input->cur;
13040     input->cur = ctxt->input->cur;
13041     input->free = NULL;
13042 
13043     /*
13044      * let's parse that entity knowing it's an external subset.
13045      */
13046     ctxt->inSubset = 2;
13047     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
13048     if (ctxt->myDoc == NULL) {
13049 	xmlErrMemory(ctxt, "New Doc failed");
13050         if (sax != NULL) ctxt->sax = NULL;
13051 	xmlFreeParserCtxt(ctxt);
13052 	return(NULL);
13053     }
13054     ctxt->myDoc->properties = XML_DOC_INTERNAL;
13055     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
13056 	                               ExternalID, SystemID);
13057     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
13058 
13059     if (ctxt->myDoc != NULL) {
13060 	if (ctxt->wellFormed) {
13061 	    ret = ctxt->myDoc->extSubset;
13062 	    ctxt->myDoc->extSubset = NULL;
13063 	    if (ret != NULL) {
13064 		xmlNodePtr tmp;
13065 
13066 		ret->doc = NULL;
13067 		tmp = ret->children;
13068 		while (tmp != NULL) {
13069 		    tmp->doc = NULL;
13070 		    tmp = tmp->next;
13071 		}
13072 	    }
13073 	} else {
13074 	    ret = NULL;
13075 	}
13076         xmlFreeDoc(ctxt->myDoc);
13077         ctxt->myDoc = NULL;
13078     }
13079     if (sax != NULL) ctxt->sax = NULL;
13080     xmlFreeParserCtxt(ctxt);
13081 
13082     return(ret);
13083 }
13084 
13085 
13086 /**
13087  * xmlParseDTD:
13088  * @ExternalID:  a NAME* containing the External ID of the DTD
13089  * @SystemID:  a NAME* containing the URL to the DTD
13090  *
13091  * Load and parse an external subset.
13092  *
13093  * Returns the resulting xmlDtdPtr or NULL in case of error.
13094  */
13095 
13096 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)13097 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
13098     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
13099 }
13100 #endif /* LIBXML_VALID_ENABLED */
13101 
13102 /************************************************************************
13103  *									*
13104  *		Front ends when parsing an Entity			*
13105  *									*
13106  ************************************************************************/
13107 
13108 /**
13109  * xmlParseCtxtExternalEntity:
13110  * @ctx:  the existing parsing context
13111  * @URL:  the URL for the entity to load
13112  * @ID:  the System ID for the entity to load
13113  * @lst:  the return value for the set of parsed nodes
13114  *
13115  * Parse an external general entity within an existing parsing context
13116  * An external general parsed entity is well-formed if it matches the
13117  * production labeled extParsedEnt.
13118  *
13119  * [78] extParsedEnt ::= TextDecl? content
13120  *
13121  * Returns 0 if the entity is well formed, -1 in case of args problem and
13122  *    the parser error code otherwise
13123  */
13124 
13125 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13126 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
13127 	               const xmlChar *ID, xmlNodePtr *lst) {
13128     xmlParserCtxtPtr ctxt;
13129     xmlDocPtr newDoc;
13130     xmlNodePtr newRoot;
13131     xmlSAXHandlerPtr oldsax = NULL;
13132     int ret = 0;
13133     xmlChar start[4];
13134     xmlCharEncoding enc;
13135 
13136     if (ctx == NULL) return(-1);
13137 
13138     if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
13139         (ctx->depth > 1024)) {
13140 	return(XML_ERR_ENTITY_LOOP);
13141     }
13142 
13143     if (lst != NULL)
13144         *lst = NULL;
13145     if ((URL == NULL) && (ID == NULL))
13146 	return(-1);
13147     if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
13148 	return(-1);
13149 
13150     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
13151     if (ctxt == NULL) {
13152 	return(-1);
13153     }
13154 
13155     oldsax = ctxt->sax;
13156     ctxt->sax = ctx->sax;
13157     xmlDetectSAX2(ctxt);
13158     newDoc = xmlNewDoc(BAD_CAST "1.0");
13159     if (newDoc == NULL) {
13160 	xmlFreeParserCtxt(ctxt);
13161 	return(-1);
13162     }
13163     newDoc->properties = XML_DOC_INTERNAL;
13164     if (ctx->myDoc->dict) {
13165 	newDoc->dict = ctx->myDoc->dict;
13166 	xmlDictReference(newDoc->dict);
13167     }
13168     if (ctx->myDoc != NULL) {
13169 	newDoc->intSubset = ctx->myDoc->intSubset;
13170 	newDoc->extSubset = ctx->myDoc->extSubset;
13171     }
13172     if (ctx->myDoc->URL != NULL) {
13173 	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13174     }
13175     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13176     if (newRoot == NULL) {
13177 	ctxt->sax = oldsax;
13178 	xmlFreeParserCtxt(ctxt);
13179 	newDoc->intSubset = NULL;
13180 	newDoc->extSubset = NULL;
13181         xmlFreeDoc(newDoc);
13182 	return(-1);
13183     }
13184     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13185     nodePush(ctxt, newDoc->children);
13186     if (ctx->myDoc == NULL) {
13187 	ctxt->myDoc = newDoc;
13188     } else {
13189 	ctxt->myDoc = ctx->myDoc;
13190 	newDoc->children->doc = ctx->myDoc;
13191     }
13192 
13193     /*
13194      * Get the 4 first bytes and decode the charset
13195      * if enc != XML_CHAR_ENCODING_NONE
13196      * plug some encoding conversion routines.
13197      */
13198     GROW
13199     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13200 	start[0] = RAW;
13201 	start[1] = NXT(1);
13202 	start[2] = NXT(2);
13203 	start[3] = NXT(3);
13204 	enc = xmlDetectCharEncoding(start, 4);
13205 	if (enc != XML_CHAR_ENCODING_NONE) {
13206 	    xmlSwitchEncoding(ctxt, enc);
13207 	}
13208     }
13209 
13210     /*
13211      * Parse a possible text declaration first
13212      */
13213     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13214 	xmlParseTextDecl(ctxt);
13215 	/*
13216 	 * An XML-1.0 document can't reference an entity not XML-1.0
13217 	 */
13218 	if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13219 	    (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13220 	    xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13221 	                   "Version mismatch between document and entity\n");
13222 	}
13223     }
13224 
13225     /*
13226      * If the user provided its own SAX callbacks then reuse the
13227      * useData callback field, otherwise the expected setup in a
13228      * DOM builder is to have userData == ctxt
13229      */
13230     if (ctx->userData == ctx)
13231         ctxt->userData = ctxt;
13232     else
13233         ctxt->userData = ctx->userData;
13234 
13235     /*
13236      * Doing validity checking on chunk doesn't make sense
13237      */
13238     ctxt->instate = XML_PARSER_CONTENT;
13239     ctxt->validate = ctx->validate;
13240     ctxt->valid = ctx->valid;
13241     ctxt->loadsubset = ctx->loadsubset;
13242     ctxt->depth = ctx->depth + 1;
13243     ctxt->replaceEntities = ctx->replaceEntities;
13244     if (ctxt->validate) {
13245 	ctxt->vctxt.error = ctx->vctxt.error;
13246 	ctxt->vctxt.warning = ctx->vctxt.warning;
13247     } else {
13248 	ctxt->vctxt.error = NULL;
13249 	ctxt->vctxt.warning = NULL;
13250     }
13251     ctxt->vctxt.nodeTab = NULL;
13252     ctxt->vctxt.nodeNr = 0;
13253     ctxt->vctxt.nodeMax = 0;
13254     ctxt->vctxt.node = NULL;
13255     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13256     ctxt->dict = ctx->dict;
13257     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13258     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13259     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13260     ctxt->dictNames = ctx->dictNames;
13261     ctxt->attsDefault = ctx->attsDefault;
13262     ctxt->attsSpecial = ctx->attsSpecial;
13263     ctxt->linenumbers = ctx->linenumbers;
13264 
13265     xmlParseContent(ctxt);
13266 
13267     ctx->validate = ctxt->validate;
13268     ctx->valid = ctxt->valid;
13269     if ((RAW == '<') && (NXT(1) == '/')) {
13270 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13271     } else if (RAW != 0) {
13272 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13273     }
13274     if (ctxt->node != newDoc->children) {
13275 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13276     }
13277 
13278     if (!ctxt->wellFormed) {
13279         if (ctxt->errNo == 0)
13280 	    ret = 1;
13281 	else
13282 	    ret = ctxt->errNo;
13283     } else {
13284 	if (lst != NULL) {
13285 	    xmlNodePtr cur;
13286 
13287 	    /*
13288 	     * Return the newly created nodeset after unlinking it from
13289 	     * they pseudo parent.
13290 	     */
13291 	    cur = newDoc->children->children;
13292 	    *lst = cur;
13293 	    while (cur != NULL) {
13294 		cur->parent = NULL;
13295 		cur = cur->next;
13296 	    }
13297             newDoc->children->children = NULL;
13298 	}
13299 	ret = 0;
13300     }
13301     ctxt->sax = oldsax;
13302     ctxt->dict = NULL;
13303     ctxt->attsDefault = NULL;
13304     ctxt->attsSpecial = NULL;
13305     xmlFreeParserCtxt(ctxt);
13306     newDoc->intSubset = NULL;
13307     newDoc->extSubset = NULL;
13308     xmlFreeDoc(newDoc);
13309 
13310     return(ret);
13311 }
13312 
13313 /**
13314  * xmlParseExternalEntityPrivate:
13315  * @doc:  the document the chunk pertains to
13316  * @oldctxt:  the previous parser context if available
13317  * @sax:  the SAX handler bloc (possibly NULL)
13318  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13319  * @depth:  Used for loop detection, use 0
13320  * @URL:  the URL for the entity to load
13321  * @ID:  the System ID for the entity to load
13322  * @list:  the return value for the set of parsed nodes
13323  *
13324  * Private version of xmlParseExternalEntity()
13325  *
13326  * Returns 0 if the entity is well formed, -1 in case of args problem and
13327  *    the parser error code otherwise
13328  */
13329 
13330 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13331 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13332 	              xmlSAXHandlerPtr sax,
13333 		      void *user_data, int depth, const xmlChar *URL,
13334 		      const xmlChar *ID, xmlNodePtr *list) {
13335     xmlParserCtxtPtr ctxt;
13336     xmlDocPtr newDoc;
13337     xmlNodePtr newRoot;
13338     xmlSAXHandlerPtr oldsax = NULL;
13339     xmlParserErrors ret = XML_ERR_OK;
13340     xmlChar start[4];
13341     xmlCharEncoding enc;
13342 
13343     if (((depth > 40) &&
13344 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13345 	(depth > 1024)) {
13346 	return(XML_ERR_ENTITY_LOOP);
13347     }
13348 
13349     if (list != NULL)
13350         *list = NULL;
13351     if ((URL == NULL) && (ID == NULL))
13352 	return(XML_ERR_INTERNAL_ERROR);
13353     if (doc == NULL)
13354 	return(XML_ERR_INTERNAL_ERROR);
13355 
13356 
13357     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13358     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13359     ctxt->userData = ctxt;
13360     if (oldctxt != NULL) {
13361 	ctxt->_private = oldctxt->_private;
13362 	ctxt->loadsubset = oldctxt->loadsubset;
13363 	ctxt->validate = oldctxt->validate;
13364 	ctxt->external = oldctxt->external;
13365 	ctxt->record_info = oldctxt->record_info;
13366 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13367 	ctxt->node_seq.length = oldctxt->node_seq.length;
13368 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13369     } else {
13370 	/*
13371 	 * Doing validity checking on chunk without context
13372 	 * doesn't make sense
13373 	 */
13374 	ctxt->_private = NULL;
13375 	ctxt->validate = 0;
13376 	ctxt->external = 2;
13377 	ctxt->loadsubset = 0;
13378     }
13379     if (sax != NULL) {
13380 	oldsax = ctxt->sax;
13381         ctxt->sax = sax;
13382 	if (user_data != NULL)
13383 	    ctxt->userData = user_data;
13384     }
13385     xmlDetectSAX2(ctxt);
13386     newDoc = xmlNewDoc(BAD_CAST "1.0");
13387     if (newDoc == NULL) {
13388 	ctxt->node_seq.maximum = 0;
13389 	ctxt->node_seq.length = 0;
13390 	ctxt->node_seq.buffer = NULL;
13391 	xmlFreeParserCtxt(ctxt);
13392 	return(XML_ERR_INTERNAL_ERROR);
13393     }
13394     newDoc->properties = XML_DOC_INTERNAL;
13395     newDoc->intSubset = doc->intSubset;
13396     newDoc->extSubset = doc->extSubset;
13397     newDoc->dict = doc->dict;
13398     xmlDictReference(newDoc->dict);
13399 
13400     if (doc->URL != NULL) {
13401 	newDoc->URL = xmlStrdup(doc->URL);
13402     }
13403     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13404     if (newRoot == NULL) {
13405 	if (sax != NULL)
13406 	    ctxt->sax = oldsax;
13407 	ctxt->node_seq.maximum = 0;
13408 	ctxt->node_seq.length = 0;
13409 	ctxt->node_seq.buffer = NULL;
13410 	xmlFreeParserCtxt(ctxt);
13411 	newDoc->intSubset = NULL;
13412 	newDoc->extSubset = NULL;
13413         xmlFreeDoc(newDoc);
13414 	return(XML_ERR_INTERNAL_ERROR);
13415     }
13416     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13417     nodePush(ctxt, newDoc->children);
13418     ctxt->myDoc = doc;
13419     newRoot->doc = doc;
13420 
13421     /*
13422      * Get the 4 first bytes and decode the charset
13423      * if enc != XML_CHAR_ENCODING_NONE
13424      * plug some encoding conversion routines.
13425      */
13426     GROW;
13427     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13428 	start[0] = RAW;
13429 	start[1] = NXT(1);
13430 	start[2] = NXT(2);
13431 	start[3] = NXT(3);
13432 	enc = xmlDetectCharEncoding(start, 4);
13433 	if (enc != XML_CHAR_ENCODING_NONE) {
13434 	    xmlSwitchEncoding(ctxt, enc);
13435 	}
13436     }
13437 
13438     /*
13439      * Parse a possible text declaration first
13440      */
13441     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13442 	xmlParseTextDecl(ctxt);
13443     }
13444 
13445     ctxt->instate = XML_PARSER_CONTENT;
13446     ctxt->depth = depth;
13447 
13448     xmlParseContent(ctxt);
13449 
13450     if ((RAW == '<') && (NXT(1) == '/')) {
13451 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13452     } else if (RAW != 0) {
13453 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13454     }
13455     if (ctxt->node != newDoc->children) {
13456 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13457     }
13458 
13459     if (!ctxt->wellFormed) {
13460         if (ctxt->errNo == 0)
13461 	    ret = XML_ERR_INTERNAL_ERROR;
13462 	else
13463 	    ret = (xmlParserErrors)ctxt->errNo;
13464     } else {
13465 	if (list != NULL) {
13466 	    xmlNodePtr cur;
13467 
13468 	    /*
13469 	     * Return the newly created nodeset after unlinking it from
13470 	     * they pseudo parent.
13471 	     */
13472 	    cur = newDoc->children->children;
13473 	    *list = cur;
13474 	    while (cur != NULL) {
13475 		cur->parent = NULL;
13476 		cur = cur->next;
13477 	    }
13478             newDoc->children->children = NULL;
13479 	}
13480 	ret = XML_ERR_OK;
13481     }
13482 
13483     /*
13484      * Record in the parent context the number of entities replacement
13485      * done when parsing that reference.
13486      */
13487     if (oldctxt != NULL)
13488         oldctxt->nbentities += ctxt->nbentities;
13489 
13490     /*
13491      * Also record the size of the entity parsed
13492      */
13493     if (ctxt->input != NULL && oldctxt != NULL) {
13494 	oldctxt->sizeentities += ctxt->input->consumed;
13495 	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13496     }
13497     /*
13498      * And record the last error if any
13499      */
13500     if (ctxt->lastError.code != XML_ERR_OK)
13501         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13502 
13503     if (sax != NULL)
13504 	ctxt->sax = oldsax;
13505     if (oldctxt != NULL) {
13506         oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13507         oldctxt->node_seq.length = ctxt->node_seq.length;
13508         oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13509     }
13510     ctxt->node_seq.maximum = 0;
13511     ctxt->node_seq.length = 0;
13512     ctxt->node_seq.buffer = NULL;
13513     xmlFreeParserCtxt(ctxt);
13514     newDoc->intSubset = NULL;
13515     newDoc->extSubset = NULL;
13516     xmlFreeDoc(newDoc);
13517 
13518     return(ret);
13519 }
13520 
13521 #ifdef LIBXML_SAX1_ENABLED
13522 /**
13523  * xmlParseExternalEntity:
13524  * @doc:  the document the chunk pertains to
13525  * @sax:  the SAX handler bloc (possibly NULL)
13526  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13527  * @depth:  Used for loop detection, use 0
13528  * @URL:  the URL for the entity to load
13529  * @ID:  the System ID for the entity to load
13530  * @lst:  the return value for the set of parsed nodes
13531  *
13532  * Parse an external general entity
13533  * An external general parsed entity is well-formed if it matches the
13534  * production labeled extParsedEnt.
13535  *
13536  * [78] extParsedEnt ::= TextDecl? content
13537  *
13538  * Returns 0 if the entity is well formed, -1 in case of args problem and
13539  *    the parser error code otherwise
13540  */
13541 
13542 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13543 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13544 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13545     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13546 		                       ID, lst));
13547 }
13548 
13549 /**
13550  * xmlParseBalancedChunkMemory:
13551  * @doc:  the document the chunk pertains to
13552  * @sax:  the SAX handler bloc (possibly NULL)
13553  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13554  * @depth:  Used for loop detection, use 0
13555  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13556  * @lst:  the return value for the set of parsed nodes
13557  *
13558  * Parse a well-balanced chunk of an XML document
13559  * called by the parser
13560  * The allowed sequence for the Well Balanced Chunk is the one defined by
13561  * the content production in the XML grammar:
13562  *
13563  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13564  *
13565  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13566  *    the parser error code otherwise
13567  */
13568 
13569 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13570 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13571      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13572     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13573                                                 depth, string, lst, 0 );
13574 }
13575 #endif /* LIBXML_SAX1_ENABLED */
13576 
13577 /**
13578  * xmlParseBalancedChunkMemoryInternal:
13579  * @oldctxt:  the existing parsing context
13580  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13581  * @user_data:  the user data field for the parser context
13582  * @lst:  the return value for the set of parsed nodes
13583  *
13584  *
13585  * Parse a well-balanced chunk of an XML document
13586  * called by the parser
13587  * The allowed sequence for the Well Balanced Chunk is the one defined by
13588  * the content production in the XML grammar:
13589  *
13590  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13591  *
13592  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13593  * error code otherwise
13594  *
13595  * In case recover is set to 1, the nodelist will not be empty even if
13596  * the parsed chunk is not well balanced.
13597  */
13598 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13599 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13600 	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13601     xmlParserCtxtPtr ctxt;
13602     xmlDocPtr newDoc = NULL;
13603     xmlNodePtr newRoot;
13604     xmlSAXHandlerPtr oldsax = NULL;
13605     xmlNodePtr content = NULL;
13606     xmlNodePtr last = NULL;
13607     int size;
13608     xmlParserErrors ret = XML_ERR_OK;
13609 #ifdef SAX2
13610     int i;
13611 #endif
13612 
13613     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13614         (oldctxt->depth >  1024)) {
13615 	return(XML_ERR_ENTITY_LOOP);
13616     }
13617 
13618 
13619     if (lst != NULL)
13620         *lst = NULL;
13621     if (string == NULL)
13622         return(XML_ERR_INTERNAL_ERROR);
13623 
13624     size = xmlStrlen(string);
13625 
13626     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13627     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13628     if (user_data != NULL)
13629 	ctxt->userData = user_data;
13630     else
13631 	ctxt->userData = ctxt;
13632     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13633     ctxt->dict = oldctxt->dict;
13634     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13635     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13636     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13637 
13638 #ifdef SAX2
13639     /* propagate namespaces down the entity */
13640     for (i = 0;i < oldctxt->nsNr;i += 2) {
13641         nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13642     }
13643 #endif
13644 
13645     oldsax = ctxt->sax;
13646     ctxt->sax = oldctxt->sax;
13647     xmlDetectSAX2(ctxt);
13648     ctxt->replaceEntities = oldctxt->replaceEntities;
13649     ctxt->options = oldctxt->options;
13650 
13651     ctxt->_private = oldctxt->_private;
13652     if (oldctxt->myDoc == NULL) {
13653 	newDoc = xmlNewDoc(BAD_CAST "1.0");
13654 	if (newDoc == NULL) {
13655 	    ctxt->sax = oldsax;
13656 	    ctxt->dict = NULL;
13657 	    xmlFreeParserCtxt(ctxt);
13658 	    return(XML_ERR_INTERNAL_ERROR);
13659 	}
13660 	newDoc->properties = XML_DOC_INTERNAL;
13661 	newDoc->dict = ctxt->dict;
13662 	xmlDictReference(newDoc->dict);
13663 	ctxt->myDoc = newDoc;
13664     } else {
13665 	ctxt->myDoc = oldctxt->myDoc;
13666         content = ctxt->myDoc->children;
13667 	last = ctxt->myDoc->last;
13668     }
13669     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13670     if (newRoot == NULL) {
13671 	ctxt->sax = oldsax;
13672 	ctxt->dict = NULL;
13673 	xmlFreeParserCtxt(ctxt);
13674 	if (newDoc != NULL) {
13675 	    xmlFreeDoc(newDoc);
13676 	}
13677 	return(XML_ERR_INTERNAL_ERROR);
13678     }
13679     ctxt->myDoc->children = NULL;
13680     ctxt->myDoc->last = NULL;
13681     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13682     nodePush(ctxt, ctxt->myDoc->children);
13683     ctxt->instate = XML_PARSER_CONTENT;
13684     ctxt->depth = oldctxt->depth + 1;
13685 
13686     ctxt->validate = 0;
13687     ctxt->loadsubset = oldctxt->loadsubset;
13688     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13689 	/*
13690 	 * ID/IDREF registration will be done in xmlValidateElement below
13691 	 */
13692 	ctxt->loadsubset |= XML_SKIP_IDS;
13693     }
13694     ctxt->dictNames = oldctxt->dictNames;
13695     ctxt->attsDefault = oldctxt->attsDefault;
13696     ctxt->attsSpecial = oldctxt->attsSpecial;
13697 
13698     xmlParseContent(ctxt);
13699     if ((RAW == '<') && (NXT(1) == '/')) {
13700 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13701     } else if (RAW != 0) {
13702 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13703     }
13704     if (ctxt->node != ctxt->myDoc->children) {
13705 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13706     }
13707 
13708     if (!ctxt->wellFormed) {
13709         if (ctxt->errNo == 0)
13710 	    ret = XML_ERR_INTERNAL_ERROR;
13711 	else
13712 	    ret = (xmlParserErrors)ctxt->errNo;
13713     } else {
13714       ret = XML_ERR_OK;
13715     }
13716 
13717     if ((lst != NULL) && (ret == XML_ERR_OK)) {
13718 	xmlNodePtr cur;
13719 
13720 	/*
13721 	 * Return the newly created nodeset after unlinking it from
13722 	 * they pseudo parent.
13723 	 */
13724 	cur = ctxt->myDoc->children->children;
13725 	*lst = cur;
13726 	while (cur != NULL) {
13727 #ifdef LIBXML_VALID_ENABLED
13728 	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13729 		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13730 		(cur->type == XML_ELEMENT_NODE)) {
13731 		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13732 			oldctxt->myDoc, cur);
13733 	    }
13734 #endif /* LIBXML_VALID_ENABLED */
13735 	    cur->parent = NULL;
13736 	    cur = cur->next;
13737 	}
13738 	ctxt->myDoc->children->children = NULL;
13739     }
13740     if (ctxt->myDoc != NULL) {
13741 	xmlFreeNode(ctxt->myDoc->children);
13742         ctxt->myDoc->children = content;
13743         ctxt->myDoc->last = last;
13744     }
13745 
13746     /*
13747      * Record in the parent context the number of entities replacement
13748      * done when parsing that reference.
13749      */
13750     if (oldctxt != NULL)
13751         oldctxt->nbentities += ctxt->nbentities;
13752 
13753     /*
13754      * Also record the last error if any
13755      */
13756     if (ctxt->lastError.code != XML_ERR_OK)
13757         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13758 
13759     ctxt->sax = oldsax;
13760     ctxt->dict = NULL;
13761     ctxt->attsDefault = NULL;
13762     ctxt->attsSpecial = NULL;
13763     xmlFreeParserCtxt(ctxt);
13764     if (newDoc != NULL) {
13765 	xmlFreeDoc(newDoc);
13766     }
13767 
13768     return(ret);
13769 }
13770 
13771 /**
13772  * xmlParseInNodeContext:
13773  * @node:  the context node
13774  * @data:  the input string
13775  * @datalen:  the input string length in bytes
13776  * @options:  a combination of xmlParserOption
13777  * @lst:  the return value for the set of parsed nodes
13778  *
13779  * Parse a well-balanced chunk of an XML document
13780  * within the context (DTD, namespaces, etc ...) of the given node.
13781  *
13782  * The allowed sequence for the data is a Well Balanced Chunk defined by
13783  * the content production in the XML grammar:
13784  *
13785  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13786  *
13787  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13788  * error code otherwise
13789  */
13790 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13791 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13792                       int options, xmlNodePtr *lst) {
13793 #ifdef SAX2
13794     xmlParserCtxtPtr ctxt;
13795     xmlDocPtr doc = NULL;
13796     xmlNodePtr fake, cur;
13797     int nsnr = 0;
13798 
13799     xmlParserErrors ret = XML_ERR_OK;
13800 
13801     /*
13802      * check all input parameters, grab the document
13803      */
13804     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13805         return(XML_ERR_INTERNAL_ERROR);
13806     switch (node->type) {
13807         case XML_ELEMENT_NODE:
13808         case XML_ATTRIBUTE_NODE:
13809         case XML_TEXT_NODE:
13810         case XML_CDATA_SECTION_NODE:
13811         case XML_ENTITY_REF_NODE:
13812         case XML_PI_NODE:
13813         case XML_COMMENT_NODE:
13814         case XML_DOCUMENT_NODE:
13815         case XML_HTML_DOCUMENT_NODE:
13816 	    break;
13817 	default:
13818 	    return(XML_ERR_INTERNAL_ERROR);
13819 
13820     }
13821     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13822            (node->type != XML_DOCUMENT_NODE) &&
13823 	   (node->type != XML_HTML_DOCUMENT_NODE))
13824 	node = node->parent;
13825     if (node == NULL)
13826 	return(XML_ERR_INTERNAL_ERROR);
13827     if (node->type == XML_ELEMENT_NODE)
13828 	doc = node->doc;
13829     else
13830         doc = (xmlDocPtr) node;
13831     if (doc == NULL)
13832 	return(XML_ERR_INTERNAL_ERROR);
13833 
13834     /*
13835      * allocate a context and set-up everything not related to the
13836      * node position in the tree
13837      */
13838     if (doc->type == XML_DOCUMENT_NODE)
13839 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13840 #ifdef LIBXML_HTML_ENABLED
13841     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13842 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13843         /*
13844          * When parsing in context, it makes no sense to add implied
13845          * elements like html/body/etc...
13846          */
13847         options |= HTML_PARSE_NOIMPLIED;
13848     }
13849 #endif
13850     else
13851         return(XML_ERR_INTERNAL_ERROR);
13852 
13853     if (ctxt == NULL)
13854         return(XML_ERR_NO_MEMORY);
13855 
13856     /*
13857      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13858      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13859      * we must wait until the last moment to free the original one.
13860      */
13861     if (doc->dict != NULL) {
13862         if (ctxt->dict != NULL)
13863 	    xmlDictFree(ctxt->dict);
13864 	ctxt->dict = doc->dict;
13865     } else
13866         options |= XML_PARSE_NODICT;
13867 
13868     if (doc->encoding != NULL) {
13869         xmlCharEncodingHandlerPtr hdlr;
13870 
13871         if (ctxt->encoding != NULL)
13872 	    xmlFree((xmlChar *) ctxt->encoding);
13873         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13874 
13875         hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13876         if (hdlr != NULL) {
13877             xmlSwitchToEncoding(ctxt, hdlr);
13878 	} else {
13879             return(XML_ERR_UNSUPPORTED_ENCODING);
13880         }
13881     }
13882 
13883     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13884     xmlDetectSAX2(ctxt);
13885     ctxt->myDoc = doc;
13886     /* parsing in context, i.e. as within existing content */
13887     ctxt->instate = XML_PARSER_CONTENT;
13888 
13889     fake = xmlNewComment(NULL);
13890     if (fake == NULL) {
13891         xmlFreeParserCtxt(ctxt);
13892 	return(XML_ERR_NO_MEMORY);
13893     }
13894     xmlAddChild(node, fake);
13895 
13896     if (node->type == XML_ELEMENT_NODE) {
13897 	nodePush(ctxt, node);
13898 	/*
13899 	 * initialize the SAX2 namespaces stack
13900 	 */
13901 	cur = node;
13902 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13903 	    xmlNsPtr ns = cur->nsDef;
13904 	    const xmlChar *iprefix, *ihref;
13905 
13906 	    while (ns != NULL) {
13907 		if (ctxt->dict) {
13908 		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13909 		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13910 		} else {
13911 		    iprefix = ns->prefix;
13912 		    ihref = ns->href;
13913 		}
13914 
13915 	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13916 		    nsPush(ctxt, iprefix, ihref);
13917 		    nsnr++;
13918 		}
13919 		ns = ns->next;
13920 	    }
13921 	    cur = cur->parent;
13922 	}
13923     }
13924 
13925     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13926 	/*
13927 	 * ID/IDREF registration will be done in xmlValidateElement below
13928 	 */
13929 	ctxt->loadsubset |= XML_SKIP_IDS;
13930     }
13931 
13932 #ifdef LIBXML_HTML_ENABLED
13933     if (doc->type == XML_HTML_DOCUMENT_NODE)
13934         __htmlParseContent(ctxt);
13935     else
13936 #endif
13937 	xmlParseContent(ctxt);
13938 
13939     nsPop(ctxt, nsnr);
13940     if ((RAW == '<') && (NXT(1) == '/')) {
13941 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13942     } else if (RAW != 0) {
13943 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13944     }
13945     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13946 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13947 	ctxt->wellFormed = 0;
13948     }
13949 
13950     if (!ctxt->wellFormed) {
13951         if (ctxt->errNo == 0)
13952 	    ret = XML_ERR_INTERNAL_ERROR;
13953 	else
13954 	    ret = (xmlParserErrors)ctxt->errNo;
13955     } else {
13956         ret = XML_ERR_OK;
13957     }
13958 
13959     /*
13960      * Return the newly created nodeset after unlinking it from
13961      * the pseudo sibling.
13962      */
13963 
13964     cur = fake->next;
13965     fake->next = NULL;
13966     node->last = fake;
13967 
13968     if (cur != NULL) {
13969 	cur->prev = NULL;
13970     }
13971 
13972     *lst = cur;
13973 
13974     while (cur != NULL) {
13975 	cur->parent = NULL;
13976 	cur = cur->next;
13977     }
13978 
13979     xmlUnlinkNode(fake);
13980     xmlFreeNode(fake);
13981 
13982 
13983     if (ret != XML_ERR_OK) {
13984         xmlFreeNodeList(*lst);
13985 	*lst = NULL;
13986     }
13987 
13988     if (doc->dict != NULL)
13989         ctxt->dict = NULL;
13990     xmlFreeParserCtxt(ctxt);
13991 
13992     return(ret);
13993 #else /* !SAX2 */
13994     return(XML_ERR_INTERNAL_ERROR);
13995 #endif
13996 }
13997 
13998 #ifdef LIBXML_SAX1_ENABLED
13999 /**
14000  * xmlParseBalancedChunkMemoryRecover:
14001  * @doc:  the document the chunk pertains to
14002  * @sax:  the SAX handler bloc (possibly NULL)
14003  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
14004  * @depth:  Used for loop detection, use 0
14005  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
14006  * @lst:  the return value for the set of parsed nodes
14007  * @recover: return nodes even if the data is broken (use 0)
14008  *
14009  *
14010  * Parse a well-balanced chunk of an XML document
14011  * called by the parser
14012  * The allowed sequence for the Well Balanced Chunk is the one defined by
14013  * the content production in the XML grammar:
14014  *
14015  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
14016  *
14017  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
14018  *    the parser error code otherwise
14019  *
14020  * In case recover is set to 1, the nodelist will not be empty even if
14021  * the parsed chunk is not well balanced, assuming the parsing succeeded to
14022  * some extent.
14023  */
14024 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)14025 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
14026      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
14027      int recover) {
14028     xmlParserCtxtPtr ctxt;
14029     xmlDocPtr newDoc;
14030     xmlSAXHandlerPtr oldsax = NULL;
14031     xmlNodePtr content, newRoot;
14032     int size;
14033     int ret = 0;
14034 
14035     if (depth > 40) {
14036 	return(XML_ERR_ENTITY_LOOP);
14037     }
14038 
14039 
14040     if (lst != NULL)
14041         *lst = NULL;
14042     if (string == NULL)
14043         return(-1);
14044 
14045     size = xmlStrlen(string);
14046 
14047     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
14048     if (ctxt == NULL) return(-1);
14049     ctxt->userData = ctxt;
14050     if (sax != NULL) {
14051 	oldsax = ctxt->sax;
14052         ctxt->sax = sax;
14053 	if (user_data != NULL)
14054 	    ctxt->userData = user_data;
14055     }
14056     newDoc = xmlNewDoc(BAD_CAST "1.0");
14057     if (newDoc == NULL) {
14058 	xmlFreeParserCtxt(ctxt);
14059 	return(-1);
14060     }
14061     newDoc->properties = XML_DOC_INTERNAL;
14062     if ((doc != NULL) && (doc->dict != NULL)) {
14063         xmlDictFree(ctxt->dict);
14064 	ctxt->dict = doc->dict;
14065 	xmlDictReference(ctxt->dict);
14066 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
14067 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
14068 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
14069 	ctxt->dictNames = 1;
14070     } else {
14071 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
14072     }
14073     if (doc != NULL) {
14074 	newDoc->intSubset = doc->intSubset;
14075 	newDoc->extSubset = doc->extSubset;
14076     }
14077     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
14078     if (newRoot == NULL) {
14079 	if (sax != NULL)
14080 	    ctxt->sax = oldsax;
14081 	xmlFreeParserCtxt(ctxt);
14082 	newDoc->intSubset = NULL;
14083 	newDoc->extSubset = NULL;
14084         xmlFreeDoc(newDoc);
14085 	return(-1);
14086     }
14087     xmlAddChild((xmlNodePtr) newDoc, newRoot);
14088     nodePush(ctxt, newRoot);
14089     if (doc == NULL) {
14090 	ctxt->myDoc = newDoc;
14091     } else {
14092 	ctxt->myDoc = newDoc;
14093 	newDoc->children->doc = doc;
14094 	/* Ensure that doc has XML spec namespace */
14095 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
14096 	newDoc->oldNs = doc->oldNs;
14097     }
14098     ctxt->instate = XML_PARSER_CONTENT;
14099     ctxt->depth = depth;
14100 
14101     /*
14102      * Doing validity checking on chunk doesn't make sense
14103      */
14104     ctxt->validate = 0;
14105     ctxt->loadsubset = 0;
14106     xmlDetectSAX2(ctxt);
14107 
14108     if ( doc != NULL ){
14109         content = doc->children;
14110         doc->children = NULL;
14111         xmlParseContent(ctxt);
14112         doc->children = content;
14113     }
14114     else {
14115         xmlParseContent(ctxt);
14116     }
14117     if ((RAW == '<') && (NXT(1) == '/')) {
14118 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
14119     } else if (RAW != 0) {
14120 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
14121     }
14122     if (ctxt->node != newDoc->children) {
14123 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
14124     }
14125 
14126     if (!ctxt->wellFormed) {
14127         if (ctxt->errNo == 0)
14128 	    ret = 1;
14129 	else
14130 	    ret = ctxt->errNo;
14131     } else {
14132       ret = 0;
14133     }
14134 
14135     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
14136 	xmlNodePtr cur;
14137 
14138 	/*
14139 	 * Return the newly created nodeset after unlinking it from
14140 	 * they pseudo parent.
14141 	 */
14142 	cur = newDoc->children->children;
14143 	*lst = cur;
14144 	while (cur != NULL) {
14145 	    xmlSetTreeDoc(cur, doc);
14146 	    cur->parent = NULL;
14147 	    cur = cur->next;
14148 	}
14149 	newDoc->children->children = NULL;
14150     }
14151 
14152     if (sax != NULL)
14153 	ctxt->sax = oldsax;
14154     xmlFreeParserCtxt(ctxt);
14155     newDoc->intSubset = NULL;
14156     newDoc->extSubset = NULL;
14157     newDoc->oldNs = NULL;
14158     xmlFreeDoc(newDoc);
14159 
14160     return(ret);
14161 }
14162 
14163 /**
14164  * xmlSAXParseEntity:
14165  * @sax:  the SAX handler block
14166  * @filename:  the filename
14167  *
14168  * parse an XML external entity out of context and build a tree.
14169  * It use the given SAX function block to handle the parsing callback.
14170  * If sax is NULL, fallback to the default DOM tree building routines.
14171  *
14172  * [78] extParsedEnt ::= TextDecl? content
14173  *
14174  * This correspond to a "Well Balanced" chunk
14175  *
14176  * Returns the resulting document tree
14177  */
14178 
14179 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)14180 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14181     xmlDocPtr ret;
14182     xmlParserCtxtPtr ctxt;
14183 
14184     ctxt = xmlCreateFileParserCtxt(filename);
14185     if (ctxt == NULL) {
14186 	return(NULL);
14187     }
14188     if (sax != NULL) {
14189 	if (ctxt->sax != NULL)
14190 	    xmlFree(ctxt->sax);
14191         ctxt->sax = sax;
14192         ctxt->userData = NULL;
14193     }
14194 
14195     xmlParseExtParsedEnt(ctxt);
14196 
14197     if (ctxt->wellFormed)
14198 	ret = ctxt->myDoc;
14199     else {
14200         ret = NULL;
14201         xmlFreeDoc(ctxt->myDoc);
14202         ctxt->myDoc = NULL;
14203     }
14204     if (sax != NULL)
14205         ctxt->sax = NULL;
14206     xmlFreeParserCtxt(ctxt);
14207 
14208     return(ret);
14209 }
14210 
14211 /**
14212  * xmlParseEntity:
14213  * @filename:  the filename
14214  *
14215  * parse an XML external entity out of context and build a tree.
14216  *
14217  * [78] extParsedEnt ::= TextDecl? content
14218  *
14219  * This correspond to a "Well Balanced" chunk
14220  *
14221  * Returns the resulting document tree
14222  */
14223 
14224 xmlDocPtr
xmlParseEntity(const char * filename)14225 xmlParseEntity(const char *filename) {
14226     return(xmlSAXParseEntity(NULL, filename));
14227 }
14228 #endif /* LIBXML_SAX1_ENABLED */
14229 
14230 /**
14231  * xmlCreateEntityParserCtxtInternal:
14232  * @URL:  the entity URL
14233  * @ID:  the entity PUBLIC ID
14234  * @base:  a possible base for the target URI
14235  * @pctx:  parser context used to set options on new context
14236  *
14237  * Create a parser context for an external entity
14238  * Automatic support for ZLIB/Compress compressed document is provided
14239  * by default if found at compile-time.
14240  *
14241  * Returns the new parser context or NULL
14242  */
14243 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)14244 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14245 	                  const xmlChar *base, xmlParserCtxtPtr pctx) {
14246     xmlParserCtxtPtr ctxt;
14247     xmlParserInputPtr inputStream;
14248     char *directory = NULL;
14249     xmlChar *uri;
14250 
14251     ctxt = xmlNewParserCtxt();
14252     if (ctxt == NULL) {
14253 	return(NULL);
14254     }
14255 
14256     if (pctx != NULL) {
14257         ctxt->options = pctx->options;
14258         ctxt->_private = pctx->_private;
14259     }
14260 
14261     uri = xmlBuildURI(URL, base);
14262 
14263     if (uri == NULL) {
14264 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14265 	if (inputStream == NULL) {
14266 	    xmlFreeParserCtxt(ctxt);
14267 	    return(NULL);
14268 	}
14269 
14270 	inputPush(ctxt, inputStream);
14271 
14272 	if ((ctxt->directory == NULL) && (directory == NULL))
14273 	    directory = xmlParserGetDirectory((char *)URL);
14274 	if ((ctxt->directory == NULL) && (directory != NULL))
14275 	    ctxt->directory = directory;
14276     } else {
14277 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14278 	if (inputStream == NULL) {
14279 	    xmlFree(uri);
14280 	    xmlFreeParserCtxt(ctxt);
14281 	    return(NULL);
14282 	}
14283 
14284 	inputPush(ctxt, inputStream);
14285 
14286 	if ((ctxt->directory == NULL) && (directory == NULL))
14287 	    directory = xmlParserGetDirectory((char *)uri);
14288 	if ((ctxt->directory == NULL) && (directory != NULL))
14289 	    ctxt->directory = directory;
14290 	xmlFree(uri);
14291     }
14292     return(ctxt);
14293 }
14294 
14295 /**
14296  * xmlCreateEntityParserCtxt:
14297  * @URL:  the entity URL
14298  * @ID:  the entity PUBLIC ID
14299  * @base:  a possible base for the target URI
14300  *
14301  * Create a parser context for an external entity
14302  * Automatic support for ZLIB/Compress compressed document is provided
14303  * by default if found at compile-time.
14304  *
14305  * Returns the new parser context or NULL
14306  */
14307 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14308 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14309 	                  const xmlChar *base) {
14310     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14311 
14312 }
14313 
14314 /************************************************************************
14315  *									*
14316  *		Front ends when parsing from a file			*
14317  *									*
14318  ************************************************************************/
14319 
14320 /**
14321  * xmlCreateURLParserCtxt:
14322  * @filename:  the filename or URL
14323  * @options:  a combination of xmlParserOption
14324  *
14325  * Create a parser context for a file or URL content.
14326  * Automatic support for ZLIB/Compress compressed document is provided
14327  * by default if found at compile-time and for file accesses
14328  *
14329  * Returns the new parser context or NULL
14330  */
14331 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14332 xmlCreateURLParserCtxt(const char *filename, int options)
14333 {
14334     xmlParserCtxtPtr ctxt;
14335     xmlParserInputPtr inputStream;
14336     char *directory = NULL;
14337 
14338     ctxt = xmlNewParserCtxt();
14339     if (ctxt == NULL) {
14340 	xmlErrMemory(NULL, "cannot allocate parser context");
14341 	return(NULL);
14342     }
14343 
14344     if (options)
14345 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14346     ctxt->linenumbers = 1;
14347 
14348     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14349     if (inputStream == NULL) {
14350 	xmlFreeParserCtxt(ctxt);
14351 	return(NULL);
14352     }
14353 
14354     inputPush(ctxt, inputStream);
14355     if ((ctxt->directory == NULL) && (directory == NULL))
14356         directory = xmlParserGetDirectory(filename);
14357     if ((ctxt->directory == NULL) && (directory != NULL))
14358         ctxt->directory = directory;
14359 
14360     return(ctxt);
14361 }
14362 
14363 /**
14364  * xmlCreateFileParserCtxt:
14365  * @filename:  the filename
14366  *
14367  * Create a parser context for a file content.
14368  * Automatic support for ZLIB/Compress compressed document is provided
14369  * by default if found at compile-time.
14370  *
14371  * Returns the new parser context or NULL
14372  */
14373 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14374 xmlCreateFileParserCtxt(const char *filename)
14375 {
14376     return(xmlCreateURLParserCtxt(filename, 0));
14377 }
14378 
14379 #ifdef LIBXML_SAX1_ENABLED
14380 /**
14381  * xmlSAXParseFileWithData:
14382  * @sax:  the SAX handler block
14383  * @filename:  the filename
14384  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14385  *             documents
14386  * @data:  the userdata
14387  *
14388  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14389  * compressed document is provided by default if found at compile-time.
14390  * It use the given SAX function block to handle the parsing callback.
14391  * If sax is NULL, fallback to the default DOM tree building routines.
14392  *
14393  * User data (void *) is stored within the parser context in the
14394  * context's _private member, so it is available nearly everywhere in libxml
14395  *
14396  * Returns the resulting document tree
14397  */
14398 
14399 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14400 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14401                         int recovery, void *data) {
14402     xmlDocPtr ret;
14403     xmlParserCtxtPtr ctxt;
14404 
14405     xmlInitParser();
14406 
14407     ctxt = xmlCreateFileParserCtxt(filename);
14408     if (ctxt == NULL) {
14409 	return(NULL);
14410     }
14411     if (sax != NULL) {
14412 	if (ctxt->sax != NULL)
14413 	    xmlFree(ctxt->sax);
14414         ctxt->sax = sax;
14415     }
14416     xmlDetectSAX2(ctxt);
14417     if (data!=NULL) {
14418 	ctxt->_private = data;
14419     }
14420 
14421     if (ctxt->directory == NULL)
14422         ctxt->directory = xmlParserGetDirectory(filename);
14423 
14424     ctxt->recovery = recovery;
14425 
14426     xmlParseDocument(ctxt);
14427 
14428     if ((ctxt->wellFormed) || recovery) {
14429         ret = ctxt->myDoc;
14430 	if (ret != NULL) {
14431 	    if (ctxt->input->buf->compressed > 0)
14432 		ret->compression = 9;
14433 	    else
14434 		ret->compression = ctxt->input->buf->compressed;
14435 	}
14436     }
14437     else {
14438        ret = NULL;
14439        xmlFreeDoc(ctxt->myDoc);
14440        ctxt->myDoc = NULL;
14441     }
14442     if (sax != NULL)
14443         ctxt->sax = NULL;
14444     xmlFreeParserCtxt(ctxt);
14445 
14446     return(ret);
14447 }
14448 
14449 /**
14450  * xmlSAXParseFile:
14451  * @sax:  the SAX handler block
14452  * @filename:  the filename
14453  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14454  *             documents
14455  *
14456  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14457  * compressed document is provided by default if found at compile-time.
14458  * It use the given SAX function block to handle the parsing callback.
14459  * If sax is NULL, fallback to the default DOM tree building routines.
14460  *
14461  * Returns the resulting document tree
14462  */
14463 
14464 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14465 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14466                           int recovery) {
14467     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14468 }
14469 
14470 /**
14471  * xmlRecoverDoc:
14472  * @cur:  a pointer to an array of xmlChar
14473  *
14474  * parse an XML in-memory document and build a tree.
14475  * In the case the document is not Well Formed, a attempt to build a
14476  * tree is tried anyway
14477  *
14478  * Returns the resulting document tree or NULL in case of failure
14479  */
14480 
14481 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14482 xmlRecoverDoc(const xmlChar *cur) {
14483     return(xmlSAXParseDoc(NULL, cur, 1));
14484 }
14485 
14486 /**
14487  * xmlParseFile:
14488  * @filename:  the filename
14489  *
14490  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14491  * compressed document is provided by default if found at compile-time.
14492  *
14493  * Returns the resulting document tree if the file was wellformed,
14494  * NULL otherwise.
14495  */
14496 
14497 xmlDocPtr
xmlParseFile(const char * filename)14498 xmlParseFile(const char *filename) {
14499     return(xmlSAXParseFile(NULL, filename, 0));
14500 }
14501 
14502 /**
14503  * xmlRecoverFile:
14504  * @filename:  the filename
14505  *
14506  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14507  * compressed document is provided by default if found at compile-time.
14508  * In the case the document is not Well Formed, it attempts to build
14509  * a tree anyway
14510  *
14511  * Returns the resulting document tree or NULL in case of failure
14512  */
14513 
14514 xmlDocPtr
xmlRecoverFile(const char * filename)14515 xmlRecoverFile(const char *filename) {
14516     return(xmlSAXParseFile(NULL, filename, 1));
14517 }
14518 
14519 
14520 /**
14521  * xmlSetupParserForBuffer:
14522  * @ctxt:  an XML parser context
14523  * @buffer:  a xmlChar * buffer
14524  * @filename:  a file name
14525  *
14526  * Setup the parser context to parse a new buffer; Clears any prior
14527  * contents from the parser context. The buffer parameter must not be
14528  * NULL, but the filename parameter can be
14529  */
14530 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14531 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14532                              const char* filename)
14533 {
14534     xmlParserInputPtr input;
14535 
14536     if ((ctxt == NULL) || (buffer == NULL))
14537         return;
14538 
14539     input = xmlNewInputStream(ctxt);
14540     if (input == NULL) {
14541         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14542         xmlClearParserCtxt(ctxt);
14543         return;
14544     }
14545 
14546     xmlClearParserCtxt(ctxt);
14547     if (filename != NULL)
14548         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14549     input->base = buffer;
14550     input->cur = buffer;
14551     input->end = &buffer[xmlStrlen(buffer)];
14552     inputPush(ctxt, input);
14553 }
14554 
14555 /**
14556  * xmlSAXUserParseFile:
14557  * @sax:  a SAX handler
14558  * @user_data:  The user data returned on SAX callbacks
14559  * @filename:  a file name
14560  *
14561  * parse an XML file and call the given SAX handler routines.
14562  * Automatic support for ZLIB/Compress compressed document is provided
14563  *
14564  * Returns 0 in case of success or a error number otherwise
14565  */
14566 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14567 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14568                     const char *filename) {
14569     int ret = 0;
14570     xmlParserCtxtPtr ctxt;
14571 
14572     ctxt = xmlCreateFileParserCtxt(filename);
14573     if (ctxt == NULL) return -1;
14574     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14575 	xmlFree(ctxt->sax);
14576     ctxt->sax = sax;
14577     xmlDetectSAX2(ctxt);
14578 
14579     if (user_data != NULL)
14580 	ctxt->userData = user_data;
14581 
14582     xmlParseDocument(ctxt);
14583 
14584     if (ctxt->wellFormed)
14585 	ret = 0;
14586     else {
14587         if (ctxt->errNo != 0)
14588 	    ret = ctxt->errNo;
14589 	else
14590 	    ret = -1;
14591     }
14592     if (sax != NULL)
14593 	ctxt->sax = NULL;
14594     if (ctxt->myDoc != NULL) {
14595         xmlFreeDoc(ctxt->myDoc);
14596 	ctxt->myDoc = NULL;
14597     }
14598     xmlFreeParserCtxt(ctxt);
14599 
14600     return ret;
14601 }
14602 #endif /* LIBXML_SAX1_ENABLED */
14603 
14604 /************************************************************************
14605  *									*
14606  *		Front ends when parsing from memory			*
14607  *									*
14608  ************************************************************************/
14609 
14610 /**
14611  * xmlCreateMemoryParserCtxt:
14612  * @buffer:  a pointer to a char array
14613  * @size:  the size of the array
14614  *
14615  * Create a parser context for an XML in-memory document.
14616  *
14617  * Returns the new parser context or NULL
14618  */
14619 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14620 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14621     xmlParserCtxtPtr ctxt;
14622     xmlParserInputPtr input;
14623     xmlParserInputBufferPtr buf;
14624 
14625     if (buffer == NULL)
14626 	return(NULL);
14627     if (size <= 0)
14628 	return(NULL);
14629 
14630     ctxt = xmlNewParserCtxt();
14631     if (ctxt == NULL)
14632 	return(NULL);
14633 
14634     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14635     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14636     if (buf == NULL) {
14637 	xmlFreeParserCtxt(ctxt);
14638 	return(NULL);
14639     }
14640 
14641     input = xmlNewInputStream(ctxt);
14642     if (input == NULL) {
14643 	xmlFreeParserInputBuffer(buf);
14644 	xmlFreeParserCtxt(ctxt);
14645 	return(NULL);
14646     }
14647 
14648     input->filename = NULL;
14649     input->buf = buf;
14650     xmlBufResetInput(input->buf->buffer, input);
14651 
14652     inputPush(ctxt, input);
14653     return(ctxt);
14654 }
14655 
14656 #ifdef LIBXML_SAX1_ENABLED
14657 /**
14658  * xmlSAXParseMemoryWithData:
14659  * @sax:  the SAX handler block
14660  * @buffer:  an pointer to a char array
14661  * @size:  the size of the array
14662  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14663  *             documents
14664  * @data:  the userdata
14665  *
14666  * parse an XML in-memory block and use the given SAX function block
14667  * to handle the parsing callback. If sax is NULL, fallback to the default
14668  * DOM tree building routines.
14669  *
14670  * User data (void *) is stored within the parser context in the
14671  * context's _private member, so it is available nearly everywhere in libxml
14672  *
14673  * Returns the resulting document tree
14674  */
14675 
14676 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14677 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14678 	          int size, int recovery, void *data) {
14679     xmlDocPtr ret;
14680     xmlParserCtxtPtr ctxt;
14681 
14682     xmlInitParser();
14683 
14684     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14685     if (ctxt == NULL) return(NULL);
14686     if (sax != NULL) {
14687 	if (ctxt->sax != NULL)
14688 	    xmlFree(ctxt->sax);
14689         ctxt->sax = sax;
14690     }
14691     xmlDetectSAX2(ctxt);
14692     if (data!=NULL) {
14693 	ctxt->_private=data;
14694     }
14695 
14696     ctxt->recovery = recovery;
14697 
14698     xmlParseDocument(ctxt);
14699 
14700     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14701     else {
14702        ret = NULL;
14703        xmlFreeDoc(ctxt->myDoc);
14704        ctxt->myDoc = NULL;
14705     }
14706     if (sax != NULL)
14707 	ctxt->sax = NULL;
14708     xmlFreeParserCtxt(ctxt);
14709 
14710     return(ret);
14711 }
14712 
14713 /**
14714  * xmlSAXParseMemory:
14715  * @sax:  the SAX handler block
14716  * @buffer:  an pointer to a char array
14717  * @size:  the size of the array
14718  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14719  *             documents
14720  *
14721  * parse an XML in-memory block and use the given SAX function block
14722  * to handle the parsing callback. If sax is NULL, fallback to the default
14723  * DOM tree building routines.
14724  *
14725  * Returns the resulting document tree
14726  */
14727 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14728 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14729 	          int size, int recovery) {
14730     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14731 }
14732 
14733 /**
14734  * xmlParseMemory:
14735  * @buffer:  an pointer to a char array
14736  * @size:  the size of the array
14737  *
14738  * parse an XML in-memory block and build a tree.
14739  *
14740  * Returns the resulting document tree
14741  */
14742 
xmlParseMemory(const char * buffer,int size)14743 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14744    return(xmlSAXParseMemory(NULL, buffer, size, 0));
14745 }
14746 
14747 /**
14748  * xmlRecoverMemory:
14749  * @buffer:  an pointer to a char array
14750  * @size:  the size of the array
14751  *
14752  * parse an XML in-memory block and build a tree.
14753  * In the case the document is not Well Formed, an attempt to
14754  * build a tree is tried anyway
14755  *
14756  * Returns the resulting document tree or NULL in case of error
14757  */
14758 
xmlRecoverMemory(const char * buffer,int size)14759 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14760    return(xmlSAXParseMemory(NULL, buffer, size, 1));
14761 }
14762 
14763 /**
14764  * xmlSAXUserParseMemory:
14765  * @sax:  a SAX handler
14766  * @user_data:  The user data returned on SAX callbacks
14767  * @buffer:  an in-memory XML document input
14768  * @size:  the length of the XML document in bytes
14769  *
14770  * A better SAX parsing routine.
14771  * parse an XML in-memory buffer and call the given SAX handler routines.
14772  *
14773  * Returns 0 in case of success or a error number otherwise
14774  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14775 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14776 			  const char *buffer, int size) {
14777     int ret = 0;
14778     xmlParserCtxtPtr ctxt;
14779 
14780     xmlInitParser();
14781 
14782     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14783     if (ctxt == NULL) return -1;
14784     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14785         xmlFree(ctxt->sax);
14786     ctxt->sax = sax;
14787     xmlDetectSAX2(ctxt);
14788 
14789     if (user_data != NULL)
14790 	ctxt->userData = user_data;
14791 
14792     xmlParseDocument(ctxt);
14793 
14794     if (ctxt->wellFormed)
14795 	ret = 0;
14796     else {
14797         if (ctxt->errNo != 0)
14798 	    ret = ctxt->errNo;
14799 	else
14800 	    ret = -1;
14801     }
14802     if (sax != NULL)
14803         ctxt->sax = NULL;
14804     if (ctxt->myDoc != NULL) {
14805         xmlFreeDoc(ctxt->myDoc);
14806 	ctxt->myDoc = NULL;
14807     }
14808     xmlFreeParserCtxt(ctxt);
14809 
14810     return ret;
14811 }
14812 #endif /* LIBXML_SAX1_ENABLED */
14813 
14814 /**
14815  * xmlCreateDocParserCtxt:
14816  * @cur:  a pointer to an array of xmlChar
14817  *
14818  * Creates a parser context for an XML in-memory document.
14819  *
14820  * Returns the new parser context or NULL
14821  */
14822 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14823 xmlCreateDocParserCtxt(const xmlChar *cur) {
14824     int len;
14825 
14826     if (cur == NULL)
14827 	return(NULL);
14828     len = xmlStrlen(cur);
14829     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14830 }
14831 
14832 #ifdef LIBXML_SAX1_ENABLED
14833 /**
14834  * xmlSAXParseDoc:
14835  * @sax:  the SAX handler block
14836  * @cur:  a pointer to an array of xmlChar
14837  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14838  *             documents
14839  *
14840  * parse an XML in-memory document and build a tree.
14841  * It use the given SAX function block to handle the parsing callback.
14842  * If sax is NULL, fallback to the default DOM tree building routines.
14843  *
14844  * Returns the resulting document tree
14845  */
14846 
14847 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14848 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14849     xmlDocPtr ret;
14850     xmlParserCtxtPtr ctxt;
14851     xmlSAXHandlerPtr oldsax = NULL;
14852 
14853     if (cur == NULL) return(NULL);
14854 
14855 
14856     ctxt = xmlCreateDocParserCtxt(cur);
14857     if (ctxt == NULL) return(NULL);
14858     if (sax != NULL) {
14859         oldsax = ctxt->sax;
14860         ctxt->sax = sax;
14861         ctxt->userData = NULL;
14862     }
14863     xmlDetectSAX2(ctxt);
14864 
14865     xmlParseDocument(ctxt);
14866     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14867     else {
14868        ret = NULL;
14869        xmlFreeDoc(ctxt->myDoc);
14870        ctxt->myDoc = NULL;
14871     }
14872     if (sax != NULL)
14873 	ctxt->sax = oldsax;
14874     xmlFreeParserCtxt(ctxt);
14875 
14876     return(ret);
14877 }
14878 
14879 /**
14880  * xmlParseDoc:
14881  * @cur:  a pointer to an array of xmlChar
14882  *
14883  * parse an XML in-memory document and build a tree.
14884  *
14885  * Returns the resulting document tree
14886  */
14887 
14888 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14889 xmlParseDoc(const xmlChar *cur) {
14890     return(xmlSAXParseDoc(NULL, cur, 0));
14891 }
14892 #endif /* LIBXML_SAX1_ENABLED */
14893 
14894 #ifdef LIBXML_LEGACY_ENABLED
14895 /************************************************************************
14896  *									*
14897  *	Specific function to keep track of entities references		*
14898  *	and used by the XSLT debugger					*
14899  *									*
14900  ************************************************************************/
14901 
/*
 * Callback invoked by xmlAddEntityReference(); stays NULL until one is
 * registered through xmlSetEntityReferenceFunc().
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14903 
14904 /**
14905  * xmlAddEntityReference:
14906  * @ent : A valid entity
14907  * @firstNode : A valid first node for children of entity
14908  * @lastNode : A valid last node of children entity
14909  *
14910  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14911  */
14912 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14913 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14914                       xmlNodePtr lastNode)
14915 {
14916     if (xmlEntityRefFunc != NULL) {
14917         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14918     }
14919 }
14920 
14921 
14922 /**
14923  * xmlSetEntityReferenceFunc:
14924  * @func: A valid function
14925  *
14926  * Set the function to call call back when a xml reference has been made
14927  */
14928 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14929 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14930 {
14931     xmlEntityRefFunc = func;
14932 }
14933 #endif /* LIBXML_LEGACY_ENABLED */
14934 
14935 /************************************************************************
14936  *									*
14937  *				Miscellaneous				*
14938  *									*
14939  ************************************************************************/
14940 
14941 #ifdef LIBXML_XPATH_ENABLED
14942 #include <libxml/xpath.h>
14943 #endif
14944 
/*
 * Default generic error function, declared here and defined outside this
 * section; xmlInitParser() compares xmlGenericError against it.
 */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 by xmlInitParser() and back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
14947 
14948 /**
14949  * xmlInitParser:
14950  *
14951  * Initialization function for the XML parser.
14952  * This is not reentrant. Call once before processing in case of
14953  * use in multithreaded programs.
14954  */
14955 
14956 void
xmlInitParser(void)14957 xmlInitParser(void) {
14958     if (xmlParserInitialized != 0)
14959 	return;
14960 
14961 #ifdef LIBXML_THREAD_ENABLED
14962     __xmlGlobalInitMutexLock();
14963     if (xmlParserInitialized == 0) {
14964 #endif
14965 	xmlInitThreads();
14966 	xmlInitGlobals();
14967 	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14968 	    (xmlGenericError == NULL))
14969 	    initGenericErrorDefaultFunc(NULL);
14970 	xmlInitMemory();
14971         xmlInitializeDict();
14972 	xmlInitCharEncodingHandlers();
14973 	xmlDefaultSAXHandlerInit();
14974 	xmlRegisterDefaultInputCallbacks();
14975 #ifdef LIBXML_OUTPUT_ENABLED
14976 	xmlRegisterDefaultOutputCallbacks();
14977 #endif /* LIBXML_OUTPUT_ENABLED */
14978 #ifdef LIBXML_HTML_ENABLED
14979 	htmlInitAutoClose();
14980 	htmlDefaultSAXHandlerInit();
14981 #endif
14982 #ifdef LIBXML_XPATH_ENABLED
14983 	xmlXPathInit();
14984 #endif
14985 	xmlParserInitialized = 1;
14986 #ifdef LIBXML_THREAD_ENABLED
14987     }
14988     __xmlGlobalInitMutexUnlock();
14989 #endif
14990 }
14991 
14992 /**
14993  * xmlCleanupParser:
14994  *
14995  * This function name is somewhat misleading. It does not clean up
14996  * parser state, it cleans up memory allocated by the library itself.
14997  * It is a cleanup function for the XML library. It tries to reclaim all
14998  * related global memory allocated for the library processing.
14999  * It doesn't deallocate any document related memory. One should
15000  * call xmlCleanupParser() only when the process has finished using
15001  * the library and all XML/HTML documents built with it.
15002  * See also xmlInitParser() which has the opposite function of preparing
15003  * the library for operations.
15004  *
15005  * WARNING: if your application is multithreaded or has plugin support
15006  *          calling this may crash the application if another thread or
15007  *          a plugin is still using libxml2. It's sometimes very hard to
15008  *          guess if libxml2 is in use in the application, some libraries
15009  *          or plugins may use it without notice. In case of doubt abstain
15010  *          from calling this function or do it just before calling exit()
15011  *          to avoid leak reports from valgrind !
15012  */
15013 
15014 void
xmlCleanupParser(void)15015 xmlCleanupParser(void) {
15016     if (!xmlParserInitialized)
15017 	return;
15018 
15019     xmlCleanupCharEncodingHandlers();
15020 #ifdef LIBXML_CATALOG_ENABLED
15021     xmlCatalogCleanup();
15022 #endif
15023     xmlDictCleanup();
15024     xmlCleanupInputCallbacks();
15025 #ifdef LIBXML_OUTPUT_ENABLED
15026     xmlCleanupOutputCallbacks();
15027 #endif
15028 #ifdef LIBXML_SCHEMAS_ENABLED
15029     xmlSchemaCleanupTypes();
15030     xmlRelaxNGCleanupTypes();
15031 #endif
15032     xmlResetLastError();
15033     xmlCleanupGlobals();
15034     xmlCleanupThreads(); /* must be last if called not from the main thread */
15035     xmlCleanupMemory();
15036     xmlParserInitialized = 0;
15037 }
15038 
15039 /************************************************************************
15040  *									*
15041  *	New set (2.6.0) of simpler and more flexible APIs		*
15042  *									*
15043  ************************************************************************/
15044 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded. The expansion is a single statement, so the macro can be
 * used safely in an unbraced if/else; callers supply the trailing
 * semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
15056 
15057 /**
15058  * xmlCtxtReset:
15059  * @ctxt: an XML parser context
15060  *
15061  * Reset a parser context
15062  */
15063 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)15064 xmlCtxtReset(xmlParserCtxtPtr ctxt)
15065 {
15066     xmlParserInputPtr input;
15067     xmlDictPtr dict;
15068 
15069     if (ctxt == NULL)
15070         return;
15071 
15072     dict = ctxt->dict;
15073 
15074     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
15075         xmlFreeInputStream(input);
15076     }
15077     ctxt->inputNr = 0;
15078     ctxt->input = NULL;
15079 
15080     ctxt->spaceNr = 0;
15081     if (ctxt->spaceTab != NULL) {
15082 	ctxt->spaceTab[0] = -1;
15083 	ctxt->space = &ctxt->spaceTab[0];
15084     } else {
15085         ctxt->space = NULL;
15086     }
15087 
15088 
15089     ctxt->nodeNr = 0;
15090     ctxt->node = NULL;
15091 
15092     ctxt->nameNr = 0;
15093     ctxt->name = NULL;
15094 
15095     DICT_FREE(ctxt->version);
15096     ctxt->version = NULL;
15097     DICT_FREE(ctxt->encoding);
15098     ctxt->encoding = NULL;
15099     DICT_FREE(ctxt->directory);
15100     ctxt->directory = NULL;
15101     DICT_FREE(ctxt->extSubURI);
15102     ctxt->extSubURI = NULL;
15103     DICT_FREE(ctxt->extSubSystem);
15104     ctxt->extSubSystem = NULL;
15105     if (ctxt->myDoc != NULL)
15106         xmlFreeDoc(ctxt->myDoc);
15107     ctxt->myDoc = NULL;
15108 
15109     ctxt->standalone = -1;
15110     ctxt->hasExternalSubset = 0;
15111     ctxt->hasPErefs = 0;
15112     ctxt->html = 0;
15113     ctxt->external = 0;
15114     ctxt->instate = XML_PARSER_START;
15115     ctxt->token = 0;
15116 
15117     ctxt->wellFormed = 1;
15118     ctxt->nsWellFormed = 1;
15119     ctxt->disableSAX = 0;
15120     ctxt->valid = 1;
15121 #if 0
15122     ctxt->vctxt.userData = ctxt;
15123     ctxt->vctxt.error = xmlParserValidityError;
15124     ctxt->vctxt.warning = xmlParserValidityWarning;
15125 #endif
15126     ctxt->record_info = 0;
15127     ctxt->nbChars = 0;
15128     ctxt->checkIndex = 0;
15129     ctxt->inSubset = 0;
15130     ctxt->errNo = XML_ERR_OK;
15131     ctxt->depth = 0;
15132     ctxt->charset = XML_CHAR_ENCODING_UTF8;
15133     ctxt->catalogs = NULL;
15134     ctxt->nbentities = 0;
15135     ctxt->sizeentities = 0;
15136     ctxt->sizeentcopy = 0;
15137     xmlInitNodeInfoSeq(&ctxt->node_seq);
15138 
15139     if (ctxt->attsDefault != NULL) {
15140         xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
15141         ctxt->attsDefault = NULL;
15142     }
15143     if (ctxt->attsSpecial != NULL) {
15144         xmlHashFree(ctxt->attsSpecial, NULL);
15145         ctxt->attsSpecial = NULL;
15146     }
15147 
15148 #ifdef LIBXML_CATALOG_ENABLED
15149     if (ctxt->catalogs != NULL)
15150 	xmlCatalogFreeLocal(ctxt->catalogs);
15151 #endif
15152     if (ctxt->lastError.code != XML_ERR_OK)
15153         xmlResetError(&ctxt->lastError);
15154 }
15155 
15156 /**
15157  * xmlCtxtResetPush:
15158  * @ctxt: an XML parser context
15159  * @chunk:  a pointer to an array of chars
15160  * @size:  number of chars in the array
15161  * @filename:  an optional file name or URI
15162  * @encoding:  the document encoding, or NULL
15163  *
15164  * Reset a push parser context
15165  *
15166  * Returns 0 in case of success and 1 in case of error
15167  */
15168 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)15169 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15170                  int size, const char *filename, const char *encoding)
15171 {
15172     xmlParserInputPtr inputStream;
15173     xmlParserInputBufferPtr buf;
15174     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15175 
15176     if (ctxt == NULL)
15177         return(1);
15178 
15179     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15180         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15181 
15182     buf = xmlAllocParserInputBuffer(enc);
15183     if (buf == NULL)
15184         return(1);
15185 
15186     if (ctxt == NULL) {
15187         xmlFreeParserInputBuffer(buf);
15188         return(1);
15189     }
15190 
15191     xmlCtxtReset(ctxt);
15192 
15193     if (ctxt->pushTab == NULL) {
15194         ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15195 	                                    sizeof(xmlChar *));
15196         if (ctxt->pushTab == NULL) {
15197 	    xmlErrMemory(ctxt, NULL);
15198             xmlFreeParserInputBuffer(buf);
15199             return(1);
15200         }
15201     }
15202 
15203     if (filename == NULL) {
15204         ctxt->directory = NULL;
15205     } else {
15206         ctxt->directory = xmlParserGetDirectory(filename);
15207     }
15208 
15209     inputStream = xmlNewInputStream(ctxt);
15210     if (inputStream == NULL) {
15211         xmlFreeParserInputBuffer(buf);
15212         return(1);
15213     }
15214 
15215     if (filename == NULL)
15216         inputStream->filename = NULL;
15217     else
15218         inputStream->filename = (char *)
15219             xmlCanonicPath((const xmlChar *) filename);
15220     inputStream->buf = buf;
15221     xmlBufResetInput(buf->buffer, inputStream);
15222 
15223     inputPush(ctxt, inputStream);
15224 
15225     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15226         (ctxt->input->buf != NULL)) {
15227 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15228         size_t cur = ctxt->input->cur - ctxt->input->base;
15229 
15230         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15231 
15232         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15233 #ifdef DEBUG_PUSH
15234         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15235 #endif
15236     }
15237 
15238     if (encoding != NULL) {
15239         xmlCharEncodingHandlerPtr hdlr;
15240 
15241         if (ctxt->encoding != NULL)
15242 	    xmlFree((xmlChar *) ctxt->encoding);
15243         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15244 
15245         hdlr = xmlFindCharEncodingHandler(encoding);
15246         if (hdlr != NULL) {
15247             xmlSwitchToEncoding(ctxt, hdlr);
15248 	} else {
15249 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15250 			      "Unsupported encoding %s\n", BAD_CAST encoding);
15251         }
15252     } else if (enc != XML_CHAR_ENCODING_NONE) {
15253         xmlSwitchEncoding(ctxt, enc);
15254     }
15255 
15256     return(0);
15257 }
15258 
15259 
15260 /**
15261  * xmlCtxtUseOptionsInternal:
15262  * @ctxt: an XML parser context
15263  * @options:  a combination of xmlParserOption
15264  * @encoding:  the user provided encoding to use
15265  *
15266  * Applies the options to the parser context
15267  *
15268  * Returns 0 in case of success, the set of unknown or unimplemented options
15269  *         in case of error.
15270  */
15271 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15272 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15273 {
15274     if (ctxt == NULL)
15275         return(-1);
15276     if (encoding != NULL) {
15277         if (ctxt->encoding != NULL)
15278 	    xmlFree((xmlChar *) ctxt->encoding);
15279         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15280     }
15281     if (options & XML_PARSE_RECOVER) {
15282         ctxt->recovery = 1;
15283         options -= XML_PARSE_RECOVER;
15284 	ctxt->options |= XML_PARSE_RECOVER;
15285     } else
15286         ctxt->recovery = 0;
15287     if (options & XML_PARSE_DTDLOAD) {
15288         ctxt->loadsubset = XML_DETECT_IDS;
15289         options -= XML_PARSE_DTDLOAD;
15290 	ctxt->options |= XML_PARSE_DTDLOAD;
15291     } else
15292         ctxt->loadsubset = 0;
15293     if (options & XML_PARSE_DTDATTR) {
15294         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15295         options -= XML_PARSE_DTDATTR;
15296 	ctxt->options |= XML_PARSE_DTDATTR;
15297     }
15298     if (options & XML_PARSE_NOENT) {
15299         ctxt->replaceEntities = 1;
15300         /* ctxt->loadsubset |= XML_DETECT_IDS; */
15301         options -= XML_PARSE_NOENT;
15302 	ctxt->options |= XML_PARSE_NOENT;
15303     } else
15304         ctxt->replaceEntities = 0;
15305     if (options & XML_PARSE_PEDANTIC) {
15306         ctxt->pedantic = 1;
15307         options -= XML_PARSE_PEDANTIC;
15308 	ctxt->options |= XML_PARSE_PEDANTIC;
15309     } else
15310         ctxt->pedantic = 0;
15311     if (options & XML_PARSE_NOBLANKS) {
15312         ctxt->keepBlanks = 0;
15313         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15314         options -= XML_PARSE_NOBLANKS;
15315 	ctxt->options |= XML_PARSE_NOBLANKS;
15316     } else
15317         ctxt->keepBlanks = 1;
15318     if (options & XML_PARSE_DTDVALID) {
15319         ctxt->validate = 1;
15320         if (options & XML_PARSE_NOWARNING)
15321             ctxt->vctxt.warning = NULL;
15322         if (options & XML_PARSE_NOERROR)
15323             ctxt->vctxt.error = NULL;
15324         options -= XML_PARSE_DTDVALID;
15325 	ctxt->options |= XML_PARSE_DTDVALID;
15326     } else
15327         ctxt->validate = 0;
15328     if (options & XML_PARSE_NOWARNING) {
15329         ctxt->sax->warning = NULL;
15330         options -= XML_PARSE_NOWARNING;
15331     }
15332     if (options & XML_PARSE_NOERROR) {
15333         ctxt->sax->error = NULL;
15334         ctxt->sax->fatalError = NULL;
15335         options -= XML_PARSE_NOERROR;
15336     }
15337 #ifdef LIBXML_SAX1_ENABLED
15338     if (options & XML_PARSE_SAX1) {
15339         ctxt->sax->startElement = xmlSAX2StartElement;
15340         ctxt->sax->endElement = xmlSAX2EndElement;
15341         ctxt->sax->startElementNs = NULL;
15342         ctxt->sax->endElementNs = NULL;
15343         ctxt->sax->initialized = 1;
15344         options -= XML_PARSE_SAX1;
15345 	ctxt->options |= XML_PARSE_SAX1;
15346     }
15347 #endif /* LIBXML_SAX1_ENABLED */
15348     if (options & XML_PARSE_NODICT) {
15349         ctxt->dictNames = 0;
15350         options -= XML_PARSE_NODICT;
15351 	ctxt->options |= XML_PARSE_NODICT;
15352     } else {
15353         ctxt->dictNames = 1;
15354     }
15355     if (options & XML_PARSE_NOCDATA) {
15356         ctxt->sax->cdataBlock = NULL;
15357         options -= XML_PARSE_NOCDATA;
15358 	ctxt->options |= XML_PARSE_NOCDATA;
15359     }
15360     if (options & XML_PARSE_NSCLEAN) {
15361 	ctxt->options |= XML_PARSE_NSCLEAN;
15362         options -= XML_PARSE_NSCLEAN;
15363     }
15364     if (options & XML_PARSE_NONET) {
15365 	ctxt->options |= XML_PARSE_NONET;
15366         options -= XML_PARSE_NONET;
15367     }
15368     if (options & XML_PARSE_COMPACT) {
15369 	ctxt->options |= XML_PARSE_COMPACT;
15370         options -= XML_PARSE_COMPACT;
15371     }
15372     if (options & XML_PARSE_OLD10) {
15373 	ctxt->options |= XML_PARSE_OLD10;
15374         options -= XML_PARSE_OLD10;
15375     }
15376     if (options & XML_PARSE_NOBASEFIX) {
15377 	ctxt->options |= XML_PARSE_NOBASEFIX;
15378         options -= XML_PARSE_NOBASEFIX;
15379     }
15380     if (options & XML_PARSE_HUGE) {
15381 	ctxt->options |= XML_PARSE_HUGE;
15382         options -= XML_PARSE_HUGE;
15383         if (ctxt->dict != NULL)
15384             xmlDictSetLimit(ctxt->dict, 0);
15385     }
15386     if (options & XML_PARSE_OLDSAX) {
15387 	ctxt->options |= XML_PARSE_OLDSAX;
15388         options -= XML_PARSE_OLDSAX;
15389     }
15390     if (options & XML_PARSE_IGNORE_ENC) {
15391 	ctxt->options |= XML_PARSE_IGNORE_ENC;
15392         options -= XML_PARSE_IGNORE_ENC;
15393     }
15394     if (options & XML_PARSE_BIG_LINES) {
15395 	ctxt->options |= XML_PARSE_BIG_LINES;
15396         options -= XML_PARSE_BIG_LINES;
15397     }
15398     ctxt->linenumbers = 1;
15399     return (options);
15400 }
15401 
15402 /**
15403  * xmlCtxtUseOptions:
15404  * @ctxt: an XML parser context
15405  * @options:  a combination of xmlParserOption
15406  *
15407  * Applies the options to the parser context
15408  *
15409  * Returns 0 in case of success, the set of unknown or unimplemented options
15410  *         in case of error.
15411  */
15412 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15413 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15414 {
15415    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15416 }
15417 
15418 /**
15419  * xmlDoRead:
15420  * @ctxt:  an XML parser context
15421  * @URL:  the base URL to use for the document
15422  * @encoding:  the document encoding, or NULL
15423  * @options:  a combination of xmlParserOption
15424  * @reuse:  keep the context for reuse
15425  *
15426  * Common front-end for the xmlRead functions
15427  *
15428  * Returns the resulting document tree or NULL
15429  */
15430 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15431 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15432           int options, int reuse)
15433 {
15434     xmlDocPtr ret;
15435 
15436     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15437     if (encoding != NULL) {
15438         xmlCharEncodingHandlerPtr hdlr;
15439 
15440 	hdlr = xmlFindCharEncodingHandler(encoding);
15441 	if (hdlr != NULL)
15442 	    xmlSwitchToEncoding(ctxt, hdlr);
15443     }
15444     if ((URL != NULL) && (ctxt->input != NULL) &&
15445         (ctxt->input->filename == NULL))
15446         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15447     xmlParseDocument(ctxt);
15448     if ((ctxt->wellFormed) || ctxt->recovery)
15449         ret = ctxt->myDoc;
15450     else {
15451         ret = NULL;
15452 	if (ctxt->myDoc != NULL) {
15453 	    xmlFreeDoc(ctxt->myDoc);
15454 	}
15455     }
15456     ctxt->myDoc = NULL;
15457     if (!reuse) {
15458 	xmlFreeParserCtxt(ctxt);
15459     }
15460 
15461     return (ret);
15462 }
15463 
15464 /**
15465  * xmlReadDoc:
15466  * @cur:  a pointer to a zero terminated string
15467  * @URL:  the base URL to use for the document
15468  * @encoding:  the document encoding, or NULL
15469  * @options:  a combination of xmlParserOption
15470  *
15471  * parse an XML in-memory document and build a tree.
15472  *
15473  * Returns the resulting document tree
15474  */
15475 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15476 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15477 {
15478     xmlParserCtxtPtr ctxt;
15479 
15480     if (cur == NULL)
15481         return (NULL);
15482     xmlInitParser();
15483 
15484     ctxt = xmlCreateDocParserCtxt(cur);
15485     if (ctxt == NULL)
15486         return (NULL);
15487     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15488 }
15489 
15490 /**
15491  * xmlReadFile:
15492  * @filename:  a file or URL
15493  * @encoding:  the document encoding, or NULL
15494  * @options:  a combination of xmlParserOption
15495  *
15496  * parse an XML file from the filesystem or the network.
15497  *
15498  * Returns the resulting document tree
15499  */
15500 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15501 xmlReadFile(const char *filename, const char *encoding, int options)
15502 {
15503     xmlParserCtxtPtr ctxt;
15504 
15505     xmlInitParser();
15506     ctxt = xmlCreateURLParserCtxt(filename, options);
15507     if (ctxt == NULL)
15508         return (NULL);
15509     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15510 }
15511 
15512 /**
15513  * xmlReadMemory:
15514  * @buffer:  a pointer to a char array
15515  * @size:  the size of the array
15516  * @URL:  the base URL to use for the document
15517  * @encoding:  the document encoding, or NULL
15518  * @options:  a combination of xmlParserOption
15519  *
15520  * parse an XML in-memory document and build a tree.
15521  *
15522  * Returns the resulting document tree
15523  */
15524 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15525 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15526 {
15527     xmlParserCtxtPtr ctxt;
15528 
15529     xmlInitParser();
15530     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15531     if (ctxt == NULL)
15532         return (NULL);
15533     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15534 }
15535 
15536 /**
15537  * xmlReadFd:
15538  * @fd:  an open file descriptor
15539  * @URL:  the base URL to use for the document
15540  * @encoding:  the document encoding, or NULL
15541  * @options:  a combination of xmlParserOption
15542  *
15543  * parse an XML from a file descriptor and build a tree.
15544  * NOTE that the file descriptor will not be closed when the
15545  *      reader is closed or reset.
15546  *
15547  * Returns the resulting document tree
15548  */
15549 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15550 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15551 {
15552     xmlParserCtxtPtr ctxt;
15553     xmlParserInputBufferPtr input;
15554     xmlParserInputPtr stream;
15555 
15556     if (fd < 0)
15557         return (NULL);
15558     xmlInitParser();
15559 
15560     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15561     if (input == NULL)
15562         return (NULL);
15563     input->closecallback = NULL;
15564     ctxt = xmlNewParserCtxt();
15565     if (ctxt == NULL) {
15566         xmlFreeParserInputBuffer(input);
15567         return (NULL);
15568     }
15569     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15570     if (stream == NULL) {
15571         xmlFreeParserInputBuffer(input);
15572 	xmlFreeParserCtxt(ctxt);
15573         return (NULL);
15574     }
15575     inputPush(ctxt, stream);
15576     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15577 }
15578 
15579 /**
15580  * xmlReadIO:
15581  * @ioread:  an I/O read function
15582  * @ioclose:  an I/O close function
15583  * @ioctx:  an I/O handler
15584  * @URL:  the base URL to use for the document
15585  * @encoding:  the document encoding, or NULL
15586  * @options:  a combination of xmlParserOption
15587  *
15588  * parse an XML document from I/O functions and source and build a tree.
15589  *
15590  * Returns the resulting document tree
15591  */
15592 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15593 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15594           void *ioctx, const char *URL, const char *encoding, int options)
15595 {
15596     xmlParserCtxtPtr ctxt;
15597     xmlParserInputBufferPtr input;
15598     xmlParserInputPtr stream;
15599 
15600     if (ioread == NULL)
15601         return (NULL);
15602     xmlInitParser();
15603 
15604     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15605                                          XML_CHAR_ENCODING_NONE);
15606     if (input == NULL) {
15607         if (ioclose != NULL)
15608             ioclose(ioctx);
15609         return (NULL);
15610     }
15611     ctxt = xmlNewParserCtxt();
15612     if (ctxt == NULL) {
15613         xmlFreeParserInputBuffer(input);
15614         return (NULL);
15615     }
15616     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15617     if (stream == NULL) {
15618         xmlFreeParserInputBuffer(input);
15619 	xmlFreeParserCtxt(ctxt);
15620         return (NULL);
15621     }
15622     inputPush(ctxt, stream);
15623     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15624 }
15625 
15626 /**
15627  * xmlCtxtReadDoc:
15628  * @ctxt:  an XML parser context
15629  * @cur:  a pointer to a zero terminated string
15630  * @URL:  the base URL to use for the document
15631  * @encoding:  the document encoding, or NULL
15632  * @options:  a combination of xmlParserOption
15633  *
15634  * parse an XML in-memory document and build a tree.
15635  * This reuses the existing @ctxt parser context
15636  *
15637  * Returns the resulting document tree
15638  */
15639 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15640 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15641                const char *URL, const char *encoding, int options)
15642 {
15643     xmlParserInputPtr stream;
15644 
15645     if (cur == NULL)
15646         return (NULL);
15647     if (ctxt == NULL)
15648         return (NULL);
15649     xmlInitParser();
15650 
15651     xmlCtxtReset(ctxt);
15652 
15653     stream = xmlNewStringInputStream(ctxt, cur);
15654     if (stream == NULL) {
15655         return (NULL);
15656     }
15657     inputPush(ctxt, stream);
15658     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15659 }
15660 
15661 /**
15662  * xmlCtxtReadFile:
15663  * @ctxt:  an XML parser context
15664  * @filename:  a file or URL
15665  * @encoding:  the document encoding, or NULL
15666  * @options:  a combination of xmlParserOption
15667  *
15668  * parse an XML file from the filesystem or the network.
15669  * This reuses the existing @ctxt parser context
15670  *
15671  * Returns the resulting document tree
15672  */
15673 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15674 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15675                 const char *encoding, int options)
15676 {
15677     xmlParserInputPtr stream;
15678 
15679     if (filename == NULL)
15680         return (NULL);
15681     if (ctxt == NULL)
15682         return (NULL);
15683     xmlInitParser();
15684 
15685     xmlCtxtReset(ctxt);
15686 
15687     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15688     if (stream == NULL) {
15689         return (NULL);
15690     }
15691     inputPush(ctxt, stream);
15692     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15693 }
15694 
15695 /**
15696  * xmlCtxtReadMemory:
15697  * @ctxt:  an XML parser context
15698  * @buffer:  a pointer to a char array
15699  * @size:  the size of the array
15700  * @URL:  the base URL to use for the document
15701  * @encoding:  the document encoding, or NULL
15702  * @options:  a combination of xmlParserOption
15703  *
15704  * parse an XML in-memory document and build a tree.
15705  * This reuses the existing @ctxt parser context
15706  *
15707  * Returns the resulting document tree
15708  */
15709 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15710 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15711                   const char *URL, const char *encoding, int options)
15712 {
15713     xmlParserInputBufferPtr input;
15714     xmlParserInputPtr stream;
15715 
15716     if (ctxt == NULL)
15717         return (NULL);
15718     if (buffer == NULL)
15719         return (NULL);
15720     xmlInitParser();
15721 
15722     xmlCtxtReset(ctxt);
15723 
15724     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15725     if (input == NULL) {
15726 	return(NULL);
15727     }
15728 
15729     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15730     if (stream == NULL) {
15731 	xmlFreeParserInputBuffer(input);
15732 	return(NULL);
15733     }
15734 
15735     inputPush(ctxt, stream);
15736     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15737 }
15738 
15739 /**
15740  * xmlCtxtReadFd:
15741  * @ctxt:  an XML parser context
15742  * @fd:  an open file descriptor
15743  * @URL:  the base URL to use for the document
15744  * @encoding:  the document encoding, or NULL
15745  * @options:  a combination of xmlParserOption
15746  *
15747  * parse an XML from a file descriptor and build a tree.
15748  * This reuses the existing @ctxt parser context
15749  * NOTE that the file descriptor will not be closed when the
15750  *      reader is closed or reset.
15751  *
15752  * Returns the resulting document tree
15753  */
15754 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15755 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15756               const char *URL, const char *encoding, int options)
15757 {
15758     xmlParserInputBufferPtr input;
15759     xmlParserInputPtr stream;
15760 
15761     if (fd < 0)
15762         return (NULL);
15763     if (ctxt == NULL)
15764         return (NULL);
15765     xmlInitParser();
15766 
15767     xmlCtxtReset(ctxt);
15768 
15769 
15770     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15771     if (input == NULL)
15772         return (NULL);
15773     input->closecallback = NULL;
15774     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15775     if (stream == NULL) {
15776         xmlFreeParserInputBuffer(input);
15777         return (NULL);
15778     }
15779     inputPush(ctxt, stream);
15780     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15781 }
15782 
15783 /**
15784  * xmlCtxtReadIO:
15785  * @ctxt:  an XML parser context
15786  * @ioread:  an I/O read function
15787  * @ioclose:  an I/O close function
15788  * @ioctx:  an I/O handler
15789  * @URL:  the base URL to use for the document
15790  * @encoding:  the document encoding, or NULL
15791  * @options:  a combination of xmlParserOption
15792  *
15793  * parse an XML document from I/O functions and source and build a tree.
15794  * This reuses the existing @ctxt parser context
15795  *
15796  * Returns the resulting document tree
15797  */
15798 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15799 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15800               xmlInputCloseCallback ioclose, void *ioctx,
15801 	      const char *URL,
15802               const char *encoding, int options)
15803 {
15804     xmlParserInputBufferPtr input;
15805     xmlParserInputPtr stream;
15806 
15807     if (ioread == NULL)
15808         return (NULL);
15809     if (ctxt == NULL)
15810         return (NULL);
15811     xmlInitParser();
15812 
15813     xmlCtxtReset(ctxt);
15814 
15815     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15816                                          XML_CHAR_ENCODING_NONE);
15817     if (input == NULL) {
15818         if (ioclose != NULL)
15819             ioclose(ioctx);
15820         return (NULL);
15821     }
15822     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15823     if (stream == NULL) {
15824         xmlFreeParserInputBuffer(input);
15825         return (NULL);
15826     }
15827     inputPush(ctxt, stream);
15828     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15829 }
15830 
15831 #define bottom_parser
15832 #include "elfgcchack.h"
15833